From 6f3c5f1c23c25fa439a9f656afef0a0b3495ffac Mon Sep 17 00:00:00 2001 From: limm Date: Thu, 11 Jul 2024 17:25:35 +0800 Subject: [PATCH] support v1.4.0 --- CONTRIBUTING.md | 261 +-- CONTRIBUTING_zh-CN.md | 274 --- Dockerfile | 7 + LICENSES.md | 11 +- MANIFEST.in | 3 +- README.md | 15 +- README_ORIGIN.md | 253 +- README_zh-CN.md | 255 +- TERMINOLOGY.md | 48 +- docker/README.md | 70 - docker/dev/Dockerfile | 31 - docker/release/Dockerfile | 23 - docs/{en => }/Makefile | 0 docs/{en => }/_static/community/1.png | Bin docs/{en => }/_static/community/2.png | Bin docs/{en => }/_static/community/3.png | Bin docs/{en => }/_static/css/readthedocs.css | 4 - docs/{en => }/_static/flow_img2toimg1.png | Bin docs/{en => }/_static/flow_raw_images.png | Bin docs/{en => }/_static/flow_visualization.png | Bin docs/{en => }/_static/flow_warp.png | Bin docs/{en => }/_static/flow_warp_diff.png | Bin docs/{en => }/_static/image/mmcv-logo.png | Bin docs/{en => }/_static/parallel_progress.gif | Bin docs/{en => }/_static/parallel_progress.png | Bin docs/{en => }/_static/progress.gif | Bin docs/{en => }/_static/progress.png | Bin docs/_static/qq_group_qrcode.jpg | Bin 0 -> 71955 bytes docs/_static/zhihu_qrcode.jpg | Bin 0 -> 397245 bytes docs/api.rst | 44 + docs/community/contributing.md | 1 + docs/community/pr.md | 94 + docs/{en => }/compatibility.md | 0 docs/{zh_cn => }/conf.py | 128 +- .../deployment/mmcv_ops_definition.md | 130 +- docs/deployment/onnx.md | 19 + docs/deployment/onnxruntime_custom_ops.md | 378 +++ docs/deployment/onnxruntime_op.md | 126 + docs/deployment/tensorrt_custom_ops.md | 395 ++++ docs/deployment/tensorrt_plugin.md | 178 ++ docs/en/_static/version.json | 575 ----- docs/en/_templates/classtemplate.rst | 14 - docs/en/api/arraymisc.rst | 19 - docs/en/api/cnn.rst | 71 - docs/en/api/image.rst | 100 - docs/en/api/ops.rst | 135 -- docs/en/api/transforms.rst | 60 - docs/en/api/utils.rst | 23 - docs/en/api/video.rst | 56 - docs/en/api/visualization.rst | 50 - 
docs/en/community/contributing.md | 267 --- docs/en/community/pr.md | 3 - docs/en/docutils.conf | 2 - docs/en/faq.md | 93 - docs/en/get_started/build.md | 292 --- docs/en/get_started/installation.md | 348 --- docs/en/get_started/introduction.md | 36 - docs/en/switch_language.md | 3 - docs/en/understand_mmcv/cnn.md | 120 - docs/en/understand_mmcv/data_transform.md | 341 --- docs/en/understand_mmcv/ops.md | 66 - docs/faq.md | 42 + docs/get_started/build.md | 234 ++ docs/get_started/installation.md | 162 ++ docs/get_started/introduction.md | 29 + .../{en => }/get_started/previous_versions.md | 4 +- docs/{en => }/index.rst | 29 +- docs/{en => }/make.bat | 0 docs/{en => }/mmcv-logo.png | Bin docs/understand_mmcv/cnn.md | 538 +++++ docs/understand_mmcv/config.md | 200 ++ docs/{en => }/understand_mmcv/data_process.md | 24 +- docs/understand_mmcv/io.md | 247 ++ docs/understand_mmcv/ops.md | 37 + docs/understand_mmcv/registry.md | 155 ++ docs/understand_mmcv/runner.md | 163 ++ docs/understand_mmcv/utils.md | 74 + .../{en => }/understand_mmcv/visualization.md | 0 docs/zh_cn/_static/version.json | 575 ----- docs/zh_cn/_templates/classtemplate.rst | 14 - docs/zh_cn/api/arraymisc.rst | 19 - docs/zh_cn/api/cnn.rst | 71 - docs/zh_cn/api/image.rst | 100 - docs/zh_cn/api/ops.rst | 135 -- docs/zh_cn/api/transforms.rst | 60 - docs/zh_cn/api/utils.rst | 23 - docs/zh_cn/api/video.rst | 56 - docs/zh_cn/api/visualization.rst | 50 - docs/zh_cn/community/code_style.md | 609 ----- docs/zh_cn/community/contributing.md | 278 --- docs/zh_cn/community/pr.md | 3 - docs/zh_cn/docutils.conf | 2 - docs/zh_cn/faq.md | 91 - docs/zh_cn/get_started/article.md | 63 - docs/zh_cn/get_started/build.md | 300 --- docs/zh_cn/get_started/installation.md | 369 --- docs/zh_cn/get_started/introduction.md | 36 - docs/zh_cn/switch_language.md | 3 - docs/zh_cn/understand_mmcv/cnn.md | 114 - docs/zh_cn/understand_mmcv/data_transform.md | 341 --- docs/zh_cn/understand_mmcv/ops.md | 66 - {docs/zh_cn => 
docs_zh_CN}/Makefile | 0 .../_static/css/readthedocs.css | 4 - .../_static/image/mmcv-logo.png | Bin docs_zh_CN/api.rst | 44 + docs_zh_CN/community/contributing.md | 69 + docs_zh_CN/community/pr.md | 90 + {docs/zh_cn => docs_zh_CN}/compatibility.md | 0 {docs/en => docs_zh_CN}/conf.py | 132 +- docs_zh_CN/deployment/onnx.md | 19 + .../deployment/onnxruntime_custom_ops.md | 333 +++ docs_zh_CN/deployment/onnxruntime_op.md | 127 + docs_zh_CN/deployment/tensorrt_custom_ops.md | 391 ++++ docs_zh_CN/deployment/tensorrt_plugin.md | 177 ++ docs_zh_CN/faq.md | 37 + docs_zh_CN/get_started/build.md | 222 ++ docs_zh_CN/get_started/installation.md | 158 ++ docs_zh_CN/get_started/introduction.md | 30 + .../get_started/previous_versions.md | 5 +- {docs/zh_cn => docs_zh_CN}/index.rst | 30 +- {docs/zh_cn => docs_zh_CN}/make.bat | 0 {docs/zh_cn => docs_zh_CN}/mmcv-logo.png | 0 docs_zh_CN/understand_mmcv/cnn.md | 525 +++++ docs_zh_CN/understand_mmcv/config.md | 176 ++ .../understand_mmcv/data_process.md | 18 +- docs_zh_CN/understand_mmcv/io.md | 240 ++ docs_zh_CN/understand_mmcv/ops.md | 36 + docs_zh_CN/understand_mmcv/registry.md | 149 ++ docs_zh_CN/understand_mmcv/runner.md | 155 ++ docs_zh_CN/understand_mmcv/utils.md | 69 + .../understand_mmcv/visualization.md | 0 examples/train.py | 84 + mmcv/__init__.py | 6 +- mmcv/arraymisc/quantization.py | 22 +- mmcv/cnn/__init__.py | 35 +- mmcv/cnn/alexnet.py | 12 +- mmcv/cnn/bricks/__init__.py | 13 +- mmcv/cnn/bricks/activation.py | 50 +- mmcv/cnn/bricks/context_block.py | 25 +- mmcv/cnn/bricks/conv.py | 27 +- mmcv/cnn/bricks/conv2d_adaptive_padding.py | 25 +- mmcv/cnn/bricks/conv_module.py | 58 +- mmcv/cnn/bricks/conv_ws.py | 77 +- .../bricks/depthwise_separable_conv_module.py | 43 +- mmcv/cnn/bricks/drop.py | 26 +- mmcv/cnn/bricks/generalized_attention.py | 33 +- mmcv/cnn/bricks/hsigmoid.py | 34 +- mmcv/cnn/bricks/hswish.py | 22 +- mmcv/cnn/bricks/non_local.py | 66 +- mmcv/cnn/bricks/norm.py | 57 +- mmcv/cnn/bricks/padding.py | 27 +- 
mmcv/cnn/bricks/plugin.py | 40 +- mmcv/cnn/bricks/registry.py | 16 + mmcv/cnn/bricks/scale.py | 42 +- mmcv/cnn/bricks/swish.py | 9 +- mmcv/cnn/bricks/transformer.py | 432 +--- mmcv/cnn/bricks/upsample.py | 38 +- mmcv/cnn/bricks/wrappers.py | 37 +- mmcv/cnn/builder.py | 30 + mmcv/cnn/resnet.py | 103 +- mmcv/cnn/rfsearch/__init__.py | 5 - mmcv/cnn/rfsearch/operator.py | 169 -- mmcv/cnn/rfsearch/search.py | 239 -- mmcv/cnn/rfsearch/utils.py | 68 - mmcv/cnn/utils/__init__.py | 16 +- mmcv/cnn/utils/flops_counter.py | 175 +- mmcv/cnn/utils/fuse_conv_bn.py | 4 +- mmcv/cnn/utils/sync_bn.py | 59 + mmcv/cnn/utils/weight_init.py | 684 ++++++ mmcv/cnn/vgg.py | 53 +- mmcv/engine/__init__.py | 8 + mmcv/engine/test.py | 202 ++ mmcv/fileio/__init__.py | 11 + mmcv/fileio/file_client.py | 1148 +++++++++ mmcv/fileio/handlers/__init__.py | 7 + mmcv/fileio/handlers/base.py | 30 + mmcv/fileio/handlers/json_handler.py | 36 + mmcv/fileio/handlers/pickle_handler.py | 28 + mmcv/fileio/handlers/yaml_handler.py | 24 + mmcv/fileio/io.py | 151 ++ mmcv/fileio/parse.py | 97 + mmcv/image/__init__.py | 11 +- mmcv/image/colorspace.py | 37 +- mmcv/image/geometric.py | 242 +- mmcv/image/io.py | 202 +- mmcv/image/misc.py | 32 +- mmcv/image/photometric.py | 207 +- mmcv/model_zoo/deprecated.json | 6 + mmcv/model_zoo/mmcls.json | 59 + mmcv/model_zoo/open_mmlab.json | 50 + mmcv/onnx/__init__.py | 5 + mmcv/onnx/info.py | 21 + mmcv/onnx/onnx_utils/__init__.py | 1 + mmcv/onnx/onnx_utils/symbolic_helper.py | 331 +++ mmcv/onnx/symbolic.py | 496 ++++ mmcv/ops/__init__.py | 64 +- mmcv/ops/active_rotated_filter.py | 64 - mmcv/ops/assign_score_withk.py | 26 +- mmcv/ops/ball_query.py | 78 +- mmcv/ops/bbox.py | 70 +- mmcv/ops/bezier_align.py | 137 -- mmcv/ops/bias_act.py | 375 --- mmcv/ops/border_align.py | 33 +- mmcv/ops/box_iou_quadri.py | 49 - mmcv/ops/box_iou_rotated.py | 123 +- mmcv/ops/carafe.py | 77 +- mmcv/ops/cc_attention.py | 20 +- mmcv/ops/chamfer_distance.py | 93 - mmcv/ops/contour_expand.py | 17 +- 
mmcv/ops/conv2d_gradfix.py | 346 --- mmcv/ops/convex_iou.py | 52 - mmcv/ops/corner_pool.py | 148 +- mmcv/ops/correlation.py | 22 +- mmcv/ops/csrc/README.md | 216 +- .../ops/csrc/common/box_iou_rotated_utils.hpp | 83 - .../active_rotated_filter_cuda_kernel.cuh | 59 - .../cuda/assign_score_withk_cuda_kernel.cuh | 132 +- .../common/cuda/ball_query_cuda_kernel.cuh | 49 +- .../common/cuda/bbox_overlaps_cuda_kernel.cuh | 175 +- .../common/cuda/bezier_align_cuda_kernel.cuh | 230 -- .../csrc/common/cuda/box_iou_quadri_cuda.cuh | 91 - .../csrc/common/cuda/carafe_cuda_kernel.cuh | 18 +- .../cuda/chamfer_distance_cuda_kernel.cuh | 101 - .../csrc/common/cuda/common_cuda_helper.hpp | 16 +- .../common/cuda/convex_iou_cuda_kernel.cuh | 831 ------- .../ops/csrc/common/cuda/correlation_cuda.cuh | 202 +- .../cuda/diff_iou_rotated_cuda_kernel.cuh | 137 -- .../common/cuda/gather_points_cuda_kernel.cuh | 28 +- .../common/cuda/group_points_cuda_kernel.cuh | 36 +- .../csrc/common/cuda/iou3d_cuda_kernel.cuh | 290 +-- mmcv/ops/csrc/common/cuda/knn_cuda_kernel.cuh | 61 +- .../common/cuda/min_area_polygons_cuda.cuh | 300 --- .../cuda/ms_deform_attn_cuda_kernel.cuh | 137 +- mmcv/ops/csrc/common/cuda/nms_cuda_kernel.cuh | 111 +- mmcv/ops/csrc/common/cuda/nms_quadri_cuda.cuh | 141 -- .../ops/csrc/common/cuda/nms_rotated_cuda.cuh | 14 +- .../cuda/points_in_boxes_cuda_kernel.cuh | 48 +- .../cuda/points_in_polygons_cuda_kernel.cuh | 79 - .../common/cuda/prroi_pool_cuda_kernel.cuh | 381 --- .../cuda/riroi_align_rotated_cuda_kernel.cuh | 242 -- .../cuda/roi_align_rotated_cuda_kernel.cuh | 16 +- .../cuda/roiaware_pool3d_cuda_kernel.cuh | 252 +- .../cuda/roipoint_pool3d_cuda_kernel.cuh | 116 +- .../rotated_feature_align_cuda_kernel.cuh | 129 -- .../cuda/scatter_points_cuda_kernel.cuh | 4 +- mmcv/ops/csrc/common/cuda/spconv/indice.cuh | 236 -- .../csrc/common/cuda/spconv/reordering.cuh | 160 -- .../cuda/stack_ball_query_cuda_kernel.cuh | 68 - .../cuda/stack_group_points_cuda_kernel.cuh | 97 - 
.../cuda/three_interpolate_cuda_kernel.cuh | 42 +- .../csrc/common/cuda/three_nn_cuda_kernel.cuh | 75 +- .../common/cuda/voxelization_cuda_kernel.cuh | 57 +- .../common/mlu/bbox_overlaps_mlu_kernel.mlu | 322 --- .../ops/csrc/common/mlu/carafe_mlu_kernel.mlu | 552 ----- mmcv/ops/csrc/common/mlu/carafe_utils.hpp | 95 - .../ops/csrc/common/mlu/common_mlu_helper.hpp | 398 ---- .../common/mlu/deform_roi_pool_mlu_kernel.mlu | 712 ------ .../mlu/focal_loss_sigmoid_mlu_kernel.mlu | 888 ------- mmcv/ops/csrc/common/mlu/iou3d_mlu_kernel.mlu | 431 ---- mmcv/ops/csrc/common/mlu/iou3d_utils.hpp | 695 ------ .../common/mlu/masked_conv2d_mlu_kernel.mlu | 181 -- .../common/mlu/ms_deform_attn_mlu_kernel.mlu | 853 ------- mmcv/ops/csrc/common/mlu/nms_mlu_kernel.mlu | 483 ---- mmcv/ops/csrc/common/mlu/nms_utils.hpp | 553 ----- .../csrc/common/mlu/psamask_mlu_kernel.mlu | 615 ----- mmcv/ops/csrc/common/mlu/psamask_utils.hpp | 55 - .../csrc/common/mlu/roi_align_mlu_kernel.mlu | 493 ---- .../mlu/roi_align_rotated_mlu_kernel.mlu | 490 ---- .../common/mlu/roi_align_rotated_utils.hpp | 24 - .../csrc/common/mlu/roi_pool_mlu_kernel.mlu | 747 ------ .../common/mlu/roiaware_pool3d_mlu_kernel.mlu | 747 ------ ...oint_pool3d_large_boxes_num_mlu_kernel.mlu | 536 ----- .../common/mlu/roipoint_pool3d_mlu_kernel.mlu | 544 ----- .../csrc/common/mlu/three_nn_mlu_kernel.mlu | 466 ---- .../csrc/common/mlu/tin_shift_mlu_kernel.mlu | 307 --- mmcv/ops/csrc/common/mps/MPSDevice.h | 64 - mmcv/ops/csrc/common/mps/MPSLibrary.h | 61 - mmcv/ops/csrc/common/mps/MPSLibrary.mm | 107 - mmcv/ops/csrc/common/mps/MPSStream.h | 132 -- mmcv/ops/csrc/common/mps/MPSUtils.h | 51 - mmcv/ops/csrc/common/pytorch_cpp_helper.hpp | 11 +- mmcv/ops/csrc/common/pytorch_cuda_helper.hpp | 1 - mmcv/ops/csrc/common/pytorch_mlu_helper.hpp | 61 - mmcv/ops/csrc/common/pytorch_npu_helper.hpp | 35 - .../ops/csrc/common/utils/spconv/paramsgrid.h | 70 - .../csrc/common/utils/spconv/prettyprint.h | 493 ---- 
.../csrc/common/utils/spconv/pybind11_utils.h | 60 - .../common/utils/spconv/spconv/geometry.h | 295 --- .../csrc/common/utils/spconv/spconv/indice.h | 78 - .../csrc/common/utils/spconv/spconv/maxpool.h | 37 - .../common/utils/spconv/spconv/mp_helper.h | 50 - .../common/utils/spconv/spconv/point2voxel.h | 385 ---- .../common/utils/spconv/spconv/reordering.h | 36 - .../utils/spconv/tensorview/helper_kernel.cuh | 75 - .../utils/spconv/tensorview/helper_launch.h | 19 - .../utils/spconv/tensorview/tensorview.h | 1119 --------- mmcv/ops/csrc/onnxruntime/corner_pool.h | 46 + mmcv/ops/csrc/onnxruntime/cpu/corner_pool.cpp | 123 + mmcv/ops/csrc/onnxruntime/cpu/deform_conv.cpp | 263 +++ mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp | 314 +++ .../onnxruntime/cpu/modulated_deform_conv.cpp | 292 +++ mmcv/ops/csrc/onnxruntime/cpu/nms.cpp | 108 + .../onnxruntime/cpu/onnxruntime_register.cpp | 81 + mmcv/ops/csrc/onnxruntime/cpu/reduce_ops.cpp | 188 ++ mmcv/ops/csrc/onnxruntime/cpu/roi_align.cpp | 265 +++ .../onnxruntime/cpu/roi_align_rotated.cpp | 247 ++ mmcv/ops/csrc/onnxruntime/cpu/soft_nms.cpp | 156 ++ mmcv/ops/csrc/onnxruntime/deform_conv.h | 57 + mmcv/ops/csrc/onnxruntime/grid_sample.h | 44 + .../csrc/onnxruntime/modulated_deform_conv.h | 61 + mmcv/ops/csrc/onnxruntime/nms.h | 45 + .../csrc/onnxruntime/onnxruntime_register.h | 16 + .../onnxruntime_session_options_config_keys.h | 44 + mmcv/ops/csrc/onnxruntime/ort_mmcv_utils.h | 15 + mmcv/ops/csrc/onnxruntime/reduce_ops.h | 95 + mmcv/ops/csrc/onnxruntime/roi_align.h | 62 + mmcv/ops/csrc/onnxruntime/roi_align_rotated.h | 62 + mmcv/ops/csrc/onnxruntime/soft_nms.h | 49 + .../csrc/parrots/active_rotated_filter.cpp | 28 - .../parrots/active_rotated_filter_parrots.cpp | 63 - .../parrots/active_rotated_filter_pytorch.h | 13 - mmcv/ops/csrc/parrots/assign_score_withk.cpp | 73 +- mmcv/ops/csrc/parrots/ball_query.cpp | 31 +- mmcv/ops/csrc/parrots/bbox_overlaps.cpp | 26 +- mmcv/ops/csrc/parrots/border_align.cpp | 58 +- 
.../ops/csrc/parrots/border_align_parrots.cpp | 2 - mmcv/ops/csrc/parrots/box_iou_rotated.cpp | 24 +- mmcv/ops/csrc/parrots/box_iou_rotated_cpu.cpp | 33 + mmcv/ops/csrc/parrots/carafe.cpp | 74 +- mmcv/ops/csrc/parrots/carafe_naive.cpp | 61 +- mmcv/ops/csrc/parrots/chamfer_distance.cpp | 35 - .../csrc/parrots/chamfer_distance_parrots.cpp | 51 - .../csrc/parrots/chamfer_distance_pytorch.h | 16 - mmcv/ops/csrc/parrots/contour_expand.cpp | 1 + mmcv/ops/csrc/parrots/convex_iou.cpp | 23 - mmcv/ops/csrc/parrots/convex_iou_parrots.cpp | 40 - mmcv/ops/csrc/parrots/convex_iou_pytorch.h | 11 - mmcv/ops/csrc/parrots/corner_pool.cpp | 240 ++ mmcv/ops/csrc/parrots/corner_pool_parrots.cpp | 234 ++ mmcv/ops/csrc/parrots/corner_pool_pytorch.h | 15 + mmcv/ops/csrc/parrots/correlation.cpp | 74 +- mmcv/ops/csrc/parrots/cudabind.cpp | 1677 -------------- mmcv/ops/csrc/parrots/deform_conv.cpp | 141 +- mmcv/ops/csrc/parrots/deform_conv_cpu.cpp | 377 +++ mmcv/ops/csrc/parrots/deform_roi_pool.cpp | 70 +- mmcv/ops/csrc/parrots/diff_iou_rotated.cpp | 14 - .../csrc/parrots/diff_iou_rotated_parrots.cpp | 28 - .../csrc/parrots/diff_iou_rotated_pytorch.h | 10 - mmcv/ops/csrc/parrots/focal_loss.cpp | 116 +- .../csrc/parrots/furthest_point_sample.cpp | 60 +- .../ops/csrc/parrots/fused_bias_leakyrelu.cpp | 132 +- mmcv/ops/csrc/parrots/gather_points.cpp | 51 +- mmcv/ops/csrc/parrots/group_points.cpp | 50 +- mmcv/ops/csrc/parrots/info.cpp | 65 - mmcv/ops/csrc/parrots/iou3d.cpp | 232 +- mmcv/ops/csrc/parrots/iou3d_parrots.cpp | 34 +- mmcv/ops/csrc/parrots/iou3d_pytorch.h | 12 +- mmcv/ops/csrc/parrots/knn.cpp | 27 +- mmcv/ops/csrc/parrots/masked_conv2d.cpp | 64 +- mmcv/ops/csrc/parrots/min_area_polygons.cpp | 11 - .../parrots/min_area_polygons_parrots.cpp | 26 - .../csrc/parrots/min_area_polygons_pytorch.h | 9 - .../csrc/parrots/modulated_deform_conv.cpp | 195 +- .../parrots/modulated_deform_conv_cpu.cpp | 403 ++++ mmcv/ops/csrc/parrots/ms_deform_attn.cpp | 65 +- mmcv/ops/csrc/parrots/nms.cpp | 260 ++- 
.../nms_rotated_cpu.cpp} | 28 +- mmcv/ops/csrc/parrots/pixel_group.cpp | 125 +- mmcv/ops/csrc/parrots/points_in_boxes.cpp | 88 +- mmcv/ops/csrc/parrots/points_in_boxes_cpu.cpp | 53 + mmcv/ops/csrc/parrots/points_in_polygons.cpp | 15 - .../parrots/points_in_polygons_parrots.cpp | 28 - .../csrc/parrots/points_in_polygons_pytorch.h | 9 - mmcv/ops/csrc/parrots/prroi_pool.cpp | 47 - mmcv/ops/csrc/parrots/prroi_pool_parrots.cpp | 97 - mmcv/ops/csrc/parrots/prroi_pool_pytorch.h | 19 - mmcv/ops/csrc/parrots/psamask.cpp | 241 +- mmcv/ops/csrc/parrots/riroi_align_rotated.cpp | 42 - .../parrots/riroi_align_rotated_parrots.cpp | 86 - .../parrots/riroi_align_rotated_pytorch.h | 18 - mmcv/ops/csrc/parrots/roi_align.cpp | 119 +- mmcv/ops/csrc/parrots/roi_align_cpu.cpp | 430 ++++ mmcv/ops/csrc/parrots/roi_align_rotated.cpp | 154 +- .../roi_align_rotated_cpu.cpp} | 280 +-- .../parrots/roi_align_rotated_parrots.cpp | 28 +- .../csrc/parrots/roi_align_rotated_pytorch.h | 12 +- mmcv/ops/csrc/parrots/roi_pool.cpp | 58 +- mmcv/ops/csrc/parrots/roiaware_pool3d.cpp | 111 +- mmcv/ops/csrc/parrots/roipoint_pool3d.cpp | 49 +- .../csrc/parrots/rotated_feature_align.cpp | 39 - .../parrots/rotated_feature_align_parrots.cpp | 99 - .../parrots/rotated_feature_align_pytorch.h | 17 - mmcv/ops/csrc/parrots/sync_bn.cpp | 136 +- mmcv/ops/csrc/parrots/three_interpolate.cpp | 54 +- mmcv/ops/csrc/parrots/three_nn.cpp | 26 +- mmcv/ops/csrc/parrots/tin_shift.cpp | 46 +- mmcv/ops/csrc/parrots/upfirdn2d.cpp | 130 +- mmcv/ops/csrc/parrots/voxelization.cpp | 83 +- mmcv/ops/csrc/parrots/voxelization_cpu.cpp | 152 ++ .../ops/csrc/parrots/voxelization_parrots.cpp | 9 +- mmcv/ops/csrc/parrots/voxelization_pytorch.h | 3 +- .../csrc/pytorch/active_rotated_filter.cpp | 28 - mmcv/ops/csrc/pytorch/ball_query.cpp | 18 - mmcv/ops/csrc/pytorch/bezier_align.cpp | 38 - mmcv/ops/csrc/pytorch/bias_act.cpp | 20 - mmcv/ops/csrc/pytorch/box_iou_quadri.cpp | 17 - mmcv/ops/csrc/pytorch/chamfer_distance.cpp | 35 - 
mmcv/ops/csrc/pytorch/contour_expand.cpp | 1 + mmcv/ops/csrc/pytorch/convex_iou.cpp | 23 - mmcv/ops/csrc/pytorch/corner_pool.cpp | 240 ++ .../pytorch/cpu/active_rotated_filter.cpp | 120 - .../csrc/pytorch/cpu/bbox_overlaps_cpu.cpp | 65 - mmcv/ops/csrc/pytorch/cpu/box_iou_quadri.cpp | 36 - mmcv/ops/csrc/pytorch/cpu/nms_rotated.cpp | 2 +- mmcv/ops/csrc/pytorch/cpu/pixel_group.cpp | 6 +- .../csrc/pytorch/cpu/roi_align_rotated.cpp | 9 +- .../pytorch/cpu/rotated_feature_align.cpp | 262 --- mmcv/ops/csrc/pytorch/cpu/sparse_indice.cpp | 84 - mmcv/ops/csrc/pytorch/cpu/sparse_maxpool.cpp | 82 - .../csrc/pytorch/cpu/sparse_reordering.cpp | 68 - mmcv/ops/csrc/pytorch/cpu/voxelization.cpp | 30 +- .../cuda/active_rotated_filter_cuda.cu | 58 - .../pytorch/cuda/assign_score_withk_cuda.cu | 6 +- mmcv/ops/csrc/pytorch/cuda/ball_query_cuda.cu | 2 +- .../csrc/pytorch/cuda/bbox_overlaps_cuda.cu | 17 - .../csrc/pytorch/cuda/bezier_align_cuda.cu | 53 - mmcv/ops/csrc/pytorch/cuda/bias_act_cuda.cu | 300 --- .../csrc/pytorch/cuda/box_iou_quadri_cuda.cu | 23 - .../pytorch/cuda/chamfer_distance_cuda.cu | 63 - mmcv/ops/csrc/pytorch/cuda/convex_iou.cu | 41 - .../ops/csrc/pytorch/cuda/correlation_cuda.cu | 43 +- mmcv/ops/csrc/pytorch/cuda/cudabind.cpp | 852 ++----- .../pytorch/cuda/diff_iou_rotated_cuda.cu | 35 - mmcv/ops/csrc/pytorch/cuda/filtered_lrelu.cu | 2044 ----------------- .../pytorch/cuda/fused_spconv_ops_cuda.cu | 104 - .../csrc/pytorch/cuda/gather_points_cuda.cu | 4 +- .../csrc/pytorch/cuda/group_points_cuda.cu | 4 +- mmcv/ops/csrc/pytorch/cuda/iou3d_cuda.cu | 90 +- mmcv/ops/csrc/pytorch/cuda/knn_cuda.cu | 2 +- .../csrc/pytorch/cuda/min_area_polygons.cu | 21 - .../csrc/pytorch/cuda/ms_deform_attn_cuda.cu | 22 +- mmcv/ops/csrc/pytorch/cuda/nms_cuda.cu | 39 +- mmcv/ops/csrc/pytorch/cuda/nms_quadri_cuda.cu | 60 - .../csrc/pytorch/cuda/points_in_boxes_cuda.cu | 4 +- .../pytorch/cuda/points_in_polygons_cuda.cu | 28 - mmcv/ops/csrc/pytorch/cuda/prroi_pool_cuda.cu | 65 - 
mmcv/ops/csrc/pytorch/cuda/psamask_cuda.cu | 3 + .../pytorch/cuda/riroi_align_rotated_cuda.cu | 53 - .../pytorch/cuda/roi_align_rotated_cuda.cu | 14 +- .../csrc/pytorch/cuda/roiaware_pool3d_cuda.cu | 10 +- .../csrc/pytorch/cuda/roipoint_pool3d_cuda.cu | 6 +- .../cuda/rotated_feature_align_cuda.cu | 53 - .../csrc/pytorch/cuda/scatter_points_cuda.cu | 13 +- mmcv/ops/csrc/pytorch/cuda/sparse_indice.cu | 159 -- mmcv/ops/csrc/pytorch/cuda/sparse_maxpool.cu | 486 ---- .../csrc/pytorch/cuda/sparse_pool_ops_cuda.cu | 91 - .../csrc/pytorch/cuda/sparse_reordering.cu | 160 -- mmcv/ops/csrc/pytorch/cuda/spconv_ops_cuda.cu | 477 ---- .../pytorch/cuda/stack_ball_query_cuda.cu | 45 - .../pytorch/cuda/stack_group_points_cuda.cu | 62 - .../pytorch/cuda/three_interpolate_cuda.cu | 4 +- mmcv/ops/csrc/pytorch/cuda/three_nn_cuda.cu | 2 +- .../ops/csrc/pytorch/cuda/upfirdn2d_kernel.cu | 1015 +++----- .../csrc/pytorch/cuda/voxelization_cuda.cu | 98 - mmcv/ops/csrc/pytorch/diff_iou_rotated.cpp | 14 - mmcv/ops/csrc/pytorch/filtered_lrelu.cpp | 37 - mmcv/ops/csrc/pytorch/fused_spconv_ops.cpp | 34 - mmcv/ops/csrc/pytorch/group_points.cpp | 42 - mmcv/ops/csrc/pytorch/info.cpp | 15 +- mmcv/ops/csrc/pytorch/iou3d.cpp | 117 +- mmcv/ops/csrc/pytorch/min_area_polygons.cpp | 11 - .../csrc/pytorch/mlu/bbox_overlaps_mlu.cpp | 100 - mmcv/ops/csrc/pytorch/mlu/carafe_mlu.cpp | 429 ---- .../csrc/pytorch/mlu/deform_roi_pool_mlu.cpp | 343 --- .../pytorch/mlu/focal_loss_sigmoid_mlu.cpp | 332 --- mmcv/ops/csrc/pytorch/mlu/iou3d_mlu.cpp | 144 -- .../csrc/pytorch/mlu/masked_conv2d_mlu.cpp | 226 -- .../csrc/pytorch/mlu/ms_deform_attn_mlu.cpp | 420 ---- mmcv/ops/csrc/pytorch/mlu/nms_mlu.cpp | 156 -- mmcv/ops/csrc/pytorch/mlu/psamask_mlu.cpp | 308 --- mmcv/ops/csrc/pytorch/mlu/roi_align_mlu.cpp | 206 -- .../pytorch/mlu/roi_align_rotated_mlu.cpp | 232 -- mmcv/ops/csrc/pytorch/mlu/roi_pool_mlu.cpp | 275 --- .../csrc/pytorch/mlu/roiaware_pool3d_mlu.cpp | 399 ---- .../csrc/pytorch/mlu/roipoint_pool3d_mlu.cpp | 166 -- 
mmcv/ops/csrc/pytorch/mlu/three_nn_mlu.cpp | 100 - mmcv/ops/csrc/pytorch/mlu/tin_shift_mlu.cpp | 203 -- .../ops/csrc/pytorch/mps/bbox_overlaps_mps.mm | 99 - mmcv/ops/csrc/pytorch/nms_quadri.cpp | 30 - mmcv/ops/csrc/pytorch/nms_rotated.cpp | 21 +- .../csrc/pytorch/npu/bbox_overlaps_npu.cpp | 34 - mmcv/ops/csrc/pytorch/npu/deform_roi_pool.cpp | 63 - mmcv/ops/csrc/pytorch/npu/focal_loss_npu.cpp | 162 -- .../pytorch/npu/fused_bias_leakyrelu_npu.cpp | 54 - .../csrc/pytorch/npu/gather_points_npu.cpp | 29 - mmcv/ops/csrc/pytorch/npu/nms_npu.cpp | 45 - mmcv/ops/csrc/pytorch/npu/nms_rotated_npu.cpp | 32 - mmcv/ops/csrc/pytorch/npu/psa_mask_npu.cpp | 75 - mmcv/ops/csrc/pytorch/npu/roi_pool_npu.cpp | 63 - .../ops/csrc/pytorch/npu/voxelization_npu.cpp | 59 - mmcv/ops/csrc/pytorch/points_in_polygons.cpp | 15 - mmcv/ops/csrc/pytorch/prroi_pool.cpp | 47 - mmcv/ops/csrc/pytorch/pybind.cpp | 397 +--- mmcv/ops/csrc/pytorch/riroi_align_rotated.cpp | 42 - mmcv/ops/csrc/pytorch/roi_align_rotated.cpp | 12 +- .../csrc/pytorch/rotated_feature_align.cpp | 39 - mmcv/ops/csrc/pytorch/sparse_pool_ops.cpp | 48 - mmcv/ops/csrc/pytorch/spconv_ops.cpp | 171 -- mmcv/ops/csrc/pytorch/spconv_utils.h | 79 - mmcv/ops/csrc/pytorch/upfirdn2d.cpp | 22 +- mmcv/ops/csrc/pytorch/voxelization.cpp | 26 +- .../csrc/tensorrt/plugins/trt_corner_pool.cpp | 217 ++ .../plugins/trt_corner_pool_kernel.cu | 110 + .../csrc/tensorrt/plugins/trt_cuda_helper.cu | 91 + .../csrc/tensorrt/plugins/trt_cummaxmin.cpp | 242 ++ .../tensorrt/plugins/trt_cummaxmin_kernel.cu | 90 + .../csrc/tensorrt/plugins/trt_deform_conv.cpp | 318 +++ .../plugins/trt_deform_conv_kernel.cu | 129 ++ .../tensorrt/plugins/trt_grid_sampler.cpp | 256 +++ .../plugins/trt_grid_sampler_kernel.cu | 441 ++++ .../tensorrt/plugins/trt_instance_norm.cpp | 246 ++ .../plugins/trt_modulated_deform_conv.cpp | 308 +++ .../trt_modulated_deform_conv_kernel.cu | 134 ++ mmcv/ops/csrc/tensorrt/plugins/trt_nms.cpp | 279 +++ .../csrc/tensorrt/plugins/trt_nms_kernel.cu | 
272 +++ mmcv/ops/csrc/tensorrt/plugins/trt_plugin.cpp | 27 + .../csrc/tensorrt/plugins/trt_roi_align.cpp | 294 +++ .../tensorrt/plugins/trt_roi_align_kernel.cu | 28 + .../csrc/tensorrt/plugins/trt_scatternd.cpp | 207 ++ .../tensorrt/plugins/trt_scatternd_kernel.cu | 93 + mmcv/ops/csrc/tensorrt/trt_corner_pool.hpp | 111 + mmcv/ops/csrc/tensorrt/trt_cuda_helper.cuh | 41 + mmcv/ops/csrc/tensorrt/trt_cummaxmin.hpp | 122 + mmcv/ops/csrc/tensorrt/trt_deform_conv.hpp | 118 + mmcv/ops/csrc/tensorrt/trt_grid_sampler.hpp | 108 + mmcv/ops/csrc/tensorrt/trt_instance_norm.hpp | 120 + .../tensorrt/trt_modulated_deform_conv.hpp | 120 + mmcv/ops/csrc/tensorrt/trt_nms.hpp | 107 + mmcv/ops/csrc/tensorrt/trt_plugin.hpp | 7 + mmcv/ops/csrc/tensorrt/trt_plugin_helper.hpp | 41 + mmcv/ops/csrc/tensorrt/trt_roi_align.hpp | 108 + mmcv/ops/csrc/tensorrt/trt_scatternd.hpp | 98 + mmcv/ops/csrc/tensorrt/trt_serialize.hpp | 105 + mmcv/ops/deform_conv.py | 78 +- mmcv/ops/deform_roi_pool.py | 81 +- mmcv/ops/deprecated_wrappers.py | 11 +- mmcv/ops/diff_iou_rotated.py | 301 --- mmcv/ops/filtered_lrelu.py | 414 ---- mmcv/ops/focal_loss.py | 88 +- mmcv/ops/furthest_point_sample.py | 9 +- mmcv/ops/fused_bias_leakyrelu.py | 50 +- mmcv/ops/gather_points.py | 14 +- mmcv/ops/group_points.py | 178 +- mmcv/ops/info.py | 15 + mmcv/ops/iou3d.py | 215 +- mmcv/ops/knn.py | 17 +- mmcv/ops/masked_conv.py | 71 +- mmcv/ops/merge_cells.py | 65 +- mmcv/ops/min_area_polygons.py | 20 - mmcv/ops/modulated_deform_conv.py | 141 +- mmcv/ops/multi_scale_deform_attn.py | 159 +- mmcv/ops/nms.py | 322 +-- mmcv/ops/pixel_group.py | 29 +- mmcv/ops/point_sample.py | 186 +- mmcv/ops/points_in_boxes.py | 20 +- mmcv/ops/points_in_polygons.py | 38 - mmcv/ops/points_sampler.py | 65 +- mmcv/ops/prroi_pool.py | 152 -- mmcv/ops/psa_mask.py | 16 +- mmcv/ops/riroi_align_rotated.py | 140 -- mmcv/ops/roi_align.py | 120 +- mmcv/ops/roi_align_rotated.py | 140 +- mmcv/ops/roi_pool.py | 20 +- mmcv/ops/roiaware_pool3d.py | 30 +- 
mmcv/ops/roipoint_pool3d.py | 30 +- mmcv/ops/rotated_feature_align.py | 95 - mmcv/ops/saconv.py | 14 +- mmcv/ops/scatter_points.py | 47 +- mmcv/ops/sparse_conv.py | 455 ---- mmcv/ops/sparse_functional.py | 156 -- mmcv/ops/sparse_modules.py | 203 -- mmcv/ops/sparse_ops.py | 174 -- mmcv/ops/sparse_pool.py | 86 - mmcv/ops/sparse_structure.py | 66 - mmcv/ops/sync_bn.py | 32 +- mmcv/ops/three_interpolate.py | 25 +- mmcv/ops/three_nn.py | 16 +- mmcv/ops/tin_shift.py | 13 +- mmcv/ops/upfirdn2d.py | 754 +++--- mmcv/ops/voxelize.py | 86 +- mmcv/parallel/__init__.py | 13 + mmcv/parallel/_functions.py | 79 + mmcv/parallel/collate.py | 84 + mmcv/parallel/data_container.py | 89 + mmcv/parallel/data_parallel.py | 97 + mmcv/parallel/distributed.py | 112 + mmcv/parallel/distributed_deprecated.py | 70 + mmcv/parallel/registry.py | 8 + mmcv/parallel/scatter_gather.py | 59 + mmcv/parallel/utils.py | 20 + mmcv/runner/__init__.py | 47 + mmcv/runner/base_module.py | 195 ++ mmcv/runner/base_runner.py | 542 +++++ mmcv/runner/builder.py | 24 + mmcv/runner/checkpoint.py | 710 ++++++ mmcv/runner/default_constructor.py | 44 + mmcv/runner/dist_utils.py | 164 ++ mmcv/runner/epoch_based_runner.py | 187 ++ mmcv/runner/fp16_utils.py | 410 ++++ mmcv/runner/hooks/__init__.py | 29 + mmcv/runner/hooks/checkpoint.py | 167 ++ mmcv/runner/hooks/closure.py | 11 + mmcv/runner/hooks/ema.py | 89 + mmcv/runner/hooks/evaluation.py | 509 ++++ mmcv/runner/hooks/hook.py | 92 + mmcv/runner/hooks/iter_timer.py | 18 + mmcv/runner/hooks/logger/__init__.py | 15 + mmcv/runner/hooks/logger/base.py | 166 ++ mmcv/runner/hooks/logger/dvclive.py | 58 + mmcv/runner/hooks/logger/mlflow.py | 78 + mmcv/runner/hooks/logger/neptune.py | 82 + mmcv/runner/hooks/logger/pavi.py | 117 + mmcv/runner/hooks/logger/tensorboard.py | 57 + mmcv/runner/hooks/logger/text.py | 256 +++ mmcv/runner/hooks/logger/wandb.py | 56 + mmcv/runner/hooks/lr_updater.py | 670 ++++++ mmcv/runner/hooks/memory.py | 25 + mmcv/runner/hooks/momentum_updater.py | 
493 ++++ mmcv/runner/hooks/optimizer.py | 508 ++++ mmcv/runner/hooks/profiler.py | 180 ++ mmcv/runner/hooks/sampler_seed.py | 20 + mmcv/runner/hooks/sync_buffer.py | 22 + mmcv/runner/iter_based_runner.py | 273 +++ mmcv/runner/log_buffer.py | 41 + mmcv/runner/optimizer/__init__.py | 9 + mmcv/runner/optimizer/builder.py | 44 + mmcv/runner/optimizer/default_constructor.py | 249 ++ mmcv/runner/priority.py | 60 + mmcv/runner/utils.py | 93 + mmcv/tensorrt/__init__.py | 30 + mmcv/tensorrt/init_plugins.py | 37 + mmcv/tensorrt/preprocess.py | 120 + mmcv/tensorrt/tensorrt_utils.py | 235 ++ mmcv/transforms/__init__.py | 30 - mmcv/transforms/base.py | 30 - mmcv/transforms/builder.py | 2 - mmcv/transforms/formatting.py | 127 - mmcv/transforms/loading.py | 360 --- mmcv/transforms/processing.py | 1562 ------------- mmcv/transforms/utils.py | 249 -- mmcv/transforms/wrappers.py | 649 ------ mmcv/utils/__init__.py | 76 +- mmcv/utils/config.py | 688 ++++++ mmcv/utils/device_type.py | 8 - mmcv/utils/env.py | 75 +- mmcv/utils/ext_loader.py | 3 +- mmcv/utils/hub.py | 127 + mmcv/utils/logging.py | 110 + mmcv/utils/misc.py | 377 +++ mmcv/utils/parrots_jit.py | 2 +- mmcv/utils/parrots_wrapper.py | 107 + mmcv/utils/path.py | 101 + mmcv/utils/progressbar.py | 208 ++ mmcv/utils/registry.py | 315 +++ mmcv/utils/testing.py | 140 ++ mmcv/utils/timer.py | 118 + mmcv/utils/trace.py | 23 + mmcv/utils/version_utils.py | 90 + mmcv/version.py | 10 +- mmcv/video/io.py | 42 +- mmcv/video/optflow.py | 50 +- mmcv/video/processing.py | 59 +- mmcv/visualization/color.py | 8 +- mmcv/visualization/image.py | 63 +- mmcv/visualization/optflow.py | 14 +- requirements.txt | 9 +- requirements/build.txt | 1 - requirements/docs.txt | 4 +- requirements/optional.txt | 2 - requirements/runtime.txt | 1 - requirements/test.txt | 6 +- setup.cfg | 10 +- setup.py | 275 ++- tests/data/config/a.b.py | 1 - tests/data/config/a.py | 1 - tests/data/config/base.py | 1 - tests/data/config/code.py | 1 - tests/data/config/d.py | 1 - 
tests/data/config/delete.py | 4 +- tests/data/config/deprecated.py | 1 - tests/data/config/deprecated_as_base.py | 1 - tests/data/config/e.py | 1 - tests/data/config/expected.py | 1 - tests/data/config/f.py | 1 - tests/data/config/g.py | 1 - tests/data/config/h.py | 1 - tests/data/config/i_base.py | 1 - tests/data/config/i_child.py | 1 - tests/data/config/l.py | 7 - tests/data/config/l1.py | 1 - tests/data/config/l4.py | 1 - tests/data/config/m.py | 1 - tests/data/config/n.py | 7 - tests/data/config/q.py | 1 - tests/data/config/r.py | 1 - tests/data/config/s.py | 1 - tests/data/config/t.py | 1 - tests/data/config/u.py | 1 - tests/data/config/v.py | 1 - tests/data/for_carafe/carafe_feat.bin | Bin 4608 -> 0 bytes tests/data/for_carafe/carafe_feat_grad.bin | 33 - tests/data/for_carafe/carafe_mask.bin | Bin 28800 -> 0 bytes tests/data/for_carafe/carafe_mask_grad.bin | Bin 28800 -> 0 bytes tests/data/for_carafe/carafe_output.bin | Bin 18432 -> 0 bytes .../masked_conv2d_for_bias.npy | Bin 140 -> 0 bytes .../masked_conv2d_for_input.npy | Bin 3200 -> 0 bytes .../masked_conv2d_for_mask.npy | Bin 1152 -> 0 bytes .../masked_conv2d_for_output.npy | Bin 3200 -> 0 bytes .../masked_conv2d_for_weight.npy | Bin 452 -> 0 bytes tests/data/scripts/hello.py | 1 - tests/test_arraymisc.py | 1 + tests/test_cnn/test_build_layers.py | 81 +- tests/test_cnn/test_context_block.py | 1 - .../test_cnn/test_conv2d_adaptive_padding.py | 1 - tests/test_cnn/test_conv_module.py | 17 +- .../test_depthwise_seperable_conv_module.py | 1 - tests/test_cnn/test_flops_counter.py | 1 - tests/test_cnn/test_fuse_conv_bn.py | 1 - tests/test_cnn/test_generalized_attention.py | 1 - tests/test_cnn/test_hsigmoid.py | 7 +- tests/test_cnn/test_hswish.py | 1 - tests/test_cnn/test_model_registry.py | 63 + tests/test_cnn/test_non_local.py | 1 - tests/test_cnn/test_revert_syncbn.py | 58 + tests/test_cnn/test_rfsearch/test_operator.py | 325 --- tests/test_cnn/test_rfsearch/test_search.py | 128 -- 
tests/test_cnn/test_scale.py | 59 +- tests/test_cnn/test_silu.py | 28 - tests/test_cnn/test_swish.py | 5 +- tests/test_cnn/test_transformer.py | 495 +--- tests/test_cnn/test_weight_init.py | 559 +++++ tests/test_cnn/test_wrappers.py | 1 - tests/test_fileclient.py | 860 +++++++ tests/test_fileio.py | 211 ++ tests/test_image/test_geometric.py | 7 - tests/test_image/test_image_misc.py | 21 - tests/test_image/test_io.py | 115 +- tests/test_image/test_photometric.py | 102 +- tests/test_load_model_zoo.py | 146 ++ tests/test_ops/output.pkl | Bin 2168 -> 0 bytes tests/test_ops/test_active_rotated_filter.py | 258 --- tests/test_ops/test_assign_score_withk.py | 1 - tests/test_ops/test_ball_query.py | 48 - tests/test_ops/test_bbox.py | 71 +- tests/test_ops/test_bezier_align.py | 54 - tests/test_ops/test_bias_act.py | 144 -- tests/test_ops/test_bilinear_grid_sample.py | 7 +- tests/test_ops/test_border_align.py | 3 +- tests/test_ops/test_box_iou_quadri.py | 77 - tests/test_ops/test_box_iou_rotated.py | 48 +- tests/test_ops/test_carafe.py | 60 +- tests/test_ops/test_cc_attention.py | 3 +- tests/test_ops/test_chamfer_distance.py | 57 - tests/test_ops/test_contour_expand.py | 1 - tests/test_ops/test_conv_gradfix.py | 43 - tests/test_ops/test_convex_iou.py | 56 - tests/test_ops/test_corner_pool.py | 1 - tests/test_ops/test_correlation.py | 11 +- tests/test_ops/test_deform_conv.py | 7 +- tests/test_ops/test_deform_roi_pool.py | 60 +- tests/test_ops/test_diff_iou_rotated.py | 49 - tests/test_ops/test_filtered_lrelu.py | 224 -- tests/test_ops/test_focal_loss.py | 52 +- tests/test_ops/test_furthest_point_sample.py | 1 - tests/test_ops/test_fused_bias_leakyrelu.py | 60 +- tests/test_ops/test_gather_points.py | 95 +- tests/test_ops/test_group_points.py | 168 +- tests/test_ops/test_info.py | 3 +- tests/test_ops/test_iou3d.py | 153 +- tests/test_ops/test_knn.py | 1 - tests/test_ops/test_masked_conv2d.py | 43 +- tests/test_ops/test_merge_cells.py | 80 +- 
tests/test_ops/test_min_area_polygons.py | 30 - tests/test_ops/test_modulated_deform_conv.py | 7 +- tests/test_ops/test_ms_deformable_attn.py | 206 +- tests/test_ops/test_nms.py | 39 +- tests/test_ops/test_nms_quadri.py | 119 - tests/test_ops/test_nms_rotated.py | 99 +- tests/test_ops/test_onnx.py | 737 +++++- tests/test_ops/test_pixel_group.py | 1 - tests/test_ops/test_points_in_polygons.py | 23 - tests/test_ops/test_prroi_pool.py | 98 - tests/test_ops/test_psa_mask.py | 60 +- tests/test_ops/test_riroi_align_rotated.py | 84 - tests/test_ops/test_roi_align.py | 31 +- tests/test_ops/test_roi_align_rotated.py | 53 +- tests/test_ops/test_roi_pool.py | 44 +- tests/test_ops/test_roiaware_pool3d.py | 44 +- tests/test_ops/test_roipoint_pool3d.py | 51 +- tests/test_ops/test_rotated_feature_align.py | 131 -- tests/test_ops/test_saconv.py | 1 - tests/test_ops/test_scatter_points.py | 50 +- tests/test_ops/test_spconv.py | 133 -- tests/test_ops/test_syncbn.py | 3 +- tests/test_ops/test_tensorrt.py | 807 +++++++ tests/test_ops/test_tensorrt_preprocess.py | 75 + tests/test_ops/test_three_interpolate.py | 36 +- tests/test_ops/test_three_nn.py | 118 +- tests/test_ops/test_tin_shift.py | 57 +- tests/test_ops/test_upfirdn2d.py | 29 +- tests/test_ops/test_voxelization.py | 116 +- tests/test_parallel.py | 66 + tests/test_runner/test_basemodule.py | 557 +++++ tests/test_runner/test_checkpoint.py | 432 ++++ tests/test_runner/test_dist_utils.py | 52 + tests/test_runner/test_eval_hook.py | 482 ++++ tests/test_runner/test_fp16.py | 300 +++ tests/test_runner/test_hooks.py | 1488 ++++++++++++ tests/test_runner/test_optimizer.py | 639 ++++++ tests/test_runner/test_runner.py | 289 +++ tests/test_runner/test_utils.py | 38 + .../test_transforms_formatting.py | 101 - .../test_transforms_loading.py | 151 -- .../test_transforms_processing.py | 1014 -------- .../test_transforms_wrapper.py | 585 ----- tests/test_utils/test_config.py | 534 +++++ tests/test_utils/test_env.py | 7 +- 
tests/test_utils/test_hub.py | 32 + tests/test_utils/test_logging.py | 117 + tests/test_utils/test_misc.py | 225 ++ tests/test_utils/test_parrots_jit.py | 8 +- tests/test_utils/test_path.py | 73 + tests/test_utils/test_progressbar.py | 171 ++ tests/test_utils/test_registry.py | 282 +++ tests/test_utils/test_testing.py | 194 ++ tests/test_utils/test_timer.py | 40 + tests/test_utils/test_trace.py | 24 + tests/test_utils/test_version_utils.py | 57 + tests/test_video/test_reader.py | 2 +- tests/test_visualization.py | 2 +- 840 files changed, 47681 insertions(+), 60178 deletions(-) delete mode 100644 CONTRIBUTING_zh-CN.md create mode 100644 Dockerfile delete mode 100644 docker/README.md delete mode 100644 docker/dev/Dockerfile delete mode 100644 docker/release/Dockerfile rename docs/{en => }/Makefile (100%) rename docs/{en => }/_static/community/1.png (100%) rename docs/{en => }/_static/community/2.png (100%) rename docs/{en => }/_static/community/3.png (100%) rename docs/{en => }/_static/css/readthedocs.css (75%) rename docs/{en => }/_static/flow_img2toimg1.png (100%) rename docs/{en => }/_static/flow_raw_images.png (100%) rename docs/{en => }/_static/flow_visualization.png (100%) rename docs/{en => }/_static/flow_warp.png (100%) rename docs/{en => }/_static/flow_warp_diff.png (100%) rename docs/{en => }/_static/image/mmcv-logo.png (100%) rename docs/{en => }/_static/parallel_progress.gif (100%) rename docs/{en => }/_static/parallel_progress.png (100%) rename docs/{en => }/_static/progress.gif (100%) rename docs/{en => }/_static/progress.png (100%) create mode 100644 docs/_static/qq_group_qrcode.jpg create mode 100644 docs/_static/zhihu_qrcode.jpg create mode 100644 docs/api.rst create mode 120000 docs/community/contributing.md create mode 100644 docs/community/pr.md rename docs/{en => }/compatibility.md (100%) rename docs/{zh_cn => }/conf.py (62%) rename docs/{en => }/deployment/mmcv_ops_definition.md (80%) create mode 100644 docs/deployment/onnx.md create mode 100644 
docs/deployment/onnxruntime_custom_ops.md create mode 100644 docs/deployment/onnxruntime_op.md create mode 100644 docs/deployment/tensorrt_custom_ops.md create mode 100644 docs/deployment/tensorrt_plugin.md delete mode 100644 docs/en/_static/version.json delete mode 100644 docs/en/_templates/classtemplate.rst delete mode 100644 docs/en/api/arraymisc.rst delete mode 100644 docs/en/api/cnn.rst delete mode 100644 docs/en/api/image.rst delete mode 100644 docs/en/api/ops.rst delete mode 100644 docs/en/api/transforms.rst delete mode 100644 docs/en/api/utils.rst delete mode 100644 docs/en/api/video.rst delete mode 100644 docs/en/api/visualization.rst delete mode 100644 docs/en/community/contributing.md delete mode 100644 docs/en/community/pr.md delete mode 100644 docs/en/docutils.conf delete mode 100644 docs/en/faq.md delete mode 100644 docs/en/get_started/build.md delete mode 100644 docs/en/get_started/installation.md delete mode 100644 docs/en/get_started/introduction.md delete mode 100644 docs/en/switch_language.md delete mode 100644 docs/en/understand_mmcv/cnn.md delete mode 100644 docs/en/understand_mmcv/data_transform.md delete mode 100644 docs/en/understand_mmcv/ops.md create mode 100644 docs/faq.md create mode 100644 docs/get_started/build.md create mode 100644 docs/get_started/installation.md create mode 100644 docs/get_started/introduction.md rename docs/{en => }/get_started/previous_versions.md (93%) rename docs/{en => }/index.rst (71%) rename docs/{en => }/make.bat (100%) rename docs/{en => }/mmcv-logo.png (100%) create mode 100644 docs/understand_mmcv/cnn.md create mode 100644 docs/understand_mmcv/config.md rename docs/{en => }/understand_mmcv/data_process.md (90%) create mode 100644 docs/understand_mmcv/io.md create mode 100644 docs/understand_mmcv/ops.md create mode 100644 docs/understand_mmcv/registry.md create mode 100644 docs/understand_mmcv/runner.md create mode 100644 docs/understand_mmcv/utils.md rename docs/{en => }/understand_mmcv/visualization.md 
(100%) delete mode 100644 docs/zh_cn/_static/version.json delete mode 100644 docs/zh_cn/_templates/classtemplate.rst delete mode 100644 docs/zh_cn/api/arraymisc.rst delete mode 100644 docs/zh_cn/api/cnn.rst delete mode 100644 docs/zh_cn/api/image.rst delete mode 100644 docs/zh_cn/api/ops.rst delete mode 100644 docs/zh_cn/api/transforms.rst delete mode 100644 docs/zh_cn/api/utils.rst delete mode 100644 docs/zh_cn/api/video.rst delete mode 100644 docs/zh_cn/api/visualization.rst delete mode 100644 docs/zh_cn/community/code_style.md delete mode 100644 docs/zh_cn/community/contributing.md delete mode 100644 docs/zh_cn/community/pr.md delete mode 100644 docs/zh_cn/docutils.conf delete mode 100644 docs/zh_cn/faq.md delete mode 100644 docs/zh_cn/get_started/article.md delete mode 100644 docs/zh_cn/get_started/build.md delete mode 100644 docs/zh_cn/get_started/installation.md delete mode 100644 docs/zh_cn/get_started/introduction.md delete mode 100644 docs/zh_cn/switch_language.md delete mode 100644 docs/zh_cn/understand_mmcv/cnn.md delete mode 100644 docs/zh_cn/understand_mmcv/data_transform.md delete mode 100644 docs/zh_cn/understand_mmcv/ops.md rename {docs/zh_cn => docs_zh_CN}/Makefile (100%) rename {docs/zh_cn => docs_zh_CN}/_static/css/readthedocs.css (75%) rename {docs/zh_cn => docs_zh_CN}/_static/image/mmcv-logo.png (100%) create mode 100644 docs_zh_CN/api.rst create mode 100644 docs_zh_CN/community/contributing.md create mode 100644 docs_zh_CN/community/pr.md rename {docs/zh_cn => docs_zh_CN}/compatibility.md (100%) rename {docs/en => docs_zh_CN}/conf.py (61%) create mode 100644 docs_zh_CN/deployment/onnx.md create mode 100644 docs_zh_CN/deployment/onnxruntime_custom_ops.md create mode 100644 docs_zh_CN/deployment/onnxruntime_op.md create mode 100644 docs_zh_CN/deployment/tensorrt_custom_ops.md create mode 100644 docs_zh_CN/deployment/tensorrt_plugin.md create mode 100644 docs_zh_CN/faq.md create mode 100644 docs_zh_CN/get_started/build.md create mode 100644 
docs_zh_CN/get_started/installation.md create mode 100644 docs_zh_CN/get_started/introduction.md rename {docs/zh_cn => docs_zh_CN}/get_started/previous_versions.md (93%) rename {docs/zh_cn => docs_zh_CN}/index.rst (65%) rename {docs/zh_cn => docs_zh_CN}/make.bat (100%) rename {docs/zh_cn => docs_zh_CN}/mmcv-logo.png (100%) create mode 100644 docs_zh_CN/understand_mmcv/cnn.md create mode 100644 docs_zh_CN/understand_mmcv/config.md rename {docs/zh_cn => docs_zh_CN}/understand_mmcv/data_process.md (93%) create mode 100644 docs_zh_CN/understand_mmcv/io.md create mode 100644 docs_zh_CN/understand_mmcv/ops.md create mode 100644 docs_zh_CN/understand_mmcv/registry.md create mode 100644 docs_zh_CN/understand_mmcv/runner.md create mode 100644 docs_zh_CN/understand_mmcv/utils.md rename {docs/zh_cn => docs_zh_CN}/understand_mmcv/visualization.md (100%) create mode 100644 examples/train.py create mode 100644 mmcv/cnn/bricks/registry.py create mode 100644 mmcv/cnn/builder.py delete mode 100644 mmcv/cnn/rfsearch/__init__.py delete mode 100644 mmcv/cnn/rfsearch/operator.py delete mode 100644 mmcv/cnn/rfsearch/search.py delete mode 100644 mmcv/cnn/rfsearch/utils.py create mode 100644 mmcv/cnn/utils/sync_bn.py create mode 100644 mmcv/cnn/utils/weight_init.py create mode 100644 mmcv/engine/__init__.py create mode 100644 mmcv/engine/test.py create mode 100644 mmcv/fileio/__init__.py create mode 100644 mmcv/fileio/file_client.py create mode 100644 mmcv/fileio/handlers/__init__.py create mode 100644 mmcv/fileio/handlers/base.py create mode 100644 mmcv/fileio/handlers/json_handler.py create mode 100644 mmcv/fileio/handlers/pickle_handler.py create mode 100644 mmcv/fileio/handlers/yaml_handler.py create mode 100644 mmcv/fileio/io.py create mode 100644 mmcv/fileio/parse.py create mode 100644 mmcv/model_zoo/deprecated.json create mode 100644 mmcv/model_zoo/mmcls.json create mode 100644 mmcv/model_zoo/open_mmlab.json create mode 100644 mmcv/onnx/__init__.py create mode 100644 
mmcv/onnx/info.py create mode 100644 mmcv/onnx/onnx_utils/__init__.py create mode 100644 mmcv/onnx/onnx_utils/symbolic_helper.py create mode 100644 mmcv/onnx/symbolic.py mode change 100755 => 100644 mmcv/ops/__init__.py delete mode 100644 mmcv/ops/active_rotated_filter.py delete mode 100644 mmcv/ops/bezier_align.py delete mode 100644 mmcv/ops/bias_act.py delete mode 100644 mmcv/ops/box_iou_quadri.py delete mode 100644 mmcv/ops/chamfer_distance.py delete mode 100644 mmcv/ops/conv2d_gradfix.py delete mode 100644 mmcv/ops/convex_iou.py delete mode 100644 mmcv/ops/csrc/common/cuda/active_rotated_filter_cuda_kernel.cuh delete mode 100644 mmcv/ops/csrc/common/cuda/bezier_align_cuda_kernel.cuh delete mode 100644 mmcv/ops/csrc/common/cuda/box_iou_quadri_cuda.cuh delete mode 100644 mmcv/ops/csrc/common/cuda/chamfer_distance_cuda_kernel.cuh delete mode 100644 mmcv/ops/csrc/common/cuda/convex_iou_cuda_kernel.cuh delete mode 100644 mmcv/ops/csrc/common/cuda/diff_iou_rotated_cuda_kernel.cuh delete mode 100644 mmcv/ops/csrc/common/cuda/min_area_polygons_cuda.cuh delete mode 100644 mmcv/ops/csrc/common/cuda/nms_quadri_cuda.cuh delete mode 100644 mmcv/ops/csrc/common/cuda/points_in_polygons_cuda_kernel.cuh delete mode 100644 mmcv/ops/csrc/common/cuda/prroi_pool_cuda_kernel.cuh delete mode 100644 mmcv/ops/csrc/common/cuda/riroi_align_rotated_cuda_kernel.cuh delete mode 100644 mmcv/ops/csrc/common/cuda/rotated_feature_align_cuda_kernel.cuh delete mode 100644 mmcv/ops/csrc/common/cuda/spconv/indice.cuh delete mode 100644 mmcv/ops/csrc/common/cuda/spconv/reordering.cuh delete mode 100644 mmcv/ops/csrc/common/cuda/stack_ball_query_cuda_kernel.cuh delete mode 100644 mmcv/ops/csrc/common/cuda/stack_group_points_cuda_kernel.cuh delete mode 100644 mmcv/ops/csrc/common/mlu/bbox_overlaps_mlu_kernel.mlu delete mode 100644 mmcv/ops/csrc/common/mlu/carafe_mlu_kernel.mlu delete mode 100644 mmcv/ops/csrc/common/mlu/carafe_utils.hpp delete mode 100644 mmcv/ops/csrc/common/mlu/common_mlu_helper.hpp 
delete mode 100644 mmcv/ops/csrc/common/mlu/deform_roi_pool_mlu_kernel.mlu delete mode 100644 mmcv/ops/csrc/common/mlu/focal_loss_sigmoid_mlu_kernel.mlu delete mode 100644 mmcv/ops/csrc/common/mlu/iou3d_mlu_kernel.mlu delete mode 100644 mmcv/ops/csrc/common/mlu/iou3d_utils.hpp delete mode 100755 mmcv/ops/csrc/common/mlu/masked_conv2d_mlu_kernel.mlu delete mode 100644 mmcv/ops/csrc/common/mlu/ms_deform_attn_mlu_kernel.mlu delete mode 100644 mmcv/ops/csrc/common/mlu/nms_mlu_kernel.mlu delete mode 100644 mmcv/ops/csrc/common/mlu/nms_utils.hpp delete mode 100644 mmcv/ops/csrc/common/mlu/psamask_mlu_kernel.mlu delete mode 100644 mmcv/ops/csrc/common/mlu/psamask_utils.hpp delete mode 100644 mmcv/ops/csrc/common/mlu/roi_align_mlu_kernel.mlu delete mode 100644 mmcv/ops/csrc/common/mlu/roi_align_rotated_mlu_kernel.mlu delete mode 100644 mmcv/ops/csrc/common/mlu/roi_align_rotated_utils.hpp delete mode 100644 mmcv/ops/csrc/common/mlu/roi_pool_mlu_kernel.mlu delete mode 100644 mmcv/ops/csrc/common/mlu/roiaware_pool3d_mlu_kernel.mlu delete mode 100644 mmcv/ops/csrc/common/mlu/roipoint_pool3d_large_boxes_num_mlu_kernel.mlu delete mode 100644 mmcv/ops/csrc/common/mlu/roipoint_pool3d_mlu_kernel.mlu delete mode 100644 mmcv/ops/csrc/common/mlu/three_nn_mlu_kernel.mlu delete mode 100644 mmcv/ops/csrc/common/mlu/tin_shift_mlu_kernel.mlu delete mode 100644 mmcv/ops/csrc/common/mps/MPSDevice.h delete mode 100644 mmcv/ops/csrc/common/mps/MPSLibrary.h delete mode 100644 mmcv/ops/csrc/common/mps/MPSLibrary.mm delete mode 100644 mmcv/ops/csrc/common/mps/MPSStream.h delete mode 100644 mmcv/ops/csrc/common/mps/MPSUtils.h delete mode 100644 mmcv/ops/csrc/common/pytorch_mlu_helper.hpp delete mode 100644 mmcv/ops/csrc/common/pytorch_npu_helper.hpp delete mode 100644 mmcv/ops/csrc/common/utils/spconv/paramsgrid.h delete mode 100644 mmcv/ops/csrc/common/utils/spconv/prettyprint.h delete mode 100644 mmcv/ops/csrc/common/utils/spconv/pybind11_utils.h delete mode 100644 
mmcv/ops/csrc/common/utils/spconv/spconv/geometry.h delete mode 100644 mmcv/ops/csrc/common/utils/spconv/spconv/indice.h delete mode 100644 mmcv/ops/csrc/common/utils/spconv/spconv/maxpool.h delete mode 100644 mmcv/ops/csrc/common/utils/spconv/spconv/mp_helper.h delete mode 100644 mmcv/ops/csrc/common/utils/spconv/spconv/point2voxel.h delete mode 100644 mmcv/ops/csrc/common/utils/spconv/spconv/reordering.h delete mode 100644 mmcv/ops/csrc/common/utils/spconv/tensorview/helper_kernel.cuh delete mode 100644 mmcv/ops/csrc/common/utils/spconv/tensorview/helper_launch.h delete mode 100644 mmcv/ops/csrc/common/utils/spconv/tensorview/tensorview.h create mode 100644 mmcv/ops/csrc/onnxruntime/corner_pool.h create mode 100644 mmcv/ops/csrc/onnxruntime/cpu/corner_pool.cpp create mode 100644 mmcv/ops/csrc/onnxruntime/cpu/deform_conv.cpp create mode 100644 mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp create mode 100644 mmcv/ops/csrc/onnxruntime/cpu/modulated_deform_conv.cpp create mode 100644 mmcv/ops/csrc/onnxruntime/cpu/nms.cpp create mode 100644 mmcv/ops/csrc/onnxruntime/cpu/onnxruntime_register.cpp create mode 100644 mmcv/ops/csrc/onnxruntime/cpu/reduce_ops.cpp create mode 100644 mmcv/ops/csrc/onnxruntime/cpu/roi_align.cpp create mode 100644 mmcv/ops/csrc/onnxruntime/cpu/roi_align_rotated.cpp create mode 100644 mmcv/ops/csrc/onnxruntime/cpu/soft_nms.cpp create mode 100644 mmcv/ops/csrc/onnxruntime/deform_conv.h create mode 100644 mmcv/ops/csrc/onnxruntime/grid_sample.h create mode 100644 mmcv/ops/csrc/onnxruntime/modulated_deform_conv.h create mode 100644 mmcv/ops/csrc/onnxruntime/nms.h create mode 100644 mmcv/ops/csrc/onnxruntime/onnxruntime_register.h create mode 100644 mmcv/ops/csrc/onnxruntime/onnxruntime_session_options_config_keys.h create mode 100644 mmcv/ops/csrc/onnxruntime/ort_mmcv_utils.h create mode 100644 mmcv/ops/csrc/onnxruntime/reduce_ops.h create mode 100644 mmcv/ops/csrc/onnxruntime/roi_align.h create mode 100644 
mmcv/ops/csrc/onnxruntime/roi_align_rotated.h create mode 100644 mmcv/ops/csrc/onnxruntime/soft_nms.h delete mode 100644 mmcv/ops/csrc/parrots/active_rotated_filter.cpp delete mode 100644 mmcv/ops/csrc/parrots/active_rotated_filter_parrots.cpp delete mode 100644 mmcv/ops/csrc/parrots/active_rotated_filter_pytorch.h create mode 100644 mmcv/ops/csrc/parrots/box_iou_rotated_cpu.cpp delete mode 100644 mmcv/ops/csrc/parrots/chamfer_distance.cpp delete mode 100644 mmcv/ops/csrc/parrots/chamfer_distance_parrots.cpp delete mode 100644 mmcv/ops/csrc/parrots/chamfer_distance_pytorch.h delete mode 100644 mmcv/ops/csrc/parrots/convex_iou.cpp delete mode 100644 mmcv/ops/csrc/parrots/convex_iou_parrots.cpp delete mode 100644 mmcv/ops/csrc/parrots/convex_iou_pytorch.h create mode 100644 mmcv/ops/csrc/parrots/corner_pool.cpp create mode 100644 mmcv/ops/csrc/parrots/corner_pool_parrots.cpp create mode 100644 mmcv/ops/csrc/parrots/corner_pool_pytorch.h delete mode 100644 mmcv/ops/csrc/parrots/cudabind.cpp create mode 100644 mmcv/ops/csrc/parrots/deform_conv_cpu.cpp delete mode 100644 mmcv/ops/csrc/parrots/diff_iou_rotated.cpp delete mode 100644 mmcv/ops/csrc/parrots/diff_iou_rotated_parrots.cpp delete mode 100644 mmcv/ops/csrc/parrots/diff_iou_rotated_pytorch.h delete mode 100644 mmcv/ops/csrc/parrots/info.cpp delete mode 100644 mmcv/ops/csrc/parrots/min_area_polygons.cpp delete mode 100644 mmcv/ops/csrc/parrots/min_area_polygons_parrots.cpp delete mode 100644 mmcv/ops/csrc/parrots/min_area_polygons_pytorch.h create mode 100644 mmcv/ops/csrc/parrots/modulated_deform_conv_cpu.cpp rename mmcv/ops/csrc/{pytorch/cpu/nms_quadri.cpp => parrots/nms_rotated_cpu.cpp} (59%) create mode 100644 mmcv/ops/csrc/parrots/points_in_boxes_cpu.cpp delete mode 100644 mmcv/ops/csrc/parrots/points_in_polygons.cpp delete mode 100644 mmcv/ops/csrc/parrots/points_in_polygons_parrots.cpp delete mode 100644 mmcv/ops/csrc/parrots/points_in_polygons_pytorch.h delete mode 100644 
mmcv/ops/csrc/parrots/prroi_pool.cpp delete mode 100644 mmcv/ops/csrc/parrots/prroi_pool_parrots.cpp delete mode 100644 mmcv/ops/csrc/parrots/prroi_pool_pytorch.h delete mode 100644 mmcv/ops/csrc/parrots/riroi_align_rotated.cpp delete mode 100644 mmcv/ops/csrc/parrots/riroi_align_rotated_parrots.cpp delete mode 100644 mmcv/ops/csrc/parrots/riroi_align_rotated_pytorch.h create mode 100644 mmcv/ops/csrc/parrots/roi_align_cpu.cpp rename mmcv/ops/csrc/{pytorch/cpu/bezier_align.cpp => parrots/roi_align_rotated_cpu.cpp} (54%) delete mode 100644 mmcv/ops/csrc/parrots/rotated_feature_align.cpp delete mode 100644 mmcv/ops/csrc/parrots/rotated_feature_align_parrots.cpp delete mode 100644 mmcv/ops/csrc/parrots/rotated_feature_align_pytorch.h create mode 100644 mmcv/ops/csrc/parrots/voxelization_cpu.cpp delete mode 100644 mmcv/ops/csrc/pytorch/active_rotated_filter.cpp delete mode 100644 mmcv/ops/csrc/pytorch/bezier_align.cpp delete mode 100644 mmcv/ops/csrc/pytorch/bias_act.cpp delete mode 100644 mmcv/ops/csrc/pytorch/box_iou_quadri.cpp delete mode 100644 mmcv/ops/csrc/pytorch/chamfer_distance.cpp delete mode 100644 mmcv/ops/csrc/pytorch/convex_iou.cpp create mode 100644 mmcv/ops/csrc/pytorch/corner_pool.cpp delete mode 100644 mmcv/ops/csrc/pytorch/cpu/active_rotated_filter.cpp delete mode 100644 mmcv/ops/csrc/pytorch/cpu/bbox_overlaps_cpu.cpp delete mode 100644 mmcv/ops/csrc/pytorch/cpu/box_iou_quadri.cpp delete mode 100644 mmcv/ops/csrc/pytorch/cpu/rotated_feature_align.cpp delete mode 100644 mmcv/ops/csrc/pytorch/cpu/sparse_indice.cpp delete mode 100644 mmcv/ops/csrc/pytorch/cpu/sparse_maxpool.cpp delete mode 100644 mmcv/ops/csrc/pytorch/cpu/sparse_reordering.cpp delete mode 100644 mmcv/ops/csrc/pytorch/cuda/active_rotated_filter_cuda.cu delete mode 100644 mmcv/ops/csrc/pytorch/cuda/bezier_align_cuda.cu delete mode 100644 mmcv/ops/csrc/pytorch/cuda/bias_act_cuda.cu delete mode 100644 mmcv/ops/csrc/pytorch/cuda/box_iou_quadri_cuda.cu delete mode 100644 
mmcv/ops/csrc/pytorch/cuda/chamfer_distance_cuda.cu delete mode 100644 mmcv/ops/csrc/pytorch/cuda/convex_iou.cu delete mode 100644 mmcv/ops/csrc/pytorch/cuda/diff_iou_rotated_cuda.cu delete mode 100644 mmcv/ops/csrc/pytorch/cuda/filtered_lrelu.cu delete mode 100644 mmcv/ops/csrc/pytorch/cuda/fused_spconv_ops_cuda.cu delete mode 100644 mmcv/ops/csrc/pytorch/cuda/min_area_polygons.cu delete mode 100644 mmcv/ops/csrc/pytorch/cuda/nms_quadri_cuda.cu delete mode 100644 mmcv/ops/csrc/pytorch/cuda/points_in_polygons_cuda.cu delete mode 100644 mmcv/ops/csrc/pytorch/cuda/prroi_pool_cuda.cu delete mode 100644 mmcv/ops/csrc/pytorch/cuda/riroi_align_rotated_cuda.cu delete mode 100644 mmcv/ops/csrc/pytorch/cuda/rotated_feature_align_cuda.cu delete mode 100644 mmcv/ops/csrc/pytorch/cuda/sparse_indice.cu delete mode 100644 mmcv/ops/csrc/pytorch/cuda/sparse_maxpool.cu delete mode 100644 mmcv/ops/csrc/pytorch/cuda/sparse_pool_ops_cuda.cu delete mode 100644 mmcv/ops/csrc/pytorch/cuda/sparse_reordering.cu delete mode 100644 mmcv/ops/csrc/pytorch/cuda/spconv_ops_cuda.cu delete mode 100644 mmcv/ops/csrc/pytorch/cuda/stack_ball_query_cuda.cu delete mode 100644 mmcv/ops/csrc/pytorch/cuda/stack_group_points_cuda.cu delete mode 100644 mmcv/ops/csrc/pytorch/diff_iou_rotated.cpp delete mode 100644 mmcv/ops/csrc/pytorch/filtered_lrelu.cpp delete mode 100644 mmcv/ops/csrc/pytorch/fused_spconv_ops.cpp delete mode 100644 mmcv/ops/csrc/pytorch/min_area_polygons.cpp delete mode 100644 mmcv/ops/csrc/pytorch/mlu/bbox_overlaps_mlu.cpp delete mode 100644 mmcv/ops/csrc/pytorch/mlu/carafe_mlu.cpp delete mode 100644 mmcv/ops/csrc/pytorch/mlu/deform_roi_pool_mlu.cpp delete mode 100644 mmcv/ops/csrc/pytorch/mlu/focal_loss_sigmoid_mlu.cpp delete mode 100644 mmcv/ops/csrc/pytorch/mlu/iou3d_mlu.cpp delete mode 100755 mmcv/ops/csrc/pytorch/mlu/masked_conv2d_mlu.cpp delete mode 100644 mmcv/ops/csrc/pytorch/mlu/ms_deform_attn_mlu.cpp delete mode 100644 mmcv/ops/csrc/pytorch/mlu/nms_mlu.cpp delete mode 100644 
mmcv/ops/csrc/pytorch/mlu/psamask_mlu.cpp delete mode 100644 mmcv/ops/csrc/pytorch/mlu/roi_align_mlu.cpp delete mode 100755 mmcv/ops/csrc/pytorch/mlu/roi_align_rotated_mlu.cpp delete mode 100644 mmcv/ops/csrc/pytorch/mlu/roi_pool_mlu.cpp delete mode 100644 mmcv/ops/csrc/pytorch/mlu/roiaware_pool3d_mlu.cpp delete mode 100644 mmcv/ops/csrc/pytorch/mlu/roipoint_pool3d_mlu.cpp delete mode 100644 mmcv/ops/csrc/pytorch/mlu/three_nn_mlu.cpp delete mode 100644 mmcv/ops/csrc/pytorch/mlu/tin_shift_mlu.cpp delete mode 100644 mmcv/ops/csrc/pytorch/mps/bbox_overlaps_mps.mm delete mode 100644 mmcv/ops/csrc/pytorch/nms_quadri.cpp delete mode 100644 mmcv/ops/csrc/pytorch/npu/bbox_overlaps_npu.cpp delete mode 100644 mmcv/ops/csrc/pytorch/npu/deform_roi_pool.cpp delete mode 100644 mmcv/ops/csrc/pytorch/npu/focal_loss_npu.cpp delete mode 100644 mmcv/ops/csrc/pytorch/npu/fused_bias_leakyrelu_npu.cpp delete mode 100644 mmcv/ops/csrc/pytorch/npu/gather_points_npu.cpp delete mode 100644 mmcv/ops/csrc/pytorch/npu/nms_npu.cpp delete mode 100644 mmcv/ops/csrc/pytorch/npu/nms_rotated_npu.cpp delete mode 100644 mmcv/ops/csrc/pytorch/npu/psa_mask_npu.cpp delete mode 100644 mmcv/ops/csrc/pytorch/npu/roi_pool_npu.cpp delete mode 100644 mmcv/ops/csrc/pytorch/npu/voxelization_npu.cpp delete mode 100644 mmcv/ops/csrc/pytorch/points_in_polygons.cpp delete mode 100644 mmcv/ops/csrc/pytorch/prroi_pool.cpp delete mode 100644 mmcv/ops/csrc/pytorch/riroi_align_rotated.cpp delete mode 100644 mmcv/ops/csrc/pytorch/rotated_feature_align.cpp delete mode 100644 mmcv/ops/csrc/pytorch/sparse_pool_ops.cpp delete mode 100644 mmcv/ops/csrc/pytorch/spconv_ops.cpp delete mode 100644 mmcv/ops/csrc/pytorch/spconv_utils.h create mode 100644 mmcv/ops/csrc/tensorrt/plugins/trt_corner_pool.cpp create mode 100644 mmcv/ops/csrc/tensorrt/plugins/trt_corner_pool_kernel.cu create mode 100644 mmcv/ops/csrc/tensorrt/plugins/trt_cuda_helper.cu create mode 100644 mmcv/ops/csrc/tensorrt/plugins/trt_cummaxmin.cpp create mode 100644 
mmcv/ops/csrc/tensorrt/plugins/trt_cummaxmin_kernel.cu create mode 100644 mmcv/ops/csrc/tensorrt/plugins/trt_deform_conv.cpp create mode 100644 mmcv/ops/csrc/tensorrt/plugins/trt_deform_conv_kernel.cu create mode 100644 mmcv/ops/csrc/tensorrt/plugins/trt_grid_sampler.cpp create mode 100644 mmcv/ops/csrc/tensorrt/plugins/trt_grid_sampler_kernel.cu create mode 100644 mmcv/ops/csrc/tensorrt/plugins/trt_instance_norm.cpp create mode 100644 mmcv/ops/csrc/tensorrt/plugins/trt_modulated_deform_conv.cpp create mode 100644 mmcv/ops/csrc/tensorrt/plugins/trt_modulated_deform_conv_kernel.cu create mode 100644 mmcv/ops/csrc/tensorrt/plugins/trt_nms.cpp create mode 100644 mmcv/ops/csrc/tensorrt/plugins/trt_nms_kernel.cu create mode 100644 mmcv/ops/csrc/tensorrt/plugins/trt_plugin.cpp create mode 100644 mmcv/ops/csrc/tensorrt/plugins/trt_roi_align.cpp create mode 100644 mmcv/ops/csrc/tensorrt/plugins/trt_roi_align_kernel.cu create mode 100644 mmcv/ops/csrc/tensorrt/plugins/trt_scatternd.cpp create mode 100644 mmcv/ops/csrc/tensorrt/plugins/trt_scatternd_kernel.cu create mode 100644 mmcv/ops/csrc/tensorrt/trt_corner_pool.hpp create mode 100644 mmcv/ops/csrc/tensorrt/trt_cuda_helper.cuh create mode 100644 mmcv/ops/csrc/tensorrt/trt_cummaxmin.hpp create mode 100644 mmcv/ops/csrc/tensorrt/trt_deform_conv.hpp create mode 100644 mmcv/ops/csrc/tensorrt/trt_grid_sampler.hpp create mode 100644 mmcv/ops/csrc/tensorrt/trt_instance_norm.hpp create mode 100644 mmcv/ops/csrc/tensorrt/trt_modulated_deform_conv.hpp create mode 100644 mmcv/ops/csrc/tensorrt/trt_nms.hpp create mode 100644 mmcv/ops/csrc/tensorrt/trt_plugin.hpp create mode 100644 mmcv/ops/csrc/tensorrt/trt_plugin_helper.hpp create mode 100644 mmcv/ops/csrc/tensorrt/trt_roi_align.hpp create mode 100644 mmcv/ops/csrc/tensorrt/trt_scatternd.hpp create mode 100644 mmcv/ops/csrc/tensorrt/trt_serialize.hpp delete mode 100644 mmcv/ops/diff_iou_rotated.py delete mode 100644 mmcv/ops/filtered_lrelu.py mode change 100755 => 100644 
mmcv/ops/iou3d.py delete mode 100644 mmcv/ops/min_area_polygons.py delete mode 100644 mmcv/ops/points_in_polygons.py delete mode 100644 mmcv/ops/prroi_pool.py delete mode 100644 mmcv/ops/riroi_align_rotated.py delete mode 100644 mmcv/ops/rotated_feature_align.py delete mode 100644 mmcv/ops/sparse_conv.py delete mode 100644 mmcv/ops/sparse_functional.py delete mode 100644 mmcv/ops/sparse_modules.py delete mode 100644 mmcv/ops/sparse_ops.py delete mode 100644 mmcv/ops/sparse_pool.py delete mode 100644 mmcv/ops/sparse_structure.py mode change 100755 => 100644 mmcv/ops/tin_shift.py create mode 100644 mmcv/parallel/__init__.py create mode 100644 mmcv/parallel/_functions.py create mode 100644 mmcv/parallel/collate.py create mode 100644 mmcv/parallel/data_container.py create mode 100644 mmcv/parallel/data_parallel.py create mode 100644 mmcv/parallel/distributed.py create mode 100644 mmcv/parallel/distributed_deprecated.py create mode 100644 mmcv/parallel/registry.py create mode 100644 mmcv/parallel/scatter_gather.py create mode 100644 mmcv/parallel/utils.py create mode 100644 mmcv/runner/__init__.py create mode 100644 mmcv/runner/base_module.py create mode 100644 mmcv/runner/base_runner.py create mode 100644 mmcv/runner/builder.py create mode 100644 mmcv/runner/checkpoint.py create mode 100644 mmcv/runner/default_constructor.py create mode 100644 mmcv/runner/dist_utils.py create mode 100644 mmcv/runner/epoch_based_runner.py create mode 100644 mmcv/runner/fp16_utils.py create mode 100644 mmcv/runner/hooks/__init__.py create mode 100644 mmcv/runner/hooks/checkpoint.py create mode 100644 mmcv/runner/hooks/closure.py create mode 100644 mmcv/runner/hooks/ema.py create mode 100644 mmcv/runner/hooks/evaluation.py create mode 100644 mmcv/runner/hooks/hook.py create mode 100644 mmcv/runner/hooks/iter_timer.py create mode 100644 mmcv/runner/hooks/logger/__init__.py create mode 100644 mmcv/runner/hooks/logger/base.py create mode 100644 mmcv/runner/hooks/logger/dvclive.py create mode 
100644 mmcv/runner/hooks/logger/mlflow.py create mode 100644 mmcv/runner/hooks/logger/neptune.py create mode 100644 mmcv/runner/hooks/logger/pavi.py create mode 100644 mmcv/runner/hooks/logger/tensorboard.py create mode 100644 mmcv/runner/hooks/logger/text.py create mode 100644 mmcv/runner/hooks/logger/wandb.py create mode 100644 mmcv/runner/hooks/lr_updater.py create mode 100644 mmcv/runner/hooks/memory.py create mode 100644 mmcv/runner/hooks/momentum_updater.py create mode 100644 mmcv/runner/hooks/optimizer.py create mode 100644 mmcv/runner/hooks/profiler.py create mode 100644 mmcv/runner/hooks/sampler_seed.py create mode 100644 mmcv/runner/hooks/sync_buffer.py create mode 100644 mmcv/runner/iter_based_runner.py create mode 100644 mmcv/runner/log_buffer.py create mode 100644 mmcv/runner/optimizer/__init__.py create mode 100644 mmcv/runner/optimizer/builder.py create mode 100644 mmcv/runner/optimizer/default_constructor.py create mode 100644 mmcv/runner/priority.py create mode 100644 mmcv/runner/utils.py create mode 100644 mmcv/tensorrt/__init__.py create mode 100644 mmcv/tensorrt/init_plugins.py create mode 100644 mmcv/tensorrt/preprocess.py create mode 100644 mmcv/tensorrt/tensorrt_utils.py delete mode 100644 mmcv/transforms/__init__.py delete mode 100644 mmcv/transforms/base.py delete mode 100644 mmcv/transforms/builder.py delete mode 100644 mmcv/transforms/formatting.py delete mode 100644 mmcv/transforms/loading.py delete mode 100644 mmcv/transforms/processing.py delete mode 100644 mmcv/transforms/utils.py delete mode 100644 mmcv/transforms/wrappers.py create mode 100644 mmcv/utils/config.py delete mode 100644 mmcv/utils/device_type.py create mode 100644 mmcv/utils/hub.py create mode 100644 mmcv/utils/logging.py create mode 100644 mmcv/utils/misc.py create mode 100644 mmcv/utils/parrots_wrapper.py create mode 100644 mmcv/utils/path.py create mode 100644 mmcv/utils/progressbar.py create mode 100644 mmcv/utils/registry.py create mode 100644 mmcv/utils/testing.py 
create mode 100644 mmcv/utils/timer.py create mode 100644 mmcv/utils/trace.py create mode 100644 mmcv/utils/version_utils.py delete mode 100644 requirements/build.txt delete mode 100644 requirements/optional.txt delete mode 100755 tests/data/for_carafe/carafe_feat.bin delete mode 100755 tests/data/for_carafe/carafe_feat_grad.bin delete mode 100755 tests/data/for_carafe/carafe_mask.bin delete mode 100755 tests/data/for_carafe/carafe_mask_grad.bin delete mode 100755 tests/data/for_carafe/carafe_output.bin delete mode 100644 tests/data/for_masked_conv2d/masked_conv2d_for_bias.npy delete mode 100644 tests/data/for_masked_conv2d/masked_conv2d_for_input.npy delete mode 100644 tests/data/for_masked_conv2d/masked_conv2d_for_mask.npy delete mode 100644 tests/data/for_masked_conv2d/masked_conv2d_for_output.npy delete mode 100644 tests/data/for_masked_conv2d/masked_conv2d_for_weight.npy create mode 100644 tests/test_cnn/test_model_registry.py create mode 100644 tests/test_cnn/test_revert_syncbn.py delete mode 100644 tests/test_cnn/test_rfsearch/test_operator.py delete mode 100644 tests/test_cnn/test_rfsearch/test_search.py delete mode 100644 tests/test_cnn/test_silu.py create mode 100644 tests/test_cnn/test_weight_init.py create mode 100644 tests/test_fileclient.py create mode 100644 tests/test_fileio.py create mode 100644 tests/test_load_model_zoo.py delete mode 100644 tests/test_ops/output.pkl delete mode 100644 tests/test_ops/test_active_rotated_filter.py delete mode 100644 tests/test_ops/test_bezier_align.py delete mode 100644 tests/test_ops/test_bias_act.py delete mode 100644 tests/test_ops/test_box_iou_quadri.py delete mode 100644 tests/test_ops/test_chamfer_distance.py delete mode 100644 tests/test_ops/test_conv_gradfix.py delete mode 100644 tests/test_ops/test_convex_iou.py delete mode 100644 tests/test_ops/test_diff_iou_rotated.py delete mode 100644 tests/test_ops/test_filtered_lrelu.py delete mode 100644 tests/test_ops/test_min_area_polygons.py delete mode 100644 
tests/test_ops/test_nms_quadri.py delete mode 100644 tests/test_ops/test_points_in_polygons.py delete mode 100644 tests/test_ops/test_prroi_pool.py delete mode 100644 tests/test_ops/test_riroi_align_rotated.py delete mode 100644 tests/test_ops/test_rotated_feature_align.py delete mode 100644 tests/test_ops/test_spconv.py create mode 100644 tests/test_ops/test_tensorrt.py create mode 100644 tests/test_ops/test_tensorrt_preprocess.py mode change 100755 => 100644 tests/test_ops/test_tin_shift.py create mode 100644 tests/test_parallel.py create mode 100644 tests/test_runner/test_basemodule.py create mode 100644 tests/test_runner/test_checkpoint.py create mode 100644 tests/test_runner/test_dist_utils.py create mode 100644 tests/test_runner/test_eval_hook.py create mode 100644 tests/test_runner/test_fp16.py create mode 100644 tests/test_runner/test_hooks.py create mode 100644 tests/test_runner/test_optimizer.py create mode 100644 tests/test_runner/test_runner.py create mode 100644 tests/test_runner/test_utils.py delete mode 100644 tests/test_transforms/test_transforms_formatting.py delete mode 100644 tests/test_transforms/test_transforms_loading.py delete mode 100644 tests/test_transforms/test_transforms_processing.py delete mode 100644 tests/test_transforms/test_transforms_wrapper.py create mode 100644 tests/test_utils/test_config.py create mode 100644 tests/test_utils/test_hub.py create mode 100644 tests/test_utils/test_logging.py create mode 100644 tests/test_utils/test_misc.py create mode 100644 tests/test_utils/test_path.py create mode 100644 tests/test_utils/test_progressbar.py create mode 100644 tests/test_utils/test_registry.py create mode 100644 tests/test_utils/test_testing.py create mode 100644 tests/test_utils/test_timer.py create mode 100644 tests/test_utils/test_trace.py create mode 100644 tests/test_utils/test_version_utils.py diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a60cd99..2fdf8a2 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,258 
+1,71 @@ ## Contributing to OpenMMLab -Welcome to the MMCV community, we are committed to building a cutting-edge computer vision foundational library and all kinds of contributions are welcomed, including but not limited to +All kinds of contributions are welcome, including but not limited to the following. -**Fix bug** +- Fix typo or bugs +- Add documentation or translate the documentation into other languages +- Add new features and components -You can directly post a Pull Request to fix typo in code or documents +### Workflow -The steps to fix the bug of code implementation are as follows. +1. fork and pull the latest OpenMMLab repository +2. checkout a new branch (do not use master branch for PRs) +3. commit your changes +4. create a PR -1. If the modification involve significant changes, you should create an issue first and describe the error information and how to trigger the bug. Other developers will discuss with you and propose an proper solution. - -2. Posting a pull request after fixing the bug and adding corresponding unit test. - -**New Feature or Enhancement** - -1. If the modification involve significant changes, you should create an issue to discuss with our developers to propose an proper design. -2. Post a Pull Request after implementing the new feature or enhancement and add corresponding unit test. - -**Document** - -You can directly post a pull request to fix documents. If you want to add a document, you should first create an issue to check if it is reasonable. - -### Pull Request Workflow - -If you're not familiar with Pull Request, don't worry! The following guidance will tell you how to create a Pull Request step by step. If you want to dive into the develop mode of Pull Request, you can refer to the [official documents](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests) - -#### 1. 
Fork and clone - -If you are posting a pull request for the first time, you should fork the OpenMMLab repositories by clicking the **Fork** button in the top right corner of the GitHub page, and the forked repositories will appear under your GitHub profile. - - - -Then, you can clone the repositories to local: - -```shell -git clone git@github.com:{username}/mmcv.git +```{note} +If you plan to add some new features that involve large changes, it is encouraged to open an issue for discussion first. ``` +### Code style -After that, you should ddd official repository as the upstream repository +#### Python -```bash -git remote add upstream git@github.com:open-mmlab/mmcv -``` +We adopt [PEP8](https://www.python.org/dev/peps/pep-0008/) as the preferred code style. -Check whether remote repository has been added successfully by `git remote -v` +We use the following tools for linting and formatting: -```bash -origin git@github.com:{username}/mmcv.git (fetch) -origin git@github.com:{username}/mmcv.git (push) -upstream git@github.com:open-mmlab/mmcv (fetch) -upstream git@github.com:open-mmlab/mmcv (push) -``` +- [flake8](http://flake8.pycqa.org/en/latest/): A wrapper around some linter tools. +- [yapf](https://github.com/google/yapf): A formatter for Python files. +- [isort](https://github.com/timothycrosley/isort): A Python utility to sort imports. +- [markdownlint](https://github.com/markdownlint/markdownlint): A linter to check markdown files and flag style issues. +- [docformatter](https://github.com/myint/docformatter): A formatter to format docstring. -> Here's a brief introduction to origin and upstream. When we use "git clone", we create an "origin" remote by default, which points to the repository cloned from. As for "upstream", we add it ourselves to point to the target repository. Of course, if you don't like the name "upstream", you could name it as you wish. Usually, we'll push the code to "origin". 
If the pushed code conflicts with the latest code in official("upstream"), we should pull the latest code from upstream to resolve the conflicts, and then push to "origin" again. The posted Pull Request will be updated automatically. +Style configurations of yapf and isort can be found in [setup.cfg](./setup.cfg). -#### 2. Configure pre-commit +We use [pre-commit hook](https://pre-commit.com/) that checks and formats for `flake8`, `yapf`, `isort`, `trailing whitespaces`, `markdown files`, +fixes `end-of-files`, `double-quoted-strings`, `python-encoding-pragma`, `mixed-line-ending`, sorts `requirments.txt` automatically on every commit. +The config for a pre-commit hook is stored in [.pre-commit-config](./.pre-commit-config.yaml). -You should configure [pre-commit](https://pre-commit.com/#intro) in the local development environment to make sure the code style matches that of OpenMMLab. **Note**: The following code should be executed under the MMCV directory. +After you clone the repository, you will need to install initialize pre-commit hook. ```shell pip install -U pre-commit -pre-commit install -``` - -Check that pre-commit is configured successfully, and install the hooks defined in `.pre-commit-config.yaml`. - -```shell -pre-commit run --all-files -``` - - - - - -If the installation process is interrupted, you can repeatedly run `pre-commit run ... ` to continue the installation. - -If the code does not conform to the code style specification, pre-commit will raise a warning and fixes some of the errors automatically. - - - -If we want to commit our code bypassing the pre-commit hook, we can use the `--no-verify` option(**only for temporarily commit**). - -```shell -git commit -m "xxx" --no-verify -``` - -#### 3. Create a development branch - -After configuring the pre-commit, we should create a branch based on the master branch to develop the new feature or fix the bug. 
The proposed branch name is `username/pr_name` - -```shell -git checkout -b yhc/refactor_contributing_doc -``` - -In subsequent development, if the master branch of the local repository is behind the master branch of "upstream", we need to pull the upstream for synchronization, and then execute the above command: - -```shell -git pull upstream master -``` - -#### 4. Commit the code and pass the unit test - -- MMCV introduces mypy to do static type checking to increase the robustness of the code. Therefore, we need to add Type Hints to our code and pass the mypy check. If you are not familiar with Type Hints, you can refer to [this tutorial](https://docs.python.org/3/library/typing.html). - -- The committed code should pass through the unit test - - ```shell - # Pass all unit tests - pytest tests - - # Pass the unit test of runner - pytest tests/test_runner/test_runner.py - ``` - - If the unit test fails for lack of dependencies, you can install the dependencies referring to the [guidance](#unit-test) - -- If the documents are modified/added, we should check the rendering result referring to [guidance](#document-rendering) - -#### 5. Push the code to remote - -We could push the local commits to remote after passing through the check of unit test and pre-commit. You can associate the local branch with remote branch by adding `-u` option. - -```shell -git push -u origin {branch_name} -``` - -This will allow you to use the `git push` command to push code directly next time, without having to specify a branch or the remote repository. - -#### 6. Create a Pull Request - -(1) Create a pull request in GitHub's Pull request interface - - - -(2) Modify the PR description according to the guidelines so that other developers can better understand your changes - - - -Find more details about Pull Request description in [pull request guidelines](#pr-specs). 
- -**note** - -(a) The Pull Request description should contain the reason for the change, the content of the change, and the impact of the change, and be associated with the relevant Issue (see [documentation](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue) - -(b) If it is your first contribution, please sign the CLA - - - -(c) Check whether the Pull Request pass through the CI - - - -MMCV will run unit test for the posted Pull Request on different platforms (Linux, Window, Mac), based on different versions of Python, PyTorch, CUDA to make sure the code is correct. We can see the specific test information by clicking `Details` in the above image so that we can modify the code. - -(3) If the Pull Request passes the CI, then you can wait for the review from other developers. You'll modify the code based on the reviewer's comments, and repeat the steps [4](#4-commit-the-code-and-pass-the-unit-test)-[5](#5-push-the-code-to-remote) until all reviewers approve it. Then, we will merge it ASAP. - - - -#### 7. Resolve conflicts - -If your local branch conflicts with the latest master branch of "upstream", you'll need to resolove them. There are two ways to do this: - -```shell -git fetch --all --prune -git rebase upstream/master ``` -or - -```shell -git fetch --all --prune -git merge upstream/master -``` - -If you are very good at handling conflicts, then you can use rebase to resolve conflicts, as this will keep your commit logs tidy. If you are not familiar with `rebase`, then you can use `merge` to resolve conflicts. 
- -### Guidance - -#### Unit test - -If you cannot run the unit test of some modules for lacking of some dependencies, such as [video](https://github.com/open-mmlab/mmcv/tree/master/mmcv/video) module, you can try to install the following dependencies: +From the repository folder ```shell -# Linux -sudo apt-get update -y -sudo apt-get install -y libturbojpeg -sudo apt-get install -y ffmpeg - -# Windows -conda install ffmpeg +pre-commit install ``` -We should also make sure the committed code will not decrease the coverage of unit test, we could run the following command to check the coverage of unit test: +Try the following steps to install ruby when you encounter an issue on installing markdownlint ```shell -python -m coverage run -m pytest /path/to/test_file -python -m coverage html -# check file in htmlcov/index.html -``` - -#### Document rendering +# install rvm +curl -L https://get.rvm.io | bash -s -- --autolibs=read-fail +[[ -s "$HOME/.rvm/scripts/rvm" ]] && source "$HOME/.rvm/scripts/rvm" +rvm autolibs disable -If the documents are modified/added, we should check the rendering result. We could install the dependencies and run the following command to render the documents and check the results: - -```shell -pip install -r requirements/docs.txt -cd docs/zh_cn/ -# or docs/en -make html -# check file in ./docs/zh_cn/_build/html/index.html +# install ruby +rvm install 2.7.1 ``` -### Code style +Or refer to [this repo](https://github.com/innerlee/setup) and take [`zzruby.sh`](https://github.com/innerlee/setup/blob/master/zzruby.sh) according its instruction. -#### Python +After this on every commit check code linters and formatter will be enforced. -We adopt [PEP8](https://www.python.org/dev/peps/pep-0008/) as the preferred code style. - -We use the following tools for linting and formatting: - -- [flake8](https://github.com/PyCQA/flake8): A wrapper around some linter tools. -- [isort](https://github.com/timothycrosley/isort): A Python utility to sort imports. 
-- [yapf](https://github.com/google/yapf): A formatter for Python files. -- [codespell](https://github.com/codespell-project/codespell): A Python utility to fix common misspellings in text files. -- [mdformat](https://github.com/executablebooks/mdformat): Mdformat is an opinionated Markdown formatter that can be used to enforce a consistent style in Markdown files. -- [docformatter](https://github.com/myint/docformatter): A formatter to format docstring. - -Style configurations of yapf and isort can be found in [setup.cfg](./setup.cfg). - -We use [pre-commit hook](https://pre-commit.com/) that checks and formats for `flake8`, `yapf`, `isort`, `trailing whitespaces`, `markdown files`, -fixes `end-of-files`, `double-quoted-strings`, `python-encoding-pragma`, `mixed-line-ending`, sorts `requirments.txt` automatically on every commit. -The config for a pre-commit hook is stored in [.pre-commit-config](./.pre-commit-config.yaml). +>Before you create a PR, make sure that your code lints and is formatted by yapf. #### C++ and CUDA We follow the [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html). - -### PR Specs - -1. Use [pre-commit](https://pre-commit.com) hook to avoid issues of code style - -2. One short-time branch should be matched with only one PR - -3. Accomplish a detailed change in one PR. Avoid large PR - - - Bad: Support Faster R-CNN - - Acceptable: Add a box head to Faster R-CNN - - Good: Add a parameter to box head to support custom conv-layer number - -4. Provide clear and significant commit message - -5. Provide clear and meaningful PR description - - - Task name should be clarified in title. 
The general format is: \[Prefix\] Short description of the PR (Suffix) - - Prefix: add new feature \[Feature\], fix bug \[Fix\], related to documents \[Docs\], in developing \[WIP\] (which will not be reviewed temporarily) - - Introduce main changes, results and influences on other modules in short description - - Associate related issues and pull requests with a milestone diff --git a/CONTRIBUTING_zh-CN.md b/CONTRIBUTING_zh-CN.md deleted file mode 100644 index 0062203..0000000 --- a/CONTRIBUTING_zh-CN.md +++ /dev/null @@ -1,274 +0,0 @@ -## è´¡çŒ®ä»£ç  - -欢迎加入 MMCV ç¤¾åŒºï¼Œæˆ‘ä»¬è‡´åŠ›äºŽæ‰“é€ æœ€å‰æ²¿çš„计算机视觉基础库,我们欢迎任何类型的贡献,包括但ä¸é™äºŽ - -**ä¿®å¤é”™è¯¯** - -ä¿®å¤ä»£ç å®žçŽ°é”™è¯¯çš„æ­¥éª¤å¦‚ä¸‹ï¼š - -1. 如果æäº¤çš„ä»£ç æ”¹åŠ¨è¾ƒå¤§ï¼Œå»ºè®®å…ˆæäº¤ issue,并正确æè¿° issue 的现象ã€åŽŸå› å’Œå¤çŽ°æ–¹å¼ï¼Œè®¨è®ºåŽç¡®è®¤ä¿®å¤æ–¹æ¡ˆã€‚ -2. ä¿®å¤é”™è¯¯å¹¶è¡¥å……相应的å•元测试,æäº¤æ‹‰å–请求。 - -**新增功能或组件** - -1. å¦‚æžœæ–°åŠŸèƒ½æˆ–æ¨¡å—æ¶‰åŠè¾ƒå¤§çš„ä»£ç æ”¹åŠ¨ï¼Œå»ºè®®å…ˆæäº¤ issueï¼Œç¡®è®¤åŠŸèƒ½çš„å¿…è¦æ€§ã€‚ -2. 实现新增功能并添å•元测试,æäº¤æ‹‰å–请求。 - -**文档补充** - -ä¿®å¤æ–‡æ¡£å¯ä»¥ç›´æŽ¥æäº¤æ‹‰å–请求 - -添加文档或将文档翻译æˆå…¶ä»–语言步骤如下 - -1. æäº¤ issueï¼Œç¡®è®¤æ·»åŠ æ–‡æ¡£çš„å¿…è¦æ€§ã€‚ -2. 添加文档,æäº¤æ‹‰å–请求。 - -### 拉å–è¯·æ±‚å·¥ä½œæµ - -如果你对拉å–请求ä¸äº†è§£ï¼Œæ²¡å…³ç³»ï¼ŒæŽ¥ä¸‹æ¥çš„内容将会从零开始,一步一步地指引你如何创建一个拉å–请求。如果你想深入了解拉å–è¯·æ±‚çš„å¼€å‘æ¨¡å¼ï¼Œå¯ä»¥å‚考 github [官方文档](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests) - -#### 1. 
å¤åˆ»ä»“库 - -当你第一次æäº¤æ‹‰å–请求时,先å¤åˆ» OpenMMLab 原代ç åº“,点击 GitHub 页é¢å³ä¸Šè§’çš„ **Fork** 按钮,å¤åˆ»åŽçš„代ç åº“将会出现在你的 GitHub 个人主页下。 - - - -将代ç å…‹éš†åˆ°æœ¬åœ° - -```shell -git clone git@github.com:{username}/mmcv.git -``` - -添加原代ç åº“为上游代ç åº“ - -```bash -git remote add upstream git@github.com:open-mmlab/mmcv -``` - -检查 remote æ˜¯å¦æ·»åŠ æˆåŠŸï¼Œåœ¨ç»ˆç«¯è¾“å…¥ `git remote -v` - -```bash -origin git@github.com:{username}/mmcv.git (fetch) -origin git@github.com:{username}/mmcv.git (push) -upstream git@github.com:open-mmlab/mmcv (fetch) -upstream git@github.com:open-mmlab/mmcv (push) -``` - -> 这里对 origin å’Œ upstream 进行一个简å•的介ç»ï¼Œå½“我们使用 git clone æ¥å…‹éš†ä»£ç æ—¶ï¼Œä¼šé»˜è®¤åˆ›å»ºä¸€ä¸ª origin çš„ remoteï¼Œå®ƒæŒ‡å‘æˆ‘们克隆的代ç åº“地å€ï¼Œè€Œ upstream åˆ™æ˜¯æˆ‘ä»¬è‡ªå·±æ·»åŠ çš„ï¼Œç”¨æ¥æŒ‡å‘原始代ç åº“地å€ã€‚当然如果你ä¸å–œæ¬¢ä»–å« upstream,也å¯ä»¥è‡ªå·±ä¿®æ”¹ï¼Œæ¯”å¦‚å« open-mmlabã€‚æˆ‘ä»¬é€šå¸¸å‘ origin æäº¤ä»£ç ï¼ˆå³ fork 下æ¥çš„远程仓库),然åŽå‘ upstream æäº¤ä¸€ä¸ª pull request。如果æäº¤çš„代ç å’Œæœ€æ–°çš„代ç å‘生冲çªï¼Œå†ä»Ž upstream æ‹‰å–æœ€æ–°çš„代ç ï¼Œå’Œæœ¬åœ°åˆ†æ”¯è§£å†³å†²çªï¼Œå†æäº¤åˆ° origin。 - -#### 2. 
é…ç½® pre-commit - -在本地开å‘环境中,我们使用 [pre-commit](https://pre-commit.com/#intro) æ¥æ£€æŸ¥ä»£ç é£Žæ ¼ï¼Œä»¥ç¡®ä¿ä»£ç é£Žæ ¼çš„统一。在æäº¤ä»£ç ï¼Œéœ€è¦å…ˆå®‰è£… pre-commit(需è¦åœ¨ MMCV 目录下执行): - -```shell -pip install -U pre-commit -pre-commit install -``` - -检查 pre-commit 是å¦é…ç½®æˆåŠŸï¼Œå¹¶å®‰è£… `.pre-commit-config.yaml` 中的钩å­ï¼š - -```shell -pre-commit run --all-files -``` - - - - - -> 如果你是中国用户,由于网络原因,å¯èƒ½ä¼šå‡ºçŽ°å®‰è£…å¤±è´¥çš„æƒ…å†µï¼Œè¿™æ—¶å¯ä»¥ä½¿ç”¨å›½å†…æº - -> pre-commit install -c .pre-commit-config-zh-cn.yaml - -> pre-commit run --all-files -c .pre-commit-config-zh-cn.yaml - -如果安装过程被中断,å¯ä»¥é‡å¤æ‰§è¡Œ `pre-commit run ...` 继续安装。 - -如果æäº¤çš„代ç ä¸ç¬¦åˆä»£ç é£Žæ ¼è§„范,pre-commit 会å‘出警告,并自动修å¤éƒ¨åˆ†é”™è¯¯ã€‚ - - - -如果我们想临时绕开 pre-commit 的检查æäº¤ä¸€æ¬¡ä»£ç ï¼Œå¯ä»¥åœ¨ `git commit` 时加上 `--no-verify`(需è¦ä¿è¯æœ€å޿ލé€è‡³è¿œç¨‹ä»“库的代ç èƒ½å¤Ÿé€šè¿‡ pre-commit 检查)。 - -```shell -git commit -m "xxx" --no-verify -``` - -#### 3. 创建开å‘分支 - -安装完 pre-commit 之åŽï¼Œæˆ‘们需è¦åŸºäºŽ master 创建开å‘分支,建议的分支命å规则为 `username/pr_name`。 - -```shell -git checkout -b yhc/refactor_contributing_doc -``` - -在åŽç»­çš„å¼€å‘中,如果本地仓库的 master 分支è½åŽäºŽ upstream çš„ master 分支,我们需è¦å…ˆæ‹‰å– upstream 的代ç è¿›è¡ŒåŒæ­¥ï¼Œå†æ‰§è¡Œä¸Šé¢çš„命令 - -```shell -git pull upstream master -``` - -#### 4. æäº¤ä»£ç å¹¶åœ¨æœ¬åœ°é€šè¿‡å•元测试 - -- MMCV 引入了 mypy æ¥åšé™æ€ç±»åž‹æ£€æŸ¥ï¼Œä»¥å¢žåР代ç çš„鲿£’性。因此我们在æäº¤ä»£ç æ—¶ï¼Œéœ€è¦è¡¥å…… Type Hints。具体规则å¯ä»¥å‚考[教程](https://zhuanlan.zhihu.com/p/519335398)。 - -- æäº¤çš„代ç åŒæ ·éœ€è¦é€šè¿‡å•元测试 - - ```shell - # 通过全é‡å•元测试 - pytest tests - - # 我们需è¦ä¿è¯æäº¤çš„代ç èƒ½å¤Ÿé€šè¿‡ä¿®æ”¹æ¨¡å—çš„å•元测试,以 runner 为例 - pytest tests/test_runner/test_runner.py - ``` - - 如果你由于缺少ä¾èµ–无法è¿è¡Œä¿®æ”¹æ¨¡å—çš„å•元测试,å¯ä»¥å‚考[指引-å•元测试](#å•元测试) - -- 如果修改/添加了文档,å‚考[指引](#文档渲染)确认文档渲染正常。 - -#### 5. 
推é€ä»£ç åˆ°è¿œç¨‹ - -代ç é€šè¿‡å•元测试和 pre-commit 检查åŽï¼Œå°†ä»£ç æŽ¨é€åˆ°è¿œç¨‹ä»“库,如果是第一次推é€ï¼Œå¯ä»¥åœ¨ `git push` åŽåŠ ä¸Š `-u` 傿•°ä»¥å…³è”远程分支 - -```shell -git push -u origin {branch_name} -``` - -这样下次就å¯ä»¥ç›´æŽ¥ä½¿ç”¨ `git push` 命令推é€ä»£ç äº†ï¼Œè€Œæ— éœ€æŒ‡å®šåˆ†æ”¯å’Œè¿œç¨‹ä»“库。 - -#### 6. æäº¤æ‹‰å–请求(PR) - -(1) 在 GitHub çš„ Pull request 界é¢åˆ›å»ºæ‹‰å–请求 - - -(2) æ ¹æ®æŒ‡å¼•修改 PR æè¿°ï¼Œä»¥ä¾¿äºŽå…¶ä»–å¼€å‘者更好地ç†è§£ä½ çš„修改 - - - -æè¿°è§„范详è§[拉å–请求规范](#拉å–请求规范) - -  - -**注æ„事项** - -(a) PR æè¿°åº”该包å«ä¿®æ”¹ç†ç”±ã€ä¿®æ”¹å†…容以åŠä¿®æ”¹åŽå¸¦æ¥çš„å½±å“,并关è”相关 Issue(具体方å¼è§[文档](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue)) - -(b) 如果是第一次为 OpenMMLab åšè´¡çŒ®ï¼Œéœ€è¦ç­¾ç½² CLA - - - -(c) 检查æäº¤çš„ PR 是å¦é€šè¿‡ CIï¼ˆé›†æˆæµ‹è¯•) - - - -MMCV 会在ä¸åŒçš„å¹³å°ï¼ˆLinuxã€Windowã€Mac),基于ä¸åŒç‰ˆæœ¬çš„ Pythonã€PyTorchã€CUDA 对æäº¤çš„代ç è¿›è¡Œå•元测试,以ä¿è¯ä»£ç çš„æ­£ç¡®æ€§ï¼Œå¦‚果有任何一个没有通过,我们å¯ç‚¹å‡»ä¸Šå›¾ä¸­çš„ `Details` æ¥æŸ¥çœ‹å…·ä½“的测试信æ¯ï¼Œä»¥ä¾¿äºŽæˆ‘们修改代ç ã€‚ - -(3) 如果 PR 通过了 CI,那么就å¯ä»¥ç­‰å¾…å…¶ä»–å¼€å‘者的 reviewï¼Œå¹¶æ ¹æ® reviewer çš„æ„è§ï¼Œä¿®æ”¹ä»£ç ï¼Œå¹¶é‡å¤ [4](#4-æäº¤ä»£ç å¹¶æœ¬åœ°é€šè¿‡å•元测试)-[5](#5-推é€ä»£ç åˆ°è¿œç¨‹) 步骤,直到 reviewer åŒæ„åˆå…¥ PR。 - - - -所有 reviewer åŒæ„åˆå…¥ PR åŽï¼Œæˆ‘们会尽快将 PR åˆå¹¶åˆ°ä¸»åˆ†æ”¯ã€‚ - -#### 7. 
è§£å†³å†²çª - -éšç€æ—¶é—´çš„æŽ¨ç§»ï¼Œæˆ‘们的代ç åº“ä¼šä¸æ–­æ›´æ–°ï¼Œè¿™æ—¶å€™ï¼Œå¦‚果你的 PR 与主分支存在冲çªï¼Œä½ éœ€è¦è§£å†³å†²çªï¼Œè§£å†³å†²çªçš„æ–¹å¼æœ‰ä¸¤ç§ï¼š - -```shell -git fetch --all --prune -git rebase upstream/master -``` - -或者 - -```shell -git fetch --all --prune -git merge upstream/master -``` - -如果你éžå¸¸å–„于处ç†å†²çªï¼Œé‚£ä¹ˆå¯ä»¥ä½¿ç”¨ rebase çš„æ–¹å¼æ¥è§£å†³å†²çªï¼Œå› ä¸ºè¿™èƒ½å¤Ÿä¿è¯ä½ çš„ commit log 的整æ´ã€‚如果你ä¸å¤ªç†Ÿæ‚‰ `rebase` 的使用,那么å¯ä»¥ä½¿ç”¨ `merge` çš„æ–¹å¼æ¥è§£å†³å†²çªã€‚ - -### 指引 - -#### å•元测试 - -如果你无法正常执行部分模å—çš„å•元测试,例如 [video](https://github.com/open-mmlab/mmcv/tree/master/mmcv/video) 模å—,å¯èƒ½æ˜¯ä½ çš„当å‰çŽ¯å¢ƒæ²¡æœ‰å®‰è£…ä»¥ä¸‹ä¾èµ– - -```shell -# Linux -sudo apt-get update -y -sudo apt-get install -y libturbojpeg -sudo apt-get install -y ffmpeg - -# Windows -conda install ffmpeg -``` - -在æäº¤ä¿®å¤ä»£ç é”™è¯¯æˆ–新增特性的拉å–请求时,我们应该尽å¯èƒ½çš„让å•元测试覆盖所有æäº¤çš„代ç ï¼Œè®¡ç®—å•元测试覆盖率的方法如下 - -```shell -python -m coverage run -m pytest /path/to/test_file -python -m coverage html -# check file in htmlcov/index.html -``` - -#### 文档渲染 - -在æäº¤ä¿®å¤ä»£ç é”™è¯¯æˆ–新增特性的拉å–请求时,å¯èƒ½ä¼šéœ€è¦ä¿®æ”¹/新增模å—çš„ docstring。我们需è¦ç¡®è®¤æ¸²æŸ“åŽçš„æ–‡æ¡£æ ·å¼æ˜¯æ­£ç¡®çš„。 -æœ¬åœ°ç”Ÿæˆæ¸²æŸ“åŽçš„æ–‡æ¡£çš„æ–¹æ³•如下 - -```shell -pip install -r requirements/docs.txt -cd docs/zh_cn/ -# or docs/en -make html -# check file in ./docs/zh_cn/_build/html/index.html -``` - -### 代ç é£Žæ ¼ - -#### Python - -[PEP8](https://www.python.org/dev/peps/pep-0008/) 作为 OpenMMLab 算法库首选的代ç è§„范,我们使用以下工具检查和格å¼åŒ–ä»£ç  - -- [flake8](https://github.com/PyCQA/flake8): Python 官方å‘布的代ç è§„范检查工具,是多个检查工具的å°è£… -- [isort](https://github.com/timothycrosley/isort): 自动调整模å—导入顺åºçš„工具 -- [yapf](https://github.com/google/yapf): Google å‘布的代ç è§„范检查工具 -- [codespell](https://github.com/codespell-project/codespell): 检查å•è¯æ‹¼å†™æ˜¯å¦æœ‰è¯¯ -- [mdformat](https://github.com/executablebooks/mdformat): 检查 markdown 文件的工具 -- [docformatter](https://github.com/myint/docformatter): æ ¼å¼åŒ– docstring 的工具 - -yapf å’Œ isort 
çš„é…ç½®å¯ä»¥åœ¨ [setup.cfg](./setup.cfg) 找到 - -通过é…ç½® [pre-commit hook](https://pre-commit.com/) ,我们å¯ä»¥åœ¨æäº¤ä»£ç æ—¶è‡ªåŠ¨æ£€æŸ¥å’Œæ ¼å¼åŒ– `flake8`ã€`yapf`ã€`isort`ã€`trailing whitespaces`ã€`markdown files`, -ä¿®å¤ `end-of-files`ã€`double-quoted-strings`ã€`python-encoding-pragma`ã€`mixed-line-ending`,调整 `requirments.txt` 的包顺åºã€‚ -pre-commit é’©å­çš„é…ç½®å¯ä»¥åœ¨ [.pre-commit-config](./.pre-commit-config.yaml) 找到。 - -pre-commit 具体的安装使用方å¼è§[拉å–请求](#2-é…ç½®-pre-commit)。 - -更具体的规范请å‚考 [OpenMMLab 代ç è§„范](code_style.md)。 - -#### C++ and CUDA - -C++ å’Œ CUDA 的代ç è§„范éµä»Ž [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html) - -### 拉å–请求规范 - -1. 使用 [pre-commit hook](https://pre-commit.com),尽é‡å‡å°‘代ç é£Žæ ¼ç›¸å…³é—®é¢˜ - -2. 一个`拉å–请求`对应一个短期分支 - -3. 粒度è¦ç»†ï¼Œä¸€ä¸ª`拉å–请求`åªåšä¸€ä»¶äº‹æƒ…,é¿å…超大的`拉å–请求` - - - Bad:实现 Faster R-CNN - - Acceptable:给 Faster R-CNN 添加一个 box head - - Good:给 box head å¢žåŠ ä¸€ä¸ªå‚æ•°æ¥æ”¯æŒè‡ªå®šä¹‰çš„ conv 层数 - -4. æ¯æ¬¡ Commit æ—¶éœ€è¦æä¾›æ¸…æ™°ä¸”æœ‰æ„义 commit ä¿¡æ¯ - -5. æä¾›æ¸…晰且有æ„义的`拉å–请求`æè¿° - - - 标题写明白任务å称,一般格å¼:\[Prefix\] Short description of the pull request (Suffix) - - prefix: 新增功能 \[Feature\], ä¿® bug \[Fix\], 文档相关 \[Docs\], å¼€å‘中 \[WIP\] (暂时ä¸ä¼šè¢«review) - - æè¿°é‡Œä»‹ç»`拉å–请求`的主è¦ä¿®æ”¹å†…容,结果,以åŠå¯¹å…¶ä»–部分的影å“, å‚考`拉å–请求`æ¨¡æ¿ - - å…³è”相关的`议题` (issue) 和其他`拉å–请求` - -6. 如果引入了其他三方库,或借鉴了三方库的代ç ï¼Œè¯·ç¡®è®¤ä»–们的许å¯è¯å’Œ mmcv 兼容,并在借鉴的代ç ä¸Šè¡¥å…… `This code is inspired from http://` diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..e163b31 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,7 @@ +FROM python:3.7 + +WORKDIR /mmcv + +COPY . /mmcv + +RUN pip install -e . diff --git a/LICENSES.md b/LICENSES.md index 3cdeddf..9bb0c8c 100644 --- a/LICENSES.md +++ b/LICENSES.md @@ -2,10 +2,7 @@ In this file, we list the operations with other licenses instead of Apache 2.0. Users should be careful about adopting these operations in any commercial matters. 
-| Operation | Files | License | -| :--------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------: | :------------: | -| upfirdn2d | [mmcv/ops/csrc/pytorch/cuda/upfirdn2d_kernel.cu](https://github.com/open-mmlab/mmcv/tree/2.x/mmcv/ops/csrc/pytorch/cuda/upfirdn2d_kernel.cu) | NVIDIA License | -| fused_leaky_relu | [mmcv/ops/csrc/pytorch/cuda/fused_bias_leakyrelu_cuda.cu](https://github.com/open-mmlab/mmcv/tree/2.x/mmcv/ops/csrc/pytorch/cuda/fused_bias_leakyrelu_cuda.cu) | NVIDIA License | -| bias_act | [mmcv/ops/csrc/pytorch/cuda/bias_act_cuda.cu](https://github.com/open-mmlab/mmcv/tree/2.x/mmcv/ops/csrc/pytorch/cuda/bias_act_cuda.cu) | NVIDIA License | -| filtered_lrelu | [mmcv/ops/csrc/pytorch/cuda/filtered_lrelu.cu](https://github.com/open-mmlab/mmcv/tree/2.x/mmcv/ops/csrc/pytorch/cuda/filtered_lrelu.cu) | NVIDIA License | -| conv2d_gradfix | [mmcv/ops/conv2d_gradfix.py](https://github.com/open-mmlab/mmcv/tree/2.x/mmcv/ops/conv2d_gradfix.py) | NVIDIA License | +| Operation | Files | License | +| :--------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------: | :------------: | +| upfirdn2d | [mmcv/ops/csrc/pytorch/cuda/upfirdn2d_kernel.cu](https://github.com/open-mmlab/mmcv/blob/master/mmcv/ops/csrc/pytorch/cuda/upfirdn2d_kernel.cu) | NVIDIA License | +| fused_leaky_relu | [mmcv/ops/csrc/pytorch/cuda/fused_bias_leakyrelu_cuda.cu](https://github.com/open-mmlab/mmcv/blob/master/mmcv/ops/csrc/pytorch/cuda/fused_bias_leakyrelu_cuda.cu) | NVIDIA License | diff --git a/MANIFEST.in b/MANIFEST.in index 622635c..65f232e 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,6 +1,5 @@ include requirements/runtime.txt +include mmcv/model_zoo/open_mmlab.json mmcv/model_zoo/deprecated.json mmcv/model_zoo/mmcls.json include mmcv/ops/csrc/common/cuda/*.cuh 
mmcv/ops/csrc/common/cuda/*.hpp mmcv/ops/csrc/common/*.hpp include mmcv/ops/csrc/pytorch/*.cpp mmcv/ops/csrc/pytorch/cuda/*.cu mmcv/ops/csrc/pytorch/cuda/*.cpp mmcv/ops/csrc/pytorch/cpu/*.cpp include mmcv/ops/csrc/parrots/*.h mmcv/ops/csrc/parrots/*.cpp -include mmcv/ops/csrc/pytorch/mps/*.mm mmcv/ops/csrc/common/mps/*.h mmcv/ops/csrc/common/mps/*.mm -recursive-include mmcv/ops/csrc/ *.h *.hpp *.cpp *.cuh *.cu *.mm diff --git a/README.md b/README.md index 098cf65..9b64100 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ MMCVæ˜¯è®¡ç®—æœºè§†è§‰ç ”ç©¶çš„åŸºç¡€åº“ï¼Œä¸»è¦æä¾›ä»¥ä¸‹åŠŸèƒ½ï¼šå›¾åƒå¤„ + Python 3.7ã€3.8ã€3.9 ### 1ã€ä½¿ç”¨pipæ–¹å¼å®‰è£… -mmcv whl包下载目录:[https://cancon.hpccube.com:65024/4/main/mmcv/dtk23.04](https://cancon.hpccube.com:65024/4/main/mmcv/dtk23.04),选择对应的pytorch版本和python版本下载对应mmcvçš„whl包 +mmcv whl包下载目录:[https://cancon.hpccube.com:65024/4/main/mmcv](https://cancon.hpccube.com:65024/4/main/mmcv),选择对应的pytorch版本和python版本下载对应mmcvçš„whl包 ```shell pip install mmcv* (下载的mmcvçš„whl包) ``` @@ -18,7 +18,7 @@ pip install mmcv* (下载的mmcvçš„whl包) 1. 基于光æºpytorch基础镜åƒçŽ¯å¢ƒï¼šé•œåƒä¸‹è½½åœ°å€ï¼š[https://sourcefind.cn/#/image/dcu/pytorch](https://sourcefind.cn/#/image/dcu/pytorch),根æ®pytorchã€pythonã€dtkåŠç³»ç»Ÿä¸‹è½½å¯¹åº”的镜åƒç‰ˆæœ¬ã€‚ -2. 基于现有python环境:安装pytorch,pytorch whl包下载目录:[https://cancon.hpccube.com:65024/4/main/pytorch/dtk23.04](https://cancon.hpccube.com:65024/4/main/pytorch/dtk23.04),根æ®pythonã€dtk版本,下载对应pytorchçš„whl包。安装命令如下: +2. 基于现有python环境:安装pytorch,pytorch whl包下载目录:[https://cancon.hpccube.com:65024/4/main/pytorch/dtk24.04.1](https://cancon.hpccube.com:65024/4/main/pytorch/dtk24.04.1),根æ®pythonã€dtk版本,下载对应pytorchçš„whl包。安装命令如下: ```shell pip install torch* (下载的torchçš„whl包) pip install setuptools==59.5.0 wheel @@ -32,11 +32,17 @@ git clone https://developer.hpccube.com/codes/aicomponent/mmcv # æ ¹æ®ç¼–译需 - æä¾›2ç§æºç ç¼–译方å¼ï¼ˆè¿›å…¥mmcv目录): ``` 1. 
编译whl包并安装 -MMCV_WITH_OPS=1 ROCM_HOME=${ROCM_PATH} python3 setup.py -v bdist_wheel +MMCV_WITH_OPS=1 python3 setup.py -v bdist_wheel pip install dist/mmcv* 2. æºç ç¼–译安装 -MMCV_WITH_OPS=1 ROCM_HOME=${ROCM_PATH} python3 setup.py install +MMCV_WITH_OPS=1 python3 setup.py install +``` +3. æµ‹è¯•éªŒè¯ +``` +cd test +pytest -s ./test_arraymisc.py +pytest -s ./test_ops ``` #### 注æ„事项 + 若使用pip installä¸‹è½½å®‰è£…è¿‡æ…¢ï¼Œå¯æ·»åŠ pypiæ¸…åŽæºï¼š-i https://pypi.tuna.tsinghua.edu.cn/simple/ @@ -52,3 +58,4 @@ MMCV_WITH_OPS=1 ROCM_HOME=${ROCM_PATH} python3 setup.py install - [README_ORIGIN](README_ORIGIN.md) - [README_zh-CN](README_zh-CN.md) - [https://github.com/open-mmlab/mmcv](https://github.com/open-mmlab/mmcv) + diff --git a/README_ORIGIN.md b/README_ORIGIN.md index 25d290f..e9e3f8e 100644 --- a/README_ORIGIN.md +++ b/README_ORIGIN.md @@ -1,119 +1,204 @@
- -
 
-
- OpenMMLab website - - - HOT - - -      - OpenMMLab platform - - - TRY IT OUT - - -
-
 
+
-[![docs](https://img.shields.io/badge/docs-2.x-blue)](https://mmcv.readthedocs.io/en/2.x/) -[![platform](https://img.shields.io/badge/platform-Linux%7CWindows%7CmacOS-blue)](https://mmcv.readthedocs.io/en/2.x/get_started/installation.html) -[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/mmcv)](https://pypi.org/project/mmcv/) -[![pytorch](https://img.shields.io/badge/pytorch-1.6~1.13-orange)](https://pytorch.org/get-started/previous-versions/) -[![cuda](https://img.shields.io/badge/cuda-9.2~11.7-green)](https://developer.nvidia.com/cuda-downloads) -[![PyPI](https://img.shields.io/pypi/v/mmcv)](https://pypi.org/project/mmcv) -[![badge](https://github.com/open-mmlab/mmcv/workflows/build/badge.svg)](https://github.com/open-mmlab/mmcv/actions) -[![codecov](https://codecov.io/gh/open-mmlab/mmcv/branch/master/graph/badge.svg)](https://codecov.io/gh/open-mmlab/mmcv) -[![license](https://img.shields.io/github/license/open-mmlab/mmcv.svg)](https://github.com/open-mmlab/mmcv/blob/master/LICENSE) +[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/mmcv)](https://pypi.org/project/mmcv/) [![PyPI](https://img.shields.io/pypi/v/mmcv)](https://pypi.org/project/mmcv) [![badge](https://github.com/open-mmlab/mmcv/workflows/build/badge.svg)](https://github.com/open-mmlab/mmcv/actions) [![codecov](https://codecov.io/gh/open-mmlab/mmcv/branch/master/graph/badge.svg)](https://codecov.io/gh/open-mmlab/mmcv) [![license](https://img.shields.io/github/license/open-mmlab/mmcv.svg)](https://github.com/open-mmlab/mmcv/blob/master/LICENSE) English | [简体中文](README_zh-CN.md) ## Introduction -MMCV is a foundational library for computer vision research and it provides the following functionalities: +MMCV is a foundational library for computer vision research and supports many +research projects as below: -- [Image/Video processing](https://mmcv.readthedocs.io/en/2.x/understand_mmcv/data_process.html) -- [Image and annotation 
visualization](https://mmcv.readthedocs.io/en/2.x/understand_mmcv/visualization.html) -- [Image transformation](https://mmcv.readthedocs.io/en/2.x/understand_mmcv/data_transform.html) -- [Various CNN architectures](https://mmcv.readthedocs.io/en/2.x/understand_mmcv/cnn.html) -- [High-quality implementation of common CPU and CUDA ops](https://mmcv.readthedocs.io/en/2.x/understand_mmcv/ops.html) +- [MMCV](https://github.com/open-mmlab/mmcv): OpenMMLab foundational library for computer vision. +- [MIM](https://github.com/open-mmlab/mim): MIM Installs OpenMMLab Packages. +- [MMClassification](https://github.com/open-mmlab/mmclassification): OpenMMLab image classification toolbox and benchmark. +- [MMDetection](https://github.com/open-mmlab/mmdetection): OpenMMLab detection toolbox and benchmark. +- [MMDetection3D](https://github.com/open-mmlab/mmdetection3d): OpenMMLab's next-generation platform for general 3D object detection. +- [MMSegmentation](https://github.com/open-mmlab/mmsegmentation): OpenMMLab semantic segmentation toolbox and benchmark. +- [MMAction2](https://github.com/open-mmlab/mmaction2): OpenMMLab's next-generation action understanding toolbox and benchmark. +- [MMTracking](https://github.com/open-mmlab/mmtracking): OpenMMLab video perception toolbox and benchmark. +- [MMPose](https://github.com/open-mmlab/mmpose): OpenMMLab pose estimation toolbox and benchmark. +- [MMEditing](https://github.com/open-mmlab/mmediting): OpenMMLab image and video editing toolbox. +- [MMOCR](https://github.com/open-mmlab/mmocr): A Comprehensive Toolbox for Text Detection, Recognition and Understanding. +- [MMGeneration](https://github.com/open-mmlab/mmgeneration): OpenMMLab image and video generative models toolbox. +- [MMFlow](https://github.com/open-mmlab/mmflow): OpenMMLab optical flow toolbox and benchmark. +- [MMFewShot](https://github.com/open-mmlab/mmfewshot): OpenMMLab FewShot Learning Toolbox and Benchmark. 
-It supports the following systems: +It provides the following functionalities. -- Linux -- Windows -- macOS +- Universal IO APIs +- Image/Video processing +- Image and annotation visualization +- Useful utilities (progress bar, timer, ...) +- PyTorch runner with hooking mechanism +- Various CNN architectures +- High-quality implementation of common CUDA ops -See the [documentation](http://mmcv.readthedocs.io/en/2.x) for more features and usage. +See the [documentation](http://mmcv.readthedocs.io/en/latest) for more features and usage. -Note: MMCV requires Python 3.7+. +Note: MMCV requires Python 3.6+. ## Installation There are two versions of MMCV: -- **mmcv**: comprehensive, with full features and various CUDA ops out of the box. It takes longer time to build. -- **mmcv-lite**: lite, without CUDA ops but all other features, similar to mmcv\<1.0.0. It is useful when you do not need those CUDA ops. +- **mmcv-full**: comprehensive, with full features and various CUDA ops out of box. It takes longer time to build. +- **mmcv**: lite, without CUDA ops but all other features, similar to mmcv<1.0.0. It is useful when you do not need those CUDA ops. **Note**: Do not install both versions in the same environment, otherwise you may encounter errors like `ModuleNotFound`. You need to uninstall one before installing the other. `Installing the full version is highly recommended if CUDA is available`. -### Install mmcv +a. Install the full version. + +Before installing mmcv-full, make sure that PyTorch has been successfully installed following the [official guide](https://pytorch.org/). -Before installing mmcv, make sure that PyTorch has been successfully installed following the [PyTorch official installation guide](https://github.com/pytorch/pytorch#installation). For apple silicon users, please use PyTorch 1.13+. +We provide pre-built mmcv packages (recommended) with different PyTorch and CUDA versions to simplify the building. 
In addition, you can run [check_installation.py](.dev_scripts/check_installation.py) to check the installation of mmcv-full after running the installation commands. -The command to install mmcv: +i. Install the latest version. -```bash -pip install -U openmim -mim install "mmcv>=2.0.0rc1" +The rule for installing the latest ``mmcv-full`` is as follows: + +```shell +pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/{cu_version}/{torch_version}/index.html ``` -If you need to specify the version of mmcv, you can use the following command: +Please replace ``{cu_version}`` and ``{torch_version}`` in the url to your desired one. For example, +to install the latest ``mmcv-full`` with ``CUDA 11.1`` and ``PyTorch 1.9.0``, use the following command: -```bash -mim install mmcv==2.0.0rc3 +```shell +pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html ``` -If you find that the above installation command does not use a pre-built package ending with `.whl` but a source package ending with `.tar.gz`, you may not have a pre-build package corresponding to the PyTorch or CUDA or mmcv version, in which case you can [build mmcv from source](https://mmcv.readthedocs.io/en/2.x/get_started/build.html). +**Note**: mmcv-full is only compiled on PyTorch 1.x.0 because the compatibility usually holds between 1.x.0 and 1.x.1. If your PyTorch version is 1.x.1, you can install mmcv-full compiled with PyTorch 1.x.0 and it usually works well. For example, if your PyTorch version is 1.8.1 and CUDA version is 11.1, you can use the following command to install mmcv-full. -
-Installation log using pre-built packages +```shell +pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.8.0/index.html +``` -Looking in links: https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/index.html
-Collecting mmcv
-Downloading https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/mmcv-2.0.0rc3-cp38-cp38-manylinux1_x86_64.whl +For more details, please refer to the following tables and delete ``=={mmcv_version}``. -
+ii. Install a specified version. -
-Installation log using source packages +The rule for installing a specified ``mmcv-full`` is as follows: -Looking in links: https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/index.html
-Collecting mmcv==2.0.0rc3
-Downloading mmcv-2.0.0rc3.tar.gz +```shell +pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/{cu_version}/{torch_version}/index.html +``` + +First of all, please refer to the Releases and replace ``{mmcv_version}`` with a specified one, e.g. ``1.3.9``. +Then replace ``{cu_version}`` and ``{torch_version}`` in the url to your desired versions. For example, +to install ``mmcv-full==1.3.9`` with ``CUDA 11.1`` and ``PyTorch 1.9.0``, use the following command: -
+```shell +pip install mmcv-full==1.3.9 -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html +``` -For more installation methods, please refer to the [Installation documentation](https://mmcv.readthedocs.io/en/2.x/get_started/installation.html). +For more details, please refer the the following tables. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CUDA torch1.10torch1.9torch1.8torch1.7torch1.6torch1.5
11.3
install
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu113/torch1.10.0/index.html
11.1
install
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.10.0/index.html
install
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html
install
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.8.0/index.html
11.0
install
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu110/torch1.7.0/index.html
10.2
install
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.10.0/index.html
install
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.9.0/index.html
install
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/index.html
install
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.7.0/index.html
install
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.6.0/index.html
install
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.5.0/index.html
10.1
install
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.8.0/index.html
install
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.7.0/index.html
install
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.6.0/index.html
install
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.5.0/index.html
9.2
install
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu92/torch1.7.0/index.html
install
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu92/torch1.6.0/index.html
install
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu92/torch1.5.0/index.html
cpu
install
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.10.0/index.html
install
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.9.0/index.html
install
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.8.0/index.html
install
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.7.0/index.html
install
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.6.0/index.html
install
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.5.0/index.html
+ +**Note**: The pre-built packages provided above do not include all versions of mmcv-full, you can click on the corresponding links to see the supported versions. For example, you can click [cu102-torch1.8.0](https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/index.html) and you can see that `cu102-torch1.8.0` only provides 1.3.0 and above versions of mmcv-full. In addition, we no longer provide `mmcv-full` pre-built packages compiled with `PyTorch 1.3 & 1.4` since v1.3.17. You can find previous versions that were compiled with PyTorch 1.3 & 1.4 [here](./docs/get_started/previous_versions.md). The compatibility is still ensured in our CI, but we will discard the support of PyTorch 1.3 & 1.4 next year. + +Another way is to compile locally by running + +```python +pip install mmcv-full +``` -### Install mmcv-lite +Note that the local compiling may take up to 10 mins. -If you need to use PyTorch-related modules, make sure PyTorch has been successfully installed in your environment by referring to the [PyTorch official installation guide](https://github.com/pytorch/pytorch#installation). +b. Install the lite version. -```bash -pip install -U openmim -mim install "mmcv-lite>=2.0.0rc1" +```python +pip install mmcv ``` +c. Install full version with custom operators for onnxruntime + +- Check [here](docs/deployment/onnxruntime_op.md) for detailed instruction. + +If you would like to build MMCV from source, please refer to the [guide](https://mmcv.readthedocs.io/en/latest/get_started/build.html). + ## FAQ If you face some installation issues, CUDA related issues or RuntimeErrors, -you may first refer to this [Frequently Asked Questions](https://mmcv.readthedocs.io/en/2.x/faq.html). - -If you face installation problems or runtime issues, you may first refer to this [Frequently Asked Questions](https://mmcv.readthedocs.io/en/2.x/faq.html) to see if there is a solution.
If the problem is still not solved, feel free to open an [issue](https://github.com/open-mmlab/mmcv/issues). +you may first refer to this [Frequently Asked Questions](https://mmcv.readthedocs.io/en/latest/faq.html). ## Citation @@ -135,27 +220,3 @@ We appreciate all contributions to improve MMCV. Please refer to [CONTRIBUTING.m ## License MMCV is released under the Apache 2.0 license, while some specific operations in this library are with other licenses. Please refer to [LICENSES.md](LICENSES.md) for the careful check, if you are using our code for commercial matters. - -## Projects in OpenMMLab - -- [MMEngine](https://github.com/open-mmlab/mmengine): OpenMMLab foundational library for training deep learning models. -- [MMCV](https://github.com/open-mmlab/mmcv): OpenMMLab foundational library for computer vision. -- [MIM](https://github.com/open-mmlab/mim): MIM installs OpenMMLab packages. -- [MMClassification](https://github.com/open-mmlab/mmclassification): OpenMMLab image classification toolbox and benchmark. -- [MMDetection](https://github.com/open-mmlab/mmdetection): OpenMMLab detection toolbox and benchmark. -- [MMDetection3D](https://github.com/open-mmlab/mmdetection3d): OpenMMLab's next-generation platform for general 3D object detection. -- [MMRotate](https://github.com/open-mmlab/mmrotate): OpenMMLab rotated object detection toolbox and benchmark. -- [MMYOLO](https://github.com/open-mmlab/mmyolo): OpenMMLab YOLO series toolbox and benchmark. -- [MMSegmentation](https://github.com/open-mmlab/mmsegmentation): OpenMMLab semantic segmentation toolbox and benchmark. -- [MMOCR](https://github.com/open-mmlab/mmocr): OpenMMLab text detection, recognition, and understanding toolbox. -- [MMPose](https://github.com/open-mmlab/mmpose): OpenMMLab pose estimation toolbox and benchmark. -- [MMHuman3D](https://github.com/open-mmlab/mmhuman3d): OpenMMLab 3D human parametric model toolbox and benchmark. 
-- [MMSelfSup](https://github.com/open-mmlab/mmselfsup): OpenMMLab self-supervised learning toolbox and benchmark. -- [MMRazor](https://github.com/open-mmlab/mmrazor): OpenMMLab model compression toolbox and benchmark. -- [MMFewShot](https://github.com/open-mmlab/mmfewshot): OpenMMLab fewshot learning toolbox and benchmark. -- [MMAction2](https://github.com/open-mmlab/mmaction2): OpenMMLab's next-generation action understanding toolbox and benchmark. -- [MMTracking](https://github.com/open-mmlab/mmtracking): OpenMMLab video perception toolbox and benchmark. -- [MMFlow](https://github.com/open-mmlab/mmflow): OpenMMLab optical flow toolbox and benchmark. -- [MMEditing](https://github.com/open-mmlab/mmediting): OpenMMLab image and video editing toolbox. -- [MMGeneration](https://github.com/open-mmlab/mmgeneration): OpenMMLab image and video generative models toolbox. -- [MMDeploy](https://github.com/open-mmlab/mmdeploy): OpenMMLab model deployment framework. diff --git a/README_zh-CN.md b/README_zh-CN.md index d9a81eb..e3288ee 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -1,116 +1,200 @@
- -
 
-
- OpenMMLab 官网 - - - HOT - - -      - OpenMMLab å¼€æ”¾å¹³å° - - - TRY IT OUT - - -
-
 
+
-[![docs](https://img.shields.io/badge/docs-2.x-blue)](https://mmcv.readthedocs.io/zh_CN/2.x/) -[![platform](https://img.shields.io/badge/platform-Linux%7CWindows%7CmacOS-blue)](https://mmcv.readthedocs.io/zh_CN/2.x/get_started/installation.html) -[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/mmcv)](https://pypi.org/project/mmcv/) -[![pytorch](https://img.shields.io/badge/pytorch-1.6~1.13-orange)](https://pytorch.org/get-started/previous-versions/) -[![cuda](https://img.shields.io/badge/cuda-9.2~11.7-green)](https://developer.nvidia.com/cuda-downloads) -[![PyPI](https://img.shields.io/pypi/v/mmcv)](https://pypi.org/project/mmcv) -[![badge](https://github.com/open-mmlab/mmcv/workflows/build/badge.svg)](https://github.com/open-mmlab/mmcv/actions) -[![codecov](https://codecov.io/gh/open-mmlab/mmcv/branch/master/graph/badge.svg)](https://codecov.io/gh/open-mmlab/mmcv) -[![license](https://img.shields.io/github/license/open-mmlab/mmcv.svg)](https://github.com/open-mmlab/mmcv/blob/master/LICENSE) +[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/mmcv)](https://pypi.org/project/mmcv/) [![PyPI](https://img.shields.io/pypi/v/mmcv)](https://pypi.org/project/mmcv) [![badge](https://github.com/open-mmlab/mmcv/workflows/build/badge.svg)](https://github.com/open-mmlab/mmcv/actions) [![codecov](https://codecov.io/gh/open-mmlab/mmcv/branch/master/graph/badge.svg)](https://codecov.io/gh/open-mmlab/mmcv) [![license](https://img.shields.io/github/license/open-mmlab/mmcv.svg)](https://github.com/open-mmlab/mmcv/blob/master/LICENSE) [English](README.md) | 简体中文 ## 简介 -MMCV 是一个é¢å‘计算机视觉的基础库,它æä¾›äº†ä»¥ä¸‹åŠŸèƒ½ï¼š +MMCV 是一个é¢å‘计算机视觉的基础库,它支æŒäº†å¾ˆå¤šå¼€æºé¡¹ç›®ï¼Œä¾‹å¦‚: -- [图åƒå’Œè§†é¢‘处ç†](https://mmcv.readthedocs.io/zh_CN/2.x/understand_mmcv/data_process.html) -- [图åƒå’Œæ ‡æ³¨ç»“æžœå¯è§†åŒ–](https://mmcv.readthedocs.io/zh_CN/2.x/understand_mmcv/visualization.html) -- [图åƒå˜æ¢](https://mmcv.readthedocs.io/zh_CN/2.x/understand_mmcv/data_transform.html) 
-- [å¤šç§ CNN 网络结构](https://mmcv.readthedocs.io/zh_CN/2.x/understand_mmcv/cnn.html) -- [高质é‡å®žçŽ°çš„å¸¸è§ CUDA ç®—å­](https://mmcv.readthedocs.io/zh_CN/2.x/understand_mmcv/ops.html) +- [MMCV](https://github.com/open-mmlab/mmcv): OpenMMLab 计算机视觉基础库 +- [MIM](https://github.com/open-mmlab/mim): OpenMMLab 项目ã€ç®—æ³•ã€æ¨¡åž‹çš„ç»Ÿä¸€å…¥å£ +- [MMClassification](https://github.com/open-mmlab/mmclassification): OpenMMLab 图åƒåˆ†ç±»å·¥å…·ç®±ä¸Žæµ‹è¯•基准 +- [MMDetection](https://github.com/open-mmlab/mmdetection): OpenMMLab 检测工具箱与测试基准 +- [MMDetection3D](https://github.com/open-mmlab/mmdetection3d): OpenMMLab 新一代通用3Dç›®æ ‡æ£€æµ‹å¹³å° +- [MMSegmentation](https://github.com/open-mmlab/mmsegmentation): OpenMMLab 语义分割工具箱与测试基准 +- [MMAction2](https://github.com/open-mmlab/mmaction2): OpenMMLab 新一代视频ç†è§£å·¥å…·ç®±ä¸Žæµ‹è¯•基准 +- [MMTracking](https://github.com/open-mmlab/mmtracking): OpenMMLab ä¸€ä½“åŒ–è§†é¢‘ç›®æ ‡æ„ŸçŸ¥å¹³å° +- [MMPose](https://github.com/open-mmlab/mmpose): OpenMMLab å§¿æ€ä¼°è®¡å·¥å…·ç®±ä¸Žæµ‹è¯•基准 +- [MMEditing](https://github.com/open-mmlab/mmediting): OpenMMLab 图åƒè§†é¢‘编辑工具箱 +- [MMOCR](https://github.com/open-mmlab/mmocr): OpenMMLab å…¨æµç¨‹æ–‡å­—检测识别ç†è§£å·¥å…·åŒ… +- [MMGeneration](https://github.com/open-mmlab/mmgeneration): OpenMMLab æ–°ä¸€ä»£ç”Ÿæˆæ¨¡åž‹å·¥å…·ç®± +- [MMFlow](https://github.com/open-mmlab/mmflow): OpenMMLab å…‰æµä¼°è®¡å·¥å…·ç®±ä¸Žæµ‹è¯•基准 +- [MMFewShot](https://github.com/open-mmlab/mmfewshot): OpenMMLab 少样本学习工具箱与测试基准 -MMCV 支æŒå¤šç§å¹³å°ï¼ŒåŒ…括: +MMCV æä¾›äº†å¦‚下众多功能: -- Linux -- Windows -- macOS +- 通用的 IO æŽ¥å£ +- 图åƒå’Œè§†é¢‘å¤„ç† +- 图åƒå’Œæ ‡æ³¨ç»“æžœå¯è§†åŒ– +- 常用å°å·¥å…·ï¼ˆè¿›åº¦æ¡ï¼Œè®¡æ—¶å™¨ç­‰ï¼‰ +- 基于 PyTorch 的通用训练框架 +- å¤šç§ CNN 网络结构 +- 高质é‡å®žçŽ°çš„å¸¸è§ CUDA ç®—å­ -如想了解更多特性和使用,请å‚考[文档](http://mmcv.readthedocs.io/zh_CN/2.x)。 +如想了解更多特性和使用,请å‚考[文档](http://mmcv.readthedocs.io/en/latest)。 -æç¤º: MMCV éœ€è¦ Python 3.7 以上版本。 +æç¤º: MMCV éœ€è¦ Python 3.6 以上版本。 ## 安装 MMCV 有两个版本: -- **mmcv**: 
å®Œæ•´ç‰ˆï¼ŒåŒ…å«æ‰€æœ‰çš„特性以åŠä¸°å¯Œçš„开箱å³ç”¨çš„ CUDA ç®—å­ã€‚注æ„完整版本å¯èƒ½éœ€è¦æ›´é•¿æ—¶é—´æ¥ç¼–译。 -- **mmcv-lite**: 精简版,ä¸åŒ…å« CUDA ç®—å­ä½†åŒ…å«å…¶ä½™æ‰€æœ‰ç‰¹æ€§å’ŒåŠŸèƒ½ï¼Œç±»ä¼¼ MMCV 1.0 之å‰çš„版本。如果你ä¸éœ€è¦ä½¿ç”¨ CUDA ç®—å­çš„è¯ï¼Œç²¾ç®€ç‰ˆå¯ä»¥ä½œä¸ºä¸€ä¸ªè€ƒè™‘选项。 +- **mmcv-full**: å®Œæ•´ç‰ˆï¼ŒåŒ…å«æ‰€æœ‰çš„特性以åŠä¸°å¯Œçš„开箱å³ç”¨çš„ CUDA ç®—å­ã€‚注æ„完整版本å¯èƒ½éœ€è¦æ›´é•¿æ—¶é—´æ¥ç¼–译。 +- **mmcv**: 精简版,ä¸åŒ…å« CUDA ç®—å­ä½†åŒ…å«å…¶ä½™æ‰€æœ‰ç‰¹æ€§å’ŒåŠŸèƒ½ï¼Œç±»ä¼¼ MMCV 1.0 之å‰çš„版本。如果你ä¸éœ€è¦ä½¿ç”¨ CUDA ç®—å­çš„è¯ï¼Œç²¾ç®€ç‰ˆå¯ä»¥ä½œä¸ºä¸€ä¸ªè€ƒè™‘选项。 + +**注æ„**: 请ä¸è¦åœ¨åŒä¸€ä¸ªçŽ¯å¢ƒä¸­å®‰è£…ä¸¤ä¸ªç‰ˆæœ¬ï¼Œå¦åˆ™å¯èƒ½ä¼šé‡åˆ°ç±»ä¼¼ `ModuleNotFound` 的错误。在安装一个版本之å‰ï¼Œéœ€è¦å…ˆå¸è½½å¦ä¸€ä¸ªã€‚`如果CUDAå¯ç”¨ï¼Œå¼ºçƒˆæŽ¨è安装mmcv-full`。 -**注æ„**: 请ä¸è¦åœ¨åŒä¸€ä¸ªçŽ¯å¢ƒä¸­å®‰è£…ä¸¤ä¸ªç‰ˆæœ¬ï¼Œå¦åˆ™å¯èƒ½ä¼šé‡åˆ°ç±»ä¼¼ `ModuleNotFound` 的错误。在安装一个版本之å‰ï¼Œéœ€è¦å…ˆå¸è½½å¦ä¸€ä¸ªã€‚`如果 CUDA å¯ç”¨ï¼Œå¼ºçƒˆæŽ¨è安装 mmcv`。 +a. 安装完整版 -### 安装 mmcv +在安装 mmcv-full 之å‰ï¼Œè¯·ç¡®ä¿ PyTorch å·²ç»æˆåŠŸå®‰è£…åœ¨çŽ¯å¢ƒä¸­ï¼Œå¯ä»¥å‚考 PyTorch 官方[文档](https://pytorch.org/)。 -在安装 mmcv 之å‰ï¼Œè¯·ç¡®ä¿ PyTorch å·²ç»æˆåŠŸå®‰è£…åœ¨çŽ¯å¢ƒä¸­ï¼Œå¯ä»¥å‚考 [PyTorch 官方安装文档](https://github.com/pytorch/pytorch#installation)。如果你使用的是æ­è½½ apple silicon çš„ mac 设备,请安装 PyTorch 1.13+ 的版本。 +我们æä¾›äº†ä¸åŒ PyTorch å’Œ CUDA 版本的 mmcv-full 预编译包,å¯ä»¥å¤§å¤§ç®€åŒ–用户安装编译过程。强烈推è通过预编译包æ¥å®‰è£…。å¦å¤–,安装完æˆåŽå¯ä»¥è¿è¡Œ [check_installation.py](.dev_scripts/check_installation.py) 脚本检查 mmcv-full 是å¦å®‰è£…æˆåŠŸã€‚ -安装 mmcv 的命令如下: +i. 
安装最新版本 -```bash -pip install -U openmim -mim install "mmcv>=2.0.0rc1" +如下是安装最新版 ``mmcv-full`` 的命令 + +```shell +pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/{cu_version}/{torch_version}/index.html ``` -å¦‚æžœéœ€è¦æŒ‡å®š mmcv 的版本,å¯ä»¥ä½¿ç”¨ä»¥ä¸‹å‘½ä»¤ +请将链接中的 ``{cu_version}`` å’Œ ``{torch_version}`` æ ¹æ®è‡ªèº«éœ€æ±‚æ›¿æ¢æˆå®žé™…的版本å·ï¼Œä¾‹å¦‚想安装和 ``CUDA 11.1``ã€``PyTorch 1.9.0`` 兼容的最新版 ``mmcv-full``,使用如下替æ¢è¿‡çš„命令 -```bash -mim install mmcv==2.0.0rc3 +```shell +pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html ``` -如果å‘现上述的安装命令没有使用预编译包(以 `.whl` 结尾)而是使用æºç åŒ…(以 `.tar.gz` 结尾)安装,则有å¯èƒ½æ˜¯æˆ‘们没有æä¾›å’Œå½“å‰çŽ¯å¢ƒçš„ PyTorch 版本ã€CUDA 版本相匹é…çš„ mmcv 预编译包,此时,你å¯ä»¥[æºç å®‰è£… mmcv](https://mmcv.readthedocs.io/zh_CN/2.x/get_started/build.html)。 +**注æ„**: PyTorch 在 1.x.0 å’Œ 1.x.1 之间通常是兼容的,故 mmcv-full åªæä¾› 1.x.0 的编译包。如果你的 PyTorch 版本是 1.x.1,你å¯ä»¥æ”¾å¿ƒåœ°å®‰è£…在 1.x.0 版本编译的 mmcv-full。例如,如果你的 PyTorch 版本是 1.8.1ã€CUDA 版本是 11.1,你å¯ä»¥ä½¿ç”¨ä»¥ä¸‹å‘½ä»¤å®‰è£… mmcv-full。 + +```shell +pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.8.0/index.html +``` -
-使用预编译包的安装日志 +å¦‚æžœæƒ³çŸ¥é“æ›´å¤š CUDA å’Œ PyTorch 版本的命令,å¯ä»¥å‚考下é¢çš„表格,将链接中的 ``=={mmcv_version}`` 删去å³å¯ã€‚ -Looking in links: https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/index.html
-Collecting mmcv
-Downloading https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/mmcv-2.0.0rc3-cp38-cp38-manylinux1_x86_64.whl +ii. 安装特定的版本 -
+如下是安装特定版本 ``mmcv-full`` 的命令 -
-使用æºç åŒ…的安装日志 +```shell +pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/{cu_version}/{torch_version}/index.html +``` -Looking in links: https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/index.html
-Collecting mmcv==2.0.0rc3
-Downloading mmcv-2.0.0rc3.tar.gz +首先请å‚考版本å‘å¸ƒä¿¡æ¯æ‰¾åˆ°æƒ³è¦å®‰è£…的版本å·ï¼Œå°† ``{mmcv_version}`` æ›¿æ¢æˆè¯¥ç‰ˆæœ¬å·ï¼Œä¾‹å¦‚ ``1.3.9``。 +ç„¶åŽå°†é“¾æŽ¥ä¸­çš„ ``{cu_version}`` å’Œ ``{torch_version}`` æ ¹æ®è‡ªèº«éœ€æ±‚æ›¿æ¢æˆå®žé™…的版本å·ï¼Œä¾‹å¦‚想安装和 ``CUDA 11.1``ã€``PyTorch 1.9.0`` 兼容的 ``mmcv-full`` 1.3.9 版本,使用如下替æ¢è¿‡çš„命令 -
+```shell +pip install mmcv-full==1.3.9 -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html +``` -更多安装方å¼è¯·å‚考[安装文档](https://mmcv.readthedocs.io/zh_CN/2.x/get_started/installation.html)。 +对于更多的 PyTorch å’Œ CUDA 版本组åˆï¼Œè¯·å‚考下表: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CUDA torch1.10torch1.9torch1.8torch1.7torch1.6torch1.5
11.3
安装
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu113/torch1.10.0/index.html
11.1
安装
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.10.0/index.html
安装
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html
安装
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.8.0/index.html
11.0
安装
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu110/torch1.7.0/index.html
10.2
安装
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.10.0/index.html
安装
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.9.0/index.html
安装
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/index.html
安装
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.7.0/index.html
安装
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.6.0/index.html
安装
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.5.0/index.html
10.1
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.8.0/index.html
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.7.0/index.html
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.6.0/index.html
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.5.0/index.html
9.2
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu92/torch1.7.0/index.html
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu92/torch1.6.0/index.html
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu92/torch1.5.0/index.html
cpu
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.10.0/index.html
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.9.0/index.html
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.8.0/index.html
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.7.0/index.html
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.6.0/index.html
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.5.0/index.html
+ +**注æ„**:以上æä¾›çš„预编译包并ä¸å›Šæ‹¬æ‰€æœ‰çš„ mmcv-full 版本,你å¯ä»¥ç‚¹å‡»å¯¹åº”链接查看支æŒçš„版本。例如,点击 [cu102-torch1.8.0](https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/index.html),å¯ä»¥çœ‹åˆ° `cu102-torch1.8.0` åªæä¾›äº† 1.3.0 åŠä»¥ä¸Šçš„ mmcv-full 版本。å¦å¤–,从 `mmcv v1.3.17` 开始,我们ä¸å†æä¾›`PyTorch 1.3 & 1.4` 对应的 mmcv-full 预编译包。你å¯ä»¥åœ¨ [è¿™](./docs_zh_CN/get_started/previous_versions.md) 找到 `PyTorch 1.3 & 1.4` 对应的预编包。虽然我们ä¸å†æä¾› `PyTorch 1.3 & 1.4` 对应的预编译包,但是我们ä¾ç„¶åœ¨ CI 中ä¿è¯å¯¹å®ƒä»¬çš„兼容æŒç»­åˆ°ä¸‹ä¸€å¹´ã€‚ + +除了使用预编译包之外,å¦ä¸€ç§æ–¹å¼æ˜¯åœ¨æœ¬åœ°è¿›è¡Œç¼–译,直接è¿è¡Œä¸‹è¿°å‘½ä»¤ + +```python +pip install mmcv-full +``` -### 安装 mmcv-lite +ä½†æ³¨æ„æœ¬åœ°ç¼–译å¯èƒ½ä¼šè€—æ—¶ 10 分钟以上。 -如果你需è¦ä½¿ç”¨å’Œ PyTorch 相关的模å—ï¼Œè¯·ç¡®ä¿ PyTorch å·²ç»æˆåŠŸå®‰è£…åœ¨çŽ¯å¢ƒä¸­ï¼Œå¯ä»¥å‚考 [PyTorch 官方安装文档](https://github.com/pytorch/pytorch#installation)。 +b. 安装精简版 -```bash -pip install -U openmim -mim install "mmcv-lite>=2.0.0rc1" +```python +pip install mmcv ``` +c. 安装完整版并且编译 onnxruntime çš„è‡ªå®šä¹‰ç®—å­ + +- 详细的指å—请查看 [这里](docs/deployment/onnxruntime_op.md)。 + +如果想从æºç ç¼–译 MMCV,请å‚考[该文档](https://mmcv.readthedocs.io/en/latest/get_started/build.html)。 + ## FAQ -如果你é‡åˆ°äº†å®‰è£…问题或者è¿è¡Œæ—¶é—®é¢˜ï¼Œè¯·æŸ¥çœ‹[问题解决页é¢](https://mmcv.readthedocs.io/zh_CN/2.x/faq.html)是å¦å·²æœ‰è§£å†³æ–¹æ¡ˆã€‚如果问题ä»ç„¶æ²¡æœ‰è§£å†³ï¼Œæ¬¢è¿Žæ [issue](https://github.com/open-mmlab/mmcv/issues)。 +如果你é‡åˆ°äº†å®‰è£…问题,CUDA 相关的问题或者 RuntimeErrors,å¯ä»¥é¦–å…ˆå‚考[问题解决页é¢](https://mmcv.readthedocs.io/en/latest/faq.html) 看是å¦å·²ç»æœ‰è§£å†³æ–¹æ¡ˆã€‚ ## è´¡çŒ®æŒ‡å— @@ -119,37 +203,12 @@ mim install "mmcv-lite>=2.0.0rc1" ## 许å¯è¯ `MMCV` ç›®å‰ä»¥ Apache 2.0 的许å¯è¯å‘å¸ƒï¼Œä½†æ˜¯å…¶ä¸­æœ‰ä¸€éƒ¨åˆ†åŠŸèƒ½å¹¶ä¸æ˜¯ä½¿ç”¨çš„ Apache2.0 许å¯è¯ï¼Œæˆ‘们在 [许å¯è¯](LICENSES.md) 中详细地列出了这些功能以åŠä»–们对应的许å¯è¯ï¼Œå¦‚果您正在从事盈利性活动,请谨慎å‚考此文档。 - -## OpenMMLab 的其他项目 - -- [MMEngine](https://github.com/open-mmlab/mmengine): OpenMMLab 深度学习模型训练基础库 -- [MMCV](https://github.com/open-mmlab/mmcv): OpenMMLab 计算机视觉基础库 -- 
[MIM](https://github.com/open-mmlab/mim): MIM 是 OpenMMlab 项目ã€ç®—æ³•ã€æ¨¡åž‹çš„ç»Ÿä¸€å…¥å£ -- [MMClassification](https://github.com/open-mmlab/mmclassification): OpenMMLab 图åƒåˆ†ç±»å·¥å…·ç®± -- [MMDetection](https://github.com/open-mmlab/mmdetection): OpenMMLab 目标检测工具箱 -- [MMDetection3D](https://github.com/open-mmlab/mmdetection3d): OpenMMLab 新一代通用 3D ç›®æ ‡æ£€æµ‹å¹³å° -- [MMRotate](https://github.com/open-mmlab/mmrotate): OpenMMLab 旋转框检测工具箱与测试基准 -- [MMYOLO](https://github.com/open-mmlab/mmyolo): OpenMMLab YOLO 系列工具箱与测试基准 -- [MMSegmentation](https://github.com/open-mmlab/mmsegmentation): OpenMMLab 语义分割工具箱 -- [MMOCR](https://github.com/open-mmlab/mmocr): OpenMMLab å…¨æµç¨‹æ–‡å­—检测识别ç†è§£å·¥å…·ç®± -- [MMPose](https://github.com/open-mmlab/mmpose): OpenMMLab å§¿æ€ä¼°è®¡å·¥å…·ç®± -- [MMHuman3D](https://github.com/open-mmlab/mmhuman3d): OpenMMLab äººä½“å‚æ•°åŒ–模型工具箱与测试基准 -- [MMSelfSup](https://github.com/open-mmlab/mmselfsup): OpenMMLab 自监ç£å­¦ä¹ å·¥å…·ç®±ä¸Žæµ‹è¯•基准 -- [MMRazor](https://github.com/open-mmlab/mmrazor): OpenMMLab 模型压缩工具箱与测试基准 -- [MMFewShot](https://github.com/open-mmlab/mmfewshot): OpenMMLab 少样本学习工具箱与测试基准 -- [MMAction2](https://github.com/open-mmlab/mmaction2): OpenMMLab 新一代视频ç†è§£å·¥å…·ç®± -- [MMTracking](https://github.com/open-mmlab/mmtracking): OpenMMLab ä¸€ä½“åŒ–è§†é¢‘ç›®æ ‡æ„ŸçŸ¥å¹³å° -- [MMFlow](https://github.com/open-mmlab/mmflow): OpenMMLab å…‰æµä¼°è®¡å·¥å…·ç®±ä¸Žæµ‹è¯•基准 -- [MMEditing](https://github.com/open-mmlab/mmediting): OpenMMLab 图åƒè§†é¢‘编辑工具箱 -- [MMGeneration](https://github.com/open-mmlab/mmgeneration): OpenMMLab å›¾ç‰‡è§†é¢‘ç”Ÿæˆæ¨¡åž‹å·¥å…·ç®± -- [MMDeploy](https://github.com/open-mmlab/mmdeploy): OpenMMLab 模型部署框架 - ## 欢迎加入 OpenMMLab 社区 -扫æä¸‹æ–¹çš„二维ç å¯å…³æ³¨ OpenMMLab 团队的 [知乎官方账å·](https://www.zhihu.com/people/openmmlab),加入 OpenMMLab 团队的 [å®˜æ–¹äº¤æµ QQ 群](https://jq.qq.com/?_wv=1027&k=K0QI8ByU),或添加微信å°åŠ©æ‰‹â€OpenMMLabwx“加入官方交æµå¾®ä¿¡ç¾¤ã€‚ +扫æä¸‹æ–¹çš„二维ç å¯å…³æ³¨ OpenMMLab 团队的 
[知乎官方账å·](https://www.zhihu.com/people/openmmlab),加入 OpenMMLab 团队的 [å®˜æ–¹äº¤æµ QQ 群](https://jq.qq.com/?_wv=1027&k=GJP18SjI)
- +
我们会在 OpenMMLab 社区为大家 diff --git a/TERMINOLOGY.md b/TERMINOLOGY.md index 07411b7..61941e3 100644 --- a/TERMINOLOGY.md +++ b/TERMINOLOGY.md @@ -4,27 +4,27 @@ This document is used as a reference for English-Chinese terminology translation 该文档用作中英文翻译对照å‚考。 -| English | 中文 | -| :---------------: | :----------: | -| annotation | 标注 | -| backbone | 主干网络 | -| benchmark | 基准测试 | -| checkpoint | 模型æƒé‡æ–‡ä»¶ | -| classifier | 分类器 | -| cls_head | 分类头 | -| decoder | è§£ç å™¨ | -| detector | 检测器 | -| encoder | ç¼–ç å™¨ | -| finetune | 微调 | -| ground truth | 真实标签 | -| hook | é’©å­ | -| localizer | 定ä½å™¨ | -| neck | 模型颈部 | -| pipeline | æµæ°´çº¿ | -| recognizer | 识别器 | -| register | 注册器 | -| schedule | 调整 | -| scheduler | 调度器 | -| segmentor | 分割器 | -| tensor | å¼ é‡ | -| training schedule | 训练策略 | +| English | 中文 | +| :-----: | :---:| +| annotation | 标注 | +| backbone | 主干网络 | +| benchmark | 基准测试 | +| checkpoint | 模型æƒé‡æ–‡ä»¶ | +| classifier | 分类器 | +| cls_head | 分类头 | +| decoder | è§£ç å™¨ | +| detector | 检测器 | +| encoder | ç¼–ç å™¨ | +| finetune | 微调 | +| ground truth | 真实标签 | +| hook | é’©å­ | +| localizer | 定ä½å™¨ | +| neck | 模型颈部 | +| pipeline | æµæ°´çº¿ | +| recognizer | 识别器 | +| register | 注册器 | +| schedule | 调整 | +| scheduler | 调度器 | +| segmentor | 分割器 | +| tensor | å¼ é‡ | +| training schedule | 训练策略 | diff --git a/docker/README.md b/docker/README.md deleted file mode 100644 index 60d5c9d..0000000 --- a/docker/README.md +++ /dev/null @@ -1,70 +0,0 @@ -# Docker images - -There are two `Dockerfile` files to build docker images, one to build an image with the mmcv pre-built package and the other with the mmcv development environment. - -```text -. 
-|-- README.md -|-- dev # build with mmcv development environment -| `-- Dockerfile -`-- release # build with mmcv pre-built package - `-- Dockerfile -``` - -## Build docker images - -### Build with mmcv pre-built package - -Build with local repository - -```bash -git clone https://github.com/open-mmlab/mmcv.git && cd mmcv -docker build -t mmcv -f docker/release/Dockerfile . -``` - -Or build with remote repository - -```bash -docker build -t mmcv https://github.com/open-mmlab/mmcv.git#master:docker/release -``` - -The [Dockerfile](release/Dockerfile) installs latest released version of mmcv by default, but you can specify mmcv versions to install expected versions. - -```bash -docker image build -t mmcv -f docker/release/Dockerfile --build-arg MMCV=2.0.0rc1 . -``` - -If you also want to use other versions of PyTorch and CUDA, you can also pass them when building docker images. - -An example to build an image with PyTorch 1.11 and CUDA 11.3. - -```bash -docker build -t mmcv -f docker/release/Dockerfile \ - --build-arg PYTORCH=1.9.0 \ - --build-arg CUDA=11.1 \ - --build-arg CUDNN=8 \ - --build-arg MMCV=2.0.0rc1 . -``` - -More available versions of PyTorch and CUDA can be found at [dockerhub/pytorch](https://hub.docker.com/r/pytorch/pytorch/tags). - -### Build with mmcv development environment - -If you want to build an docker image with the mmcv development environment, you can use the following command - -```bash -git clone https://github.com/open-mmlab/mmcv.git && cd mmcv -docker build -t mmcv -f docker/dev/Dockerfile --build-arg CUDA_ARCH=7.5 . -``` - -Note that `CUDA_ARCH` is the cumpute capability of your GPU and you can find it at [Compute Capability](https://developer.nvidia.com/cuda-gpus#compute). - -The building process may take 10 minutes or more. - -## Run images - -```bash -docker run --gpus all --shm-size=8g -it mmcv -``` - -See [docker run](https://docs.docker.com/engine/reference/commandline/run/) for more usages. 
diff --git a/docker/dev/Dockerfile b/docker/dev/Dockerfile deleted file mode 100644 index a4d9e23..0000000 --- a/docker/dev/Dockerfile +++ /dev/null @@ -1,31 +0,0 @@ -ARG PYTORCH="1.8.1" -ARG CUDA="10.2" -ARG CUDNN="7" - -FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel - -# To fix GPG key error when running apt-get update -RUN rm /etc/apt/sources.list.d/cuda.list \ - && rm /etc/apt/sources.list.d/nvidia-ml.list \ - && apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub \ - && apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub - -# Install git and system dependencies for opencv-python -RUN apt-get update && apt-get install -y git \ - && apt-get update && apt-get install -y libgl1 libglib2.0-0 - -# Install system dependencies for unit tests -RUN apt-get install -y ffmpeg libturbojpeg \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -# build mmcv from source with develop mode -ARG HTTPS_PROXY="" -ENV https_proxy=${HTTPS_PROXY} -ENV FORCE_CUDA="1" -ARG CUDA_ARCH="" -ENV TORCH_CUDA_ARCH_LIST=${CUDA_ARCH} -RUN git clone https://github.com/open-mmlab/mmcv.git /mmcv -WORKDIR /mmcv -RUN git checkout 2.x && git rev-parse --short HEAD -RUN pip install --no-cache-dir -e .[all] -v && pip install pre-commit && pre-commit install diff --git a/docker/release/Dockerfile b/docker/release/Dockerfile deleted file mode 100644 index d5e25e9..0000000 --- a/docker/release/Dockerfile +++ /dev/null @@ -1,23 +0,0 @@ -ARG PYTORCH="1.8.1" -ARG CUDA="10.2" -ARG CUDNN="7" - -FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel - -# To fix GPG key error when running apt-get update -RUN rm /etc/apt/sources.list.d/cuda.list \ - && rm /etc/apt/sources.list.d/nvidia-ml.list \ - && apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub \ - && apt-key adv --fetch-keys 
https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub - -# Install system dependencies for opencv-python -RUN apt-get update && apt-get install -y libgl1 libglib2.0-0 \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -# Install mmcv -ARG MMCV="" -RUN if [ "${MMCV}" = "" ]; then pip install -U openmim && mim install 'mmcv>=2.0.0rc1'; else pip install -U openmim && mim install mmcv==${MMCV}; fi - -# Verify the installation -RUN python -c 'import mmcv;print(mmcv.__version__)' diff --git a/docs/en/Makefile b/docs/Makefile similarity index 100% rename from docs/en/Makefile rename to docs/Makefile diff --git a/docs/en/_static/community/1.png b/docs/_static/community/1.png similarity index 100% rename from docs/en/_static/community/1.png rename to docs/_static/community/1.png diff --git a/docs/en/_static/community/2.png b/docs/_static/community/2.png similarity index 100% rename from docs/en/_static/community/2.png rename to docs/_static/community/2.png diff --git a/docs/en/_static/community/3.png b/docs/_static/community/3.png similarity index 100% rename from docs/en/_static/community/3.png rename to docs/_static/community/3.png diff --git a/docs/en/_static/css/readthedocs.css b/docs/_static/css/readthedocs.css similarity index 75% rename from docs/en/_static/css/readthedocs.css rename to docs/_static/css/readthedocs.css index 9e3a567..3f425fc 100644 --- a/docs/en/_static/css/readthedocs.css +++ b/docs/_static/css/readthedocs.css @@ -4,7 +4,3 @@ height: 40px; width: 85px; } - -table.colwidths-auto td { - width: 50% -} diff --git a/docs/en/_static/flow_img2toimg1.png b/docs/_static/flow_img2toimg1.png similarity index 100% rename from docs/en/_static/flow_img2toimg1.png rename to docs/_static/flow_img2toimg1.png diff --git a/docs/en/_static/flow_raw_images.png b/docs/_static/flow_raw_images.png similarity index 100% rename from docs/en/_static/flow_raw_images.png rename to docs/_static/flow_raw_images.png diff --git 
a/docs/en/_static/flow_visualization.png b/docs/_static/flow_visualization.png similarity index 100% rename from docs/en/_static/flow_visualization.png rename to docs/_static/flow_visualization.png diff --git a/docs/en/_static/flow_warp.png b/docs/_static/flow_warp.png similarity index 100% rename from docs/en/_static/flow_warp.png rename to docs/_static/flow_warp.png diff --git a/docs/en/_static/flow_warp_diff.png b/docs/_static/flow_warp_diff.png similarity index 100% rename from docs/en/_static/flow_warp_diff.png rename to docs/_static/flow_warp_diff.png diff --git a/docs/en/_static/image/mmcv-logo.png b/docs/_static/image/mmcv-logo.png similarity index 100% rename from docs/en/_static/image/mmcv-logo.png rename to docs/_static/image/mmcv-logo.png diff --git a/docs/en/_static/parallel_progress.gif b/docs/_static/parallel_progress.gif similarity index 100% rename from docs/en/_static/parallel_progress.gif rename to docs/_static/parallel_progress.gif diff --git a/docs/en/_static/parallel_progress.png b/docs/_static/parallel_progress.png similarity index 100% rename from docs/en/_static/parallel_progress.png rename to docs/_static/parallel_progress.png diff --git a/docs/en/_static/progress.gif b/docs/_static/progress.gif similarity index 100% rename from docs/en/_static/progress.gif rename to docs/_static/progress.gif diff --git a/docs/en/_static/progress.png b/docs/_static/progress.png similarity index 100% rename from docs/en/_static/progress.png rename to docs/_static/progress.png diff --git a/docs/_static/qq_group_qrcode.jpg b/docs/_static/qq_group_qrcode.jpg new file mode 100644 index 0000000000000000000000000000000000000000..7c6b04f561da283ae622f4219ea9b8cabf8f301a GIT binary patch literal 71955 zcmeEucU+Upw&(|_*bosBq-tmq1eD%UA@m}U5IQ0t5PFwx0|f-61f(l9BoL%`P^1V* zCzQ~nhTeO9aqqL;d*6G`efOQ;`SVQ}zBT1rYs#8gWhOZpIQaowR+d+i2S`W&fQ0w~ zP9{iiDap#3Jk-*VS5k%iRzOMz5CiFD0I;)nb=FdNa6?y5|HjX}KZ2>5i{l^gKY0@I zZukAf4gfto|D^o?mUYhD!o`eOX_XjVor%PWjWs92S1o^spZ+X2|GizWQlb? 
z5aC;ve}NzW1@>@tbS2jL_Org3qw`OAmD6#o(`!1Ai4zrWbgjWdmIMC*ZcmJG=f6><)#CI5p{d6FHECE}<0=NMv0rr3yz)OVq zfjhumK=@<`kOj`3IYWNt^jUIp@^k0Tp1*jN;^Ku17q4HTro75T&%(?^&&YU#jhFKV z=r$`O<1G=c+js5?3JS7tib;v`OY#Z`^8Z9aa_-!@ix)1^QBctF-(q*NqiR3s<$#14=Eq+}#NIs1=5ewOUinbV{{OD!$~BxGbHWM|HvBPTz5 z<}BGcfP|Fn)M=_S8pq+WO_GRhnOc}q~p>_z0bHoT~AVy#Q|Wprv9kqkYNJ_$hfmp1%P<_xin zgX(9|&t{w=`$r3jO`tk;<>uYf)b}+_ot}eOWl!+HMdItERAf|u43PBf%)382^Z%cJ z9RczQG^h1Oy%o!&zvlku;6pgbH9OQ|`5u1?cst8Fz!1}&HU55%Q))hLkJF-|^+9k)Vux_BUiCfeIFmuY1Fr_~}9b;9sst z+h-U5+noOm&##29Q1D6wWu`!}-^-igGE}J1mzzhY;4_p&=?bvCm$}FCy7hg?I@h+} zql=dirRj&lG*sH>O}#}tdp=Yvd;>oGfyk$N2<1!1yhQMM=Cg?cy8p=k@6Z43-0e@h#NS1g`$~*8Y+6zZ+4K&HC0_$LsnDxnDN@Sjzc7?(#pQY>*LnC8Kx(=+7hK#gp$V z9yN@#4kyW!O?Rd74DOrdPVI?4-9Pda=WfS*C=$}&quQ8^7tRx779vnMrsW*bD?DkEYwRjDi` zX4FDs!d865NbLkD&?(p6exf!h6(kd3Ei?FW7R(}5cstW=OW$qMZPK!XncoOKI~^K( z-rS72GMk$zx}$a2Td|xVIn>7Ai_Z#`QMs-Wb;bs!uQS9oubmKF@6r8uBgw^V(6eto zB~JV57L|5=f%0~Kb~o0LS&;r^BfoOYz|*B5#~lazqwCZc3z7~w3>+Kx?RLBeWi&t% zQ)(WUGOvH|Z)t$;JH6})v?4r~7D^I*#Iv;3m`f=_1RDK9K$ZwN$yxW0XGBapL zW1e4J>y;k_Ig5p{S#ebjaGdcM1F zKaI1&37fQb;0R`=Zv8Ts@>?r#S{HlWFnWHNSTXzK%W-^e*0N8~C|F$+ecs_&iuK#L zOUAKMV|Wi~WO#LZ)d_&L91NpEGW!aRMF(=Fxk&$nT3d_f)ckm8EW@BA3@+9^=Q0)eOS1Hu81M;kb?X_d2Y_5f_GTXJu-K-O! 
zp0MUNncN|-F2gAiR%?7wgvrRU<~ueSW^NQw?D(E-iA3r1r}UP@4Kc%eeS6oG;^;>H3e$IaiMw$mcKY4|<21?9yn zgg5RYFUe;)n~H_Q#Z;5kBK&2XP5>|e?VD2?p2mGzsu|&rPCd+I{I12pbkOmVtvZk> zQEmY~ii;vU&qJ46Yhb25{d+OlWZ>u)qwr4lHt=s&)LtS|1X!|K zqu%dJ8+C$A0=#=sg!zz{`oWm`6Cjewuf?7Yg-(SXv5jerp8#u;=RnFysXF(vla@A* zB?4>S<`_IF2o&L^GR~Jc0c1Y!l(<-i&mN_SpnYpO`w!Bq({(on4Z2aLW?7=x8;MSx~k1&ZfU8MNTR1~d*{N{9*y*fe|Db{S zR*z7@>bltLuJ0;6b|&8w2BkY9$JYuDMsLqo&7v*J=7YiJs>e=@wdGoqo z3}i-L-~`A&x9HVZfT)OnEfS9mPHAv{6_>Zr9abNaI&SGDaO}S>=7ZT0QPu5{?n^9m z)oH$FU?hQa#)i@!pGgW&wux=lIRO@BpL{Pb)FSroTWFYMN7ON z{cbb(Wnbb!qP<6Je3&eO$S~5MS%^G?B660-C-NpDbNPs$Dfh%RnytE;PJLf-Bu1zA zv_*fym%3g@2~>Y2#>Phy^31C3JC}&@`m-fEPXUQxF=Bi|_J659x+Oq@7cx?vI-+R~ z($sX!4iM>xZ5*V&uo(lLlw;9&oIjvF^*1f*Z}ORBKM46HiTd2DyYeZ>C@%0b(tp0f zNOqTBZXP<1lTY*2QI+y;_cP;W^%7G~fR7Uh#iS>FQR5ySPbN5*Va*Hpq@?_6ncdbC z;MoCwiI%hX>Xu?kl%TH+*p$-7Sj+bP*!QR2j?0zNhwuQ*3QyJiIq>`PCset|r}7JJ zJXpKCT4u(#&KLN%9?MZXJm11~YBlXUSJS1`7xv+Gx`w#s@b48T5&?ccOM#ne}uJFV3qnTrj$DW1>F?sW6l8}Zz0nq-GeD+d9{>a!K|1I~|N0BW0 z&z($jO^&H+KkfFYB#GT8a_)$HSr4vtX9SFP-Vd{dM;s!XT+YGJ(JkD=V)$YOLzLnzas`c7|tb(4s~Teclw(;7zKy=s z7=Ic5b-Aq{JiSrd1kd!-Ziq^5lCM3yb^@4U9-RPq-jtQRUYZD&2VP1jtyo9@@;^R4w zT=11ebM;W@EOhp^h^+r}LbsmXCXE9sGb4=4_`S>fUe$z!eU7T66Tn+W%c^HPf1(i4 ze*z#yD7Vp`7yTGhs^)~SNmbhO=UcM6ls_qf%JUGGx16o8W?fhag7ER1cYI;vLL@w- zR7cwq(QO^(D|G+3;Y_BXHRc6zT$zqu&%~Sn7~vCO`Hn+5M%{0}v>TRSM?ee)_mPMXGC&$y7JtffDXrtybUCvDz@fx#o19x)!^ z86_lf_u&pP)mCjmTYlk`V1BP1j2<)!M%#nZ;yMxq=XMiTabw!~Q**5jqVu*Fjx`>h z0G}MT=_@?Aq=s%~p8)Fq+k#uDIXwjh3PFEN(3?I&Ic3?TIbzyIQzVxA)tolQdC&cc z0SG;b-_SbSa3^ z!f?{DCuOUhmrM8anV+W6@8R!;?8ON8xQkB2i2I}bpFR2gQl3$E`dxg!e@6Zu*~Sf` zN%D_sGXJbP(Yg5_CI5FCL|a7T^WU@nNc|^g?hl;AXCwZN3AlbR%{8dI-Wi;=%KuxL z42Apft*N}o=SWBK0%5lB=LMkZTw`;XgYwN!03Bj}ZkCH3gGm+c%aN7Dui_ZisSz+^h3g>{+35oi}aagvh15Cj02oN69Fa3+_g{l!!i&uKd;tp~@9 zm{X{urg2&~r_M6yj10GZ^fg=nH^zGI>B@LRBl3F9qUE_{$V*#knTunXZYhZCQy!dZS0EY&vq9K+Qy&va48LNFw>vf zy9R|pk6IjT`7Ag4MbrD>W!+eX?^9{!W6{A=CM`&p?#e;Ajt8lEPLBX=?}2r{zE+L6 
zui2xqfYt!`=c0A6I|jaKwCR79lj1JgPQ0d#sl7|HA)vKlTkOtm6271F@d+?ZfoY2w zLGic{hc=?O`}=zr=9D$aB6JZ-;KsgpGj4Y8D>=vMz2w$ia2oEPLbOGoTYLJ`b$LfW zZhw%ha6sgFjJ+Mo6esRJEA^y{mvpX#Y^wJ*@D$#_(Y6ohQv6tPZHv&EGt+V^Fb zMX$SvVO7ONI8_Xjf>ZWp$Og#cP(r6mRrRteMi5ud>qaGc!CD9UvJiPr7urgzW!#O+bMK3Wby@S1D zLuwA^xO>2B+abSVHI=9Fd-BohsV)o}7x|sovSDxwDjI6K5p}m2f*88vY{&)@fpV>< z8tNKQXfKfAIsrWIy!oK(EIM?lq;l4qrlz2)dAG1PM`N<7CZQ5tH9R+LYdeI1GTaM( zdM(Zh>$)@+nPgHG_Qa_QH`8KyUDd@xTwS|PI|e~wmN&bb`@&!ZsmN^X`z1eel-D%Rq0HL(Jj*$xkdK_`nks*ttl-ItIJy5 z`u0U0m1;cgFQ73onSzlbSZmI8jOl?)u88GL=Aml?%CKSRZFrsry#M|-$ zQsK6(15HBP*TgqfgR-9oul1&2Z)TT{lu;SqIGogC8?o*3NVUwMjq~^7^GtDIg7y2C zzCHoyFH7-+Eu|MV#^KrH;b3Vc1`%N`E(Y#_H?{L(+2Lc!o226`{4L)e`xI2?goeV! z+6`GX#bMAj*RZVAK6?AejR$4sZS@i8jRufk6zvXQ_j}{++e69a2~@M*pbGu@1_W%i zd^pr&wmyZs9O@s-ReXV?I0w%4`HBUp!V;h!`l)cVDC2eS) zKcqU}q3K`NeDutd;YJ_a$RT5aSHzE;8Qgzw^nwVr8(ovDh3EP-8;EJu@dQA5@afeK z%B$WZSre?HV~cfK8AepP1;t97>rCM~rL!xiqk|Kt5X(qUVY(oJ>o7T%=M*^sw4O_m zlBTaduQsqr4bh&1Wxaq9ToredgGNLB=X`Ie3u7{vmT_t>rR{^+VBJUskbBl)Y`%8> zy%(-5M;UIMNAtCZR=S|PE<~$e$PgA)Wf%#U8w9`qHeqkLgXuY&6x3NY?ldcsEppO zG%22M<5ERZ_47_QUi;QpOdZFJg&Bgx%1jR0I;`&swRu@!6*#?kl_!hTBIlScWIQR) z?VDpa>km*On+m_8SiSqiF$ifgvYy0&Ebm{;GAn7phe+<64+(hut*EP9cQ$?-Tnt}_ z=LfZgZ`9mlW6E%|M%50RAAe05P=LA0EX|w%jajEL!Xuhq-5wHVn*m0wh##)pl2O=2 z@8Lm{wyf=B`}UGb>8S3@gA?6bBG=b>2U%Ol5JfH3F)tL@{LM*isZ+#+#uHyikgF6&2V9cyqc)&WAAcgOVYVl%cP2f zOKC<19C=0-9xtjRg@RNgWW1fUy}f1LjV6}Xe%KKT! 
zWh(*|T&qN4$1RUIxb_B&rSYr9@-=e$IK5mv)zA4(}>oTw#T`yT2Oe z9uh8x^tq_GXi+&-ISk!*bTgo-fen|qu=~zW^T-bAu^LGkwk!%!l|fu$(^8H(*<}ku z?MEn1t=a6OxO9dig@fo1?_!vrdXCP@&CDL3Np9G*Gyd33vz%9@F>yXiMfJcIk-g`i zU<~4Wz$YvY>!_9GtK zZ_r|6osh98lU9#J;bScex!c2>{WnXeWrSn%kN7fNX&N`|lzTB*x}~B~Dv)92aPXZ!4 zR2~&{>syuX3Ej44^ii-F*n^J!n_8S4GgUU{m!nHF9+FSm4*d(GuCs5%;yGg7^E!7J zCvboj-c82?_|# zbwS(ZsqT(FE^3@Vn3J&SxsJl%(Mb(~HW6(KLYkV|x3cl%a|R__0r=1WuG;My6-ovF z>j$+>=_x{I(u%FU4PzOq&0SaT(lWFX{T71=*K$8O?wPO`l{O6Kewo=>uI??@4a8G7 z(x_6<;ZkVy8_Wst{nVEdW;D?`wHUWV=VCzG(}!}EgqL*5m4buPZRQz=%jx?~snFHu z+6KE_D^2H9Pk`g*h5_84o$t>4L&1M?JQEa~5m9B3=yHl{;-J>|lP=5QZ9;>mXkl{?Kskk=xU|YBH!OhAJuffpd z)a?1aRI}{iX+v}xV=f}^1OVU6_UWwT?{SaAdf2d22(g)@?>L~*c@IFKX_)uB1;I9+ zu*ff}ZK8O0d~yhzWYW-gLo7_w-|t?Y_m%E#Ur0`#zzAv({>^dMEGj9XWlI&~MVnUo zzF^^$3OUK+$91_QGdvY`IMmd7I3E}oyvxb>)Klp1-h(&Y-hg0y?O z-K>q#`R0lg5bxcCu&BSPmEl)j9{0KU7e$=_-CRq%O8z(rnS=Ec;D;lVZ>ehM{LxmPo<;i$;0xI6w`?5fn72S8Ag&q2F>ImAG%W_0_n{m@%cAcHJN>navu1O@Bd`{UPiu ztF4oKuNhr$oe=81oZK>Lm7T(IXS~hv5oM{?BfO=KSIAML#^TK*6wyIPt>!=WaeH2s z;g=HV#C(Zm>(cas$ic1VH?MsdW#*Y$`y944qb`%ZuCh70@L_bprx;q)Bjh}&n2UZ9u^c+r-Owmf8|ZR)*2C0`RRh7T=C;3~!PH(} zY#$P!y=V8>zIdnwcf{^o?%6NJQbnm^u%vNsfrr+$xI(Wx`;qsY=q6U9Lx}%7*z!F3 z{o;&v@2Y!FOqwMQx-JHb?k6vhi9k0>uqYG#rqpBjOVQn{vlo_>2bLbTo<7wcsM90g zba%F4WW*T+=5>1ce%0Xw$fniM%`1Gwm>E+zv?5KlGhh$R?`@!fs4j*o zmPPrqZ+?cL?{5O!yNY{@q_31N!z3NkdvLP!)sA6a)iewT{pWQ*aEA@u*0gFfv%tj& z3&-dO=_oPK+ZS_$>Q@eE;2{L{VLs2t)(8fe5``=^MT~H@jB}>)R%zu|w`-V!ITeFQ zFBEe&t8V$>Cc3XUrJ9Q9a9O!_B{GTY)`v{Q^v%%x)Z?ko*TXqA=T&r&MJg_pIN|f| zE+E$m1$UurJld;?DR$|#=9G^|U z%yzlbDe_y@&_Z~Wn0kOAcza6N9f`{%?&(fZ2bcU!nP8p7qZXoLl0NB7cSTXJ$uhsc zX~!Zfx`JhU!+-K2TUu*x?Uv5k@wCYCX(`#iBuM#Ji3f2gI1ADCkW-?^hBX;v+L-oQ zQwEnI=92iw>4G|^j2qOyyG#YS-}FxJN#Lk4T8M;YW|T>-D4M}5x13x-{)i1ud&=uI zFuRHEgrV(fPBl(TabB6V<`dw|319_rfl38wDyYGU&ODXZ&Z%PjmFV0fW>CIB0PmR~ z_Y@;m2emu1eZ3JAJnf0wEAA_nS(1B~t&*fP()(5Uhpylg+xq3ETFU+H;91_12P0xP znkRZ0PUET&ZjrUBI<=dIRUPWni?(l)E_7xeYLYg;)E_ZoY9CTFy1*VL{$_|(PY}_R 
zN@R5$0YbC2SyJh!}nmDGzry@LG*K`>7=34xSKs*?1<978b)Un{a6c=vVZ2C)(Swmx@Ru(?(tS>AG&kO@?oJ%3=DeR)Bs8XEi!b?d zy8To|TZ50#-PANtM4KFhpo*mtcRIY}XBTZl_c2l#|i{; zH08kDESznwu4dJnP?Y_>KC4`Xnz+_H&b?>?xJN1chDk#m zGY*GJUp3|yjNcbQY>Tu?lH;X)IK|6Hu+E$4JkNPR)c4btrvC|G-eFaDt#jy3&8T1i zM`m}$^X-GEF*awpIVO5B`b7@rcE9!jh(>ghaFShamF>jFeTOVxh+$VgYfus|Gy>Ch zNTo&*u7?$7O6q=S#Ko3@G4C$#QcRo;o`#YNCzvNF;{+Pcpn7wrZo9+1GP)h80r8S9U8Nd$erLxy05Uh*LU}{Fq;2wdsrD;(;sTbTPG8b7p+_ z7G2=GC%_e^5DN*_?r>k}7~DlSQZz=d$* zzJJ>-7a{{lUN{aZ52$VXgd!?!JhJmw(@^ z6jSc3N@jJzLrr}`TA;EQ61f1~5uy=+iIwUv`#C=VJoTRp2_-1wdE73A_H=d0YOtok zbTIGh5oil4r|4x_96)kap^&>rc(ekCKuH=jxa=A#tbO{F4Q_->|>I_aTcMo^RLBnz8VHsEkR6nua~%GE(nGNl(5(W5c-ZM@LWi-ggh!8{lS8UIy**jCSne>It7Abq$t?b-8bSiOf(r_BiEsdJt=Y0 zFKvFjMAXE~ZfC%M+`dH?-yQE)^74VUZZYFxS?_9kFd7okU7}OoZs;IJ1z$G|YKPD; z(D#XJZAm?OSz7|O_CHRH%yE!gw23<3pX!mm%9LF1T?d~wWA{z|R z954bmrOFOydH~M*<}ShBxg$lMWy7f4w`d;B;nKK{uI zvz^~7Vvx~t*4oB|tw=?cAg(y&(xqvPLvY-UhC-I+1V;*&@*h4}aJ!acTckT;vuD$# zSzFyU>aeQi3qT}928Q*OCnqa$2E4RkZsATeW&!YwZ#;^K;#zpGNfSq$^U*1f55fW7xPEjncQO)HBugXy zeK2F}YhoC}{#aW(ALjo&1+g!@gB4s7!gd&_sfVJJER}ikrUl6`n-%U15u z)P$H;LQ3jpt%P_^-&S{DFF3y+C|I!2a64a&f>=m-8C~j)|SpTV=-Pv!p!PI0iGf+uxQb?GW6I!OL2> zCJ}YfOiFUf{V3P1-~(Z5P^uUW4V9)R@@n75lFlKJEA%R~H==x~-JD8T5Viu9n{gFHw1vE4;QjbO8 zk&Q08ppwqAwPX0qBZak542yH_bA02OFJ$!jEk$dOVbGRTzm>fdX9RmrcM&yDi~Mlc z!9iDk3(m!;y+4gQ?R?_EGFde_l`Q&3CE|VhWUqn}wH6&~XxK=+!UnjKDX8u3VX!wI zTaY@IX#>tS*Cm8ruh^6`v{-`7?OmlbXMB5EN9B~Wr+H9Kue+#|ivBVJ|=SP2~f4UqW^sJ?Z?1DY6OOxcvSD|JYHD_88wMYqQaLU1~H^(jzWUqX<~D-qg5jx3-B}8B${vaUv-msoi}eV{FqFyn8)k$(f4XT ztcYN!_Z5IObL)*IANrjD!`|AXqX$<-xGflw7~$n9HA6lbbuXu|PANy1_{Td=6}aKK zWx>@i?5yv5&3$&;w>9D1$**Q*IQ{acuQ;+6(3M42Br)xYHBy7CC1-v6jIp@>mJk^6hLb)NlAM|)fx-;DN!gntL&;on?(ivl zI6$V(oFvX`^O_L;vv3O!3z0`HD47`vUFHjXkCu+35D=^=%O6+6q|~xzyw8 zey9;RXU7T@R*@70lBO|gDojX8*~#h|K(LQJ97UjgR8HT%`c5%_4Y5&$}t(00%K<-P=0>HolX$zd=>8W6uuCl6Kwh7>%>|8ns@&Iu=0`x4rO^A|=U_h;7ClSrh^9YE_Y5Z{YSY z4mAX4t&jz#rp8o+fl-1Mq{PhkSd<)0{n%B2Cpp{c7&5dMD~VH+?L_s6JC6=@XJVC~ 
zRK^9dBl2mP2DR&II|pxOW(9UR`rr!Nzu^bw`&2=B;^S8@%?xTfp@Npny7(hz7lm?= zY%Lzs7me9ow@@-V%}lKBi0Y|*H@n_Qsk(k>4VxwpsWK4Z7++a$_x(PZmd3J1)+^>~ zprX@)F`9@WhdQ65F%Jle<%VB}?#oQLok{8*pJ-vvp6clx*A#UW@oY_bcS%-j{yo;q z8BGtS&k|}=zg?(M_AQEQ2wz_JXijj7V%@!W&kWJh;%;BwMnHhw`7H1L=#_oiM-4fx zb~R>5mN`6v-B5{F;_l!HkQ~*C+lmEqtGbqWfrHrMXcAWCvExEi5*0Kqua0jZhBJ(` z{et_|IvhSC*)ehAjGMEM{hN-ZnwBdM%NExpwluI$_TBvnNop#Pc@?1wadA5y_Ynt) z#7|h`Wt_3uV6A~guNOj^(LM|F6$~;@Y-vjM3*O-qC(gxiQfOqdj?5-wgMwuxxjcI^*cJXa$55g2~ z+Z~p2+8~`VnudLzahvh7mE4 zAa1%Y<}x@yypXXfjBpT1fV6)sb|1u88JE9M7G2L#CvYB~0PD{)GTFS{*d4+c*gTrk zAoAr@*Xz_9!C=Bi>F`nS(&juT1==8MYl3rHS)!$xM;aYLBYK@Dy8l8u%5EDQx(VG} z&3Opno2wKfKo{psc3ISFM{~8+G+c|IxMTes?Jk9?evy0{37I0wwrjfy>~-KUeq$z( zlI~#qoASkxCgByCLw~0&Ozlnyv8DbF+eQ05MKl}6n7sb;e#-kX=}z}Bu|h#%Zu`{7 zoTI1=6smlH<>Hl=0gV0}&09DtHCKDt>^<>+c7u*g-M?RSSdeh!5)!O)hotD&o(cTz zy9s1$MwzmV#3=O7_E$R7j2B53x{uAS!7)=hnIehR;99N&HMP@B5|1*2A|8BS&JTK} ziuEsQ0s2!vW|nHROR*_kRYXrAz|%h0l7*=fWZFLKfkm3*Py-nudinRI~Q&Kiu+%}WPe<=`X@F1Jq2ogSwH6M6O^4) z_m17ir0)tP;^<7dX?qy|n`ua+rnU?y`J=8RYen6MEvR4LL(UU`ck^|IyG)Vkp=@jQ z%{im+B|3j9#e1~gzfzaiR}{VAW(V>t^;(xtkN%mGtn6k+4jgyAUc35=#G~q0>iUg7 zFMV_>*vnVtNxY}=+c43E376z)LlB6y0F7yT5tel3)xrfeufAA((S@p~E(=mTDw;e} z@)?D)k{UWnjL0>i@6Nn5G&F+{$jq1JtFMi=0D7#w;?PTp&6+oC>x;m#d}{J(AP|;4?#Bqcw+P1M;WyN=$5b zYWRY&MM8&+PGUT=?8EI&NjO8x3!7ZCm&sp>Y^2Skj8X5>5t8ZkJrQ8`FgnoV8YMPo zp&SGU3HX}2~<{$*azgB-Ko%T zGu7xg2t?fiqVALI+p0BSi=)V@FkUJWD1Wov1cQD{%x%fkWJHyj;*?fy{xR7{$z#1_yDM4n zcaB*<+&bs~Y;B?VNkWN^l7b0}2a433^t-;m8oJa1%GAe1w}AQ9gn43zq*g*EFu4@| zDuFnIF{%1qp(;-T zpC;!-ig8cayq_`zgSiWWL)w1MVCBLEt^Ga>zU)GVU8RA8;4-Rsn2Y;^4K6G)=1EJh zJC!3E2W_XJWdSq1y8mMmx8$E|zO8v>m*p8ibf5DU{l@2y4;AudO%zVaP8o+@56y(r==;f+Mml(vN9&))8IUVhHLeZm8A>SQW!cr zLd*cEnkuI06bw7hkL4ve zBY&nOD}G;#A!}FPka+m~N>TpmHUtD1jr~f={z6Hm_>8u3$h~BgYedtgObdRmAH&3c zyA6q21dYSc*@<9YX{gBAmtR^jc;aP*=EYR@-}bD3&)azV(Q4Sj`Ik7&13eXK9m=!I z{}?q2spmxNHp$IM!?m;F*%JCbN zfIWNzotkvssB2)q8FFhfjCQ@kte$6 z?UpocQG0h2`J!uu;oe$Ds5Q!TO7ap5yV+3EF^5YP!LQE4{dr#T5ohL70#8-nL+b+= 
zJJg4w`9oTmNU1<$$ZL=Ve{lAp^4dr7x*p@=jP2ap$3tEpiCfH2qYlNnzH9MYxqQV| zv`<$FUGc=-Q$ILV=;!X47pZg$^mF$_qcKE z8d!{qGd7n~uR{?p|35Z*I8Z}eU8FKYDSI>%EbKDxJWXy*OM{7a&|0aecjcj^gPG?* zhy40Z1aaf}gGj$kZld1y^KX(i+D4Q&Cmi><2ZmRc2@-7{i<};Fvde{rSw)}-<%X!ZQ22G@E%76o z)ZB_IZ9>?7DZ@^chx4z+vzJ^VTKY=QRrF=vsb5?n>nxeWP{e=4U8`g-3G}$DJI7_M zM~y6p)L}Gfuge+wys=*RL*#^0+KBf}e-td{>Fz(x*3+ur;vt+J=Awyis^qGe!8+G3 zt$>s%iVmnIdXfD?&AC0P}gFEgq0UuRC5*E)pszUEEVlflY+PshW6|t{dp$V zN4dFy$qMqQF!}0P9F!LVao6~!iA+d}5#oO?w;=Q1R~sYNWf+5m!3e~^ z<|l{4uCE*69eJ-uC`FiVt|2!T#&_<)RqQir^|D7aY^sM*k6dueVKLcjLPoDt(L$O6 zujD$u1rq+`TmKEGZkFpN(lsoulK(&!GC~4)y8Ye&z6oX{;s&I=l48}m8+6dts_0|k zv%g#2x#9f#YkaQc{!_f~Hnp0zT}@tX-?F;36z$a2sdZmTh1uig3&GG(;x#7v#AYPS zWI`uJbi`!ME_|MSCw-}~T0sxv%2Tpu!bpXKIe83{!RQhSs)Js^V6zDK=e>;b{R;~` zzin2q4kZ{1nX?s}?&xp_JuhS8Tr^B$;_1O7yws%SF~=HA=;Cl`c>T1hcYgWTMiIlY zA>-pZI-CTe*Q-<=Hn1?8tJ#gScc?j=u8_Ha->l0CPU2DjEV%ik5GSO+prPH1F!|&3xfm)OiuEJBx1(M5VvjGh+I(%AZM~hkhIA79D=8poEl9EpH8bTkidh z^Ox4BF?z|!xPQM-!{YK4qf#|p+pSBe;z1swP8Hmwq@w&&r{bm2e^b^1nw*l=_$75+ z$z`zn)x9-n^tL)_1g*z6qnooC;fu4i=M+Qs$z}eW6VD63%O%=zP-vVYC8r+CXZqc6>FqJ_!HI-wTNU>{)Jr&RVJo;UE^48`YP^X+KY^%@dq63p58hSB z-^Y$YGuKQD4g!H(h<}f&*g^firU(;#oHv|A=Bb#Qx5e z{KciMkJbR{6W|}ZHOURTQZxf`U#YTq)R|;zou}V@pcCV7SN(9z&tYkj`>*B%Kzau+ zJs?fFMInW$H>?d+#3%D{kDKv%dpMP?479u8osP9VKl?`}hSy6*?WrK0Javur=_nd3k1I0U(uba%CZzwWin_|w9zqr)RBdvQgZY4GK_iAM|g3go24A;{fYGi z5O$dJ*mBrS8yETEark0QA@mS-oYdkeLmRTUf?JmHU{o9HHk_0k{33%Sv#WM6@pD*Y zCB3&1cl?HUr^f%4%l?qPpYGiG12s-D^> ziK!Pp)@$|<9ZL?A8d+REPKXj4@lbGDIXq`a*|k#BbMRU6m-b=p5uBgCeIL6N-_m5F z_jtU!d-z7Y<$4*dTWAD&x=x1s6=ar2^m4#X9YcM5AOW#%68a4>ojI}l)x+Oueq_yF8<$qCHV@UjQ%~Fnk4}BT2-kk(^%CB@ui!fYgBcJ4uQ@CAcrJ&2>uo7K zhlI|N#r_&~jcF2|6X2_Tht$mjq(UWGO5f1M^(J_~NmbgE59ak=m~8#XNNCOxFUV4& ze0GUwb^K$R)1R*n6?Q24O=~VU{d%2LX5-0mtiR3)5bSl0k38iHg2AG{B~0WdCO9i# z;%MZkDXr9{-q4b+{pzALgS+P(aE;?*Ild}((wV;8 zx~%5T$k?z~sW+ogqf?@V`q%MN%j9=QEomc2Gab10Fi~}>2~%h1fz^3_^qbGzU}8mL z0*IHavlOXi5<4~(_FE5c5(JK@jtfqJv%!Kh?OQszrAj7d#p@Ni)4GPonpdBCfxwx} 
zQ^v13yM>sE#gU~7CO*aM%6jaI`o!WmuT(S#-L!FFUgxRYUzj|G)I0E>u2vlTn~mtOYv<>eVEv!mN$vtFx})Ie<|X;b5L6Y-adVsheLe*-ZC5| z2H=StObU|tm8`qf%sckQ^V47TF|Q!7CVnoFLt)#0EMo3I4bp{erxMDSM^c)IHCnz& zSVdMy%dT!K-N>-L#I``%UGUo5@HhQGw7myZliAkC9owKHAR?kP6%dpT36qG8R(2Gb1>0J=N^CXb)zIX1Ocir#%X3fl6 zVWmFjIp^&C-@pAor#`|Y$TPfgxAszan-AT7B4IfVfwM;6kWc?v-d2jkuB7jMJZCex zQr4d3eBQ&wU5dG|Bx5->vX;$|um}8iV%V-^FMEWX1zvFwTi*UTpeu7{yWN%FWq-SM z)bQJZJwU1Y!*WcqUZ#|5j$%)4KgJRSmGeQ*Nhyi;uiRx>pey8gr!wy3EKU4#u*07Q zS=#9-uyHF{{(A)0cDALsPgSWPfQ!gAFk7vi)|XYuhsO@;Lkw)LUCoehgARnp&VE_b=+@fK|8*huuCbBmC~F}LYdw-(n(Vk z@6sO?w++d#w4z6-BEuR2@@ou@Qip?X%Y7)=X;`Pi|@ZNgXwq_wX@T8l(CG z$tg_C=urG+!BSV3%i{;#NKf#4)fe+qc9q-h(M?th3E3f@v5tzF*QC5B`s{BQ=^0?1 zeht>?HXI_6K%cxOZz(iDFb(haS)MfxzGI@OO$xX?veyQGZbLMLwbNp`b9xirMZ>S6iM5A$o$(Z}9v2|wsq z?#tFTJb!++SV9NnrsWMvduKAU#2Mdf8C$tWoJ-mzp&cqSIidWheQ8;KLr-I^eN0Vj z@_f>O9n%M#x0C3jb11|@ywiw=)Gw)O5x@I+W`SxN13A(%p6T}V=CQ%9t7;{b?^aXL z#t2~tQQeQtDb@z(KPZZ?Vv z1JyqD$h+Q10KA&SldrSbMn6zzIEor$eh9S*i_|@s{X=I(dQsB7EvToqN;qZxwm2P? 
z>V4J7H_}v~L1`TQtAh2OMts%rvTYOl7Q4BqikFCMO&b{dxtTklL-R zt=@EJeEl46@XF0mYePg^gKGFM=QR$gE2zDG8U&S`yi2QE<$dBv0`ezs>UW%)X*I z>m5n+H;lBtXzJ607am&_CJGU%liNbXC`pKr*vK6aRvE3=M`5C~MGkj6UKqBsss&j* zPp%*?Oiv|y@ua-UbQ`nOU)T=PR~t&j|Db;`ZV~Z=I-N}C0?7w0?(8NYzf!h z5B264oS$E@tJO$&NZ_2?C1&TMUX|GNWCu9VqcmmAmJJh7<6k>1N(+|DdM^*@a4(|+M*f9#XpNkzCT17o0_ zp5?%O7igzH`n7%p?DPt8ffv9|Z}6;jdMA;&nbVt?ZSc^7AX{oGZp)U#JF+ET)W_gP zY1WNt#bWNt)R?4P6wk;3W*6eTt{>5F7g1!_;AzO0Qw>mD^aY&#iO#K9ODFVkCi+&V z%03m)$&z;`CF7#VpgN0GX0Aw$AcwC4TP4;<-e!;Lzjr;h~#HSN+RM-33#V}@&O6V(yXIRks0rIws$ajbKx z5<44m%goa&Uapy!F=lRJD+~1d{g-mPajXHYZ-XU(*z`81Xu~h^0{v9+s5%*H#d9kHLI-)YTGd;XT$2Va8s;+$iHpGpn5xyJcS7(~W6vx^P365}z^0Ce~zb)W>s0(!NnEow*Iv2>8XDugeZ*Z*L#L zwbSOW<{zZv+9VV{X{()+^7TQ(%Jbu%Zj|=$if&a&bX2S2OVJH0ya-& zeSgD9zdhhLEG(EH|IItcjE)fZPJ{7-&q>eldsV-^cPejL=n(eKFJSN7c~LYQ4e1G& znMAg?PJ#6TQ^MBC0JhGZ7d>$&KjAZ?LsM@?Sa>ItDws+o*+cJ zjJ8_^m4it#Q{j5IGoMgJ6aEPOZ{K~G@fFgeQ*6_MC%;b%p29+ZVrIp4*T24Ia2MBi zfnP(7PZG&Ed8czU4&GqF;5e-};?>oQereY;8d_#xkEfDPudzvgnp@ooL|a21^Gl=<088{a2TYwW=j_= zsYR%r6?TgaY0OUC@?arD`?BsNXR8Aq#8K zJh6BEm5a8%KEtznP4E)FeoCgbMI$b@HOk5brPYj{%l*!G-pOmKhH#md>pWUd$Pf)> z06@#V3-*Z$#M-lfMLq~!yEYaO9AfMGMi{K|E17?KVj}oe zTkmzbX7{g^au+sr%vpaP#vKW<6|Y3Yt-}SHlFsgAdQV5X|KVm(x*1m}=wNE!9*MyO zi~Z#}c5|J>zq&m$nFoxpqdyp7JpmHza7B}al=?PCj%G(PrrBeV7e;&^o-1f86=$~* z&p_6W>{Q}UvT%AfBlyscp&h8mfK%U->}4YWWshyM)qT;23K4gDgpP3S`O7J>{y}=7 zCZa3i0KHVuF%j_u1kWA3P3!}qn}d=~y?+}A9XQMT8%CPwEgxXJA2yG%#9!tsb(Ook zlu8j}mT4g1Vo0x(lCmcD-9`spR}elViOFwZ7{A5o3WR5s$-YC7@T_ly#X`b!oC4>M zR3JRd13BjIVIA{q14|*1LbO8H6p}s3r-eI?uzpy1=GU%XDA12zK9}WtqxP4KeSCr+ z)1sOphnWSOR&eAdCtO{|BTzw&iodWGPk*fH2(8{k6u1K!+l9{|x3PgMZ#H}F-72+Y zGQ#>;zWmXh)9;(7Ve0&nzFlWZNF}dLN$t3WPs=qArXbHOwiLKdK3q=k(OLF&bi6cA z=C3%K?-yqN^AJ3>%+bQ-P8A7tDC^M!bz3&Mw>wcd9l@^iBXKJHT}5kN@gON%)~M@b z1tt3&K6@es(;?2AEK;SWDQ`twH69jfiYQ)H)TNU1@uAgAGit8&2$E}{E1{$x zVJfMrUg_`^(qLYt2qB$nx5s5`J1x;QQFa(6}87rp@~?KMCaD;&rySt zXyt0LE~i^rOr@tZs94z z5{x3swzTU zEmuliz$xIH8m8VtoI)WNkAphk6hH+DP&v1u!wUX8Zzkw8S4Hm)rF< 
zidTl;pG1(W=x6xnVGcVQ(a~K+(2a9M_;D*b>HNJEyScop9!t-@ANxo1cBQXZktozz_pjc8jb>Z^T4 zA#E@|fsCSqcgoeq!Rf^0Zf?5os^IRO-TP*pRL^DA2L(2$oLay3r0C0)9!uSh0U%#B z?yD;?oX}sKK2=6L)PZQ%o8m~#NQq3x-n6Wj@?!6MA$JpW8Xt6z`gmyIQ`ma*qS~wn zD7+9WOx!QCHXO37UEucV(L+WaHOSK^wk~&yt@2VM9ouL=}?fcVv~X+W6zI1)8YZy zYh36G_O3c;VF7hxhsW0@2wzPee6=e@rKCqUvT;8TOFn4J5VAY|=JxHXkG-!Bwup%S zsgUw2NzfRMSbO?;ujQO$ruOV(^|f}qtl_8O-pg=Rz^8p_5cRhX=W2Rw2_T*%VqN^- zKs;Ac&cI;#I+qkok!h4kSZ*PFsOd;<>t!X5XpaWxU`{L+o5zhUUY9OAYeyiCVmHs+ zK657x>?BlC*P1nTegt=)>a+0y+*%MI$;yMQux$X&WH`-s)YtnNh(wZ-C)uD#L_-j4 zMo=UIrVjv9RL%AbhAfS3l4`{fu)VOk_gkpV*^R(=-CL<`kLr^R9*zUPB_R0nAH#ow zrgNru?C=V8tM_pD7KgQMX}^FASZ+}%YvN$JeGMWAu-xW1(2ZY_ntQZyCOWQQ`bNE? zq0#|;-f-776CVzv%>AD4k&FEFv~ExDEk}B{v=uVkYSWlV5xVrnJlVxY@hY8O6^E*( zrltiLnuP<>{4*~mS5=&_Azec8iUogU<_>rVt&9H^S`2o_hIY$t zI?a~uX}jRA=wOc^YI{vffX}P~d}f7!&u|Lvs#V*908exp{oqxHU3@Os1E?^lWFu$2 zbcKR^{jq|JuQN=n>n2jnL?fV*JjnNTb*Z-V{pRg_UyXN(7a>C^Gve2xl`jw1dz3u1p6#Ywmg4q>bAvwnbEs1natPGeIW&9q`}%%WvZFJDpd#lL zZ}N9%W^I3@aN7d zWxP2kwasOQlz>tj`iF`5wAgRhh2n2AHL8l=S(P>uYV(Uz+-w(}!`?jrD}?Qq#l^<2 zM`4^Bv(Fnngn%F5h$7RQ7G;g{`NuO`L-aib-}x_jn;gCd)BX)3@j|loRYa)AwOQZ^ z8<`{8_4g)oQPVPt%RF1x=U)qa{|$?u%M%rqX?tW_wqQpK@&jUR_Kc`7Kd1u`XvgK% z5H)#uVL9`Fhe6oUi+b+)J`AT+mv~}iHveHTD8fXSihky8-a7Z|5Pl|f87`1zIyj{t zc*nkPlqsPledNfzMs7q}g1-+E$2?|?emBx!P*PH*eJ|G`P)(Ol&qDNUyl0qKQBhg` zXNwiV{Dw>3$shmoMcsR;yP*cCfXcT)l#0HP6pVwSRPSx>tdhW;vDfcFkfs9bDr0-L zZrI2iT~Hh0hkqH--G~b7?X9>N$!{CAXzlyhF}*A(DBCK^27*dpGh^m9S;Or6_izp*DIuif~+dEppGuDcyh%$6Zo@H zNvqq>&z|{)FARHGBUVsxa{9+?-pai9BdWL`Zr^}6x<;RibKM%=JaN!Q^@Lzmk+bB? 
zFK;83XuD99WoY6&*8BvB)-U~)3bA;KM=-;vejgC6Ly}_f`FNdO`tTi>A$!775z-&w z8fbFDHBIMC2g79OBr2lO*@u2#XSHsH&F8*0XT9Ogcikx=0umXDSM`)hqVk^chvonj zb*tNiYv_ye4bF>ebpEigG-67#oX&8&J2st-WdbGgVk6VAn`;vDh6{>d&=`Jkfk7+S ziX#r1-xl5FlV#Nc#~tD-%O-l0-dg1wM*cDw63wpY`^8ad+vvRq_Uti<-jM1lOBleH zeSa!2u9{)jflYP|RDH7eL#y$a>6CGbPvx+tzlJp$jb1(J(_e{z7#V3`*MLJpYpf5Ulj&TgS4$fl(M*k^UviUGvbs`(JykR=qK?nMZcH}k*+Ia-&Le-3+Ehq zJp*(iPplAEp>l{ligtoMks5W2m1#E|q+6q{Oa|gNzR)*)-B8!w7n=f97-hzbqIcRk zX^kKtFj9u3%EqMSjtxJ`NdK@dUb`+pv$&b}x}=23@TZh;rD$err|p)lO!Mg}fw?D3 z0?uODKZ2Vt=YMGH(CnQKD?XXhUsXQC9>8VE&e$O^Ts?AAT)bJriTd7ia~;1sy{0)O z<(_zYs!^?;Nsc5>h3d{xO`qB?+Vr&5zIxoTiO$ED|MAkZzV!Mu2Kb{c`-M24YV7yq zL5DIhPsXQv=LuWvzFp`fS^qt@e$de6Y_Sr?;!(!AwO-G39SZ?(QLr@OQyQWerI~kJ6s&+v;LmFuYVArVj2WgtZ{l9>NSB;#N5x)vuEk2 zgkz)LBGgJMyj)ZJ`;Y~Wy%>+Lo+TW9|3r^Txq8@_+*_~u@FwpYHES8_=dmv9yGxe>REsi@SaCt`@TmH9B&0yX6^9HHNo&`%Uy*1)D6^EjDcIzydL17C zXe8h=p%!oU515w~47b{c8Shg`N`J&8KlZkF#x9^){Zq{YlVheQQ;pE!pOMB5atcMcd^Ky@IEjz0&mz}CP4Vp!vBc3igaC5so=8Z@Xo@#Y;tN+hkq+`*Svm_KU#NnT=<-vXCI*Ikfn=3o0 zrONU?zuTet!3hq}okd*HCnV0mk$`Cx z@O~xUQlI5UOU`@Xye4@@r1(abWu-f`ZzV^%aIl}>S0D61`uDW{4l36E2XL_?xdO$PoEkj)r z2R7i_dT4_h@B4dIjBF!5niI%h6d5_reMKlIr^t$!Nhy?VQi}Qg4;B(sojO&S`X$^D zR&eo8ugJ%~#ylPZBs%Mg;|gC0icc709x_!D6{yP>WUU2ntc`J_KH2Z*oTsx4Y7?Mp zhaGxQa-r z+qK>qp|<1b1|qdc;6pVP65;P|Vw03IavLy~Q4owq?vFXp$K3#sjzWL}0HnjuLm6%b zAsyd{)ZgLFM0Jh$oR0^G?iK1R#^;!W8ZCssTDjDl&<7ZMF+beqUpcG@8_IuGhXjQk zWH%?aMW@#HS(V-J+brF#OYdvG2MTC&$m^A0OQ_VM5A}|bCi0iRNaW3f!@n}7iXsXV ziOh~i(^mizSzfFLmHkT>-onTCy@tWaIG!|Bh*^=sq;z<9dY-Q6`Sd8C&mloK>!-US z+eT5%p!g<1BV$_jlV#rUjbRO|@b`trC0_wbTifNHjr{L2aixy|___a1Z( z7ore_*DS3`dOCM5yHT-pLw%OI|Jkd^E(>s1`@L3Hrn~zxthOi>$tEmV49q7Fvm6^I+U$c9+eGT9`WI`>LnQJsDQ5t@ z<#EJMLAt2^9U4-BN=x=!QME5uBDPN=$SU&w6 z4qPFRFFO^ci?7@EVTUf*^oejy>DbKdwJHJ0Ah1s%;un`1a@%wygB)HBRh1>wUsyMINJvL9T+xEwWPVZWpNk&WW(1-!ideKz9a-Jdnb2)v@BgAj$mr-Ag#oeiOroxW_1$INLv|)7md)h_AOo4 zuAPkYIEz}l>32jPL`bxB33W>WGS5S{8xIad!e9rku9=&1} 
z+qB>)@&PMLBhprw7P^gLUYne5p}%30Tfv>yze+#@(W-W}{Q~IPG<5J7!kqVywx`?9IxIOOcr3+T^AK)-V|F>hq9o&{K;J z(;6m+*SXPm?tt$^&4v3x4{XJyZW~X!dlopuofxC8FldtJ>Sy&#{a9q0ZfeSTfe=+F z8d?+%k32uO6d)vD$LQ5G@>9u>1X>+ngXNJ?oIgJQjLF;_>ZwMLMcvQR%^8V>VmUCZ zP%PKj*AMJAd$ABQ#c?-J+8kqtc6X7r8V*l+Yh8AUIjpBxdS_hsMfaF!iUKgyp|*lV zQj3BBX`60z_&d5$_1?3>*e>hD4B){6SLS)}asvYbzF3@9aotO~iM>CcMQ@6j(|{fc zjg72UQ;A8hNT1A(cBRiiPyHkWY$^MlH%xY(bBuEyF}k|e)j#mva*UaC=;VY!WHl^sMJC7jGNB5O zoFG);?^F@0@ZwMvKKeRC3CN3;9!BZiL87o0kLvr6z-NsH(X=&mdHz1AkcIm;iOk{S z5z-W=$=$I(1e_JKr!AT`C44&W&;VfPGF-_u+rb#GLG>bnt3P`@px zzOSsZR^pnq`UtvhWco=qjJd4-jTKz&Fe>b2KPvE~(+ZmQL3o>;^T9l&gk?F~b_D!J z-XBV4NF_#v;p@%x_qF&}?x964zEpiYGb?ZYpdx+tlFqbU$axdb2s}`X%>dCj3KZj2 z;I9N4E~|5X&?swX2#MZs+Nev4#rVdG)CfTZNMAWU1yF!=I`$4GM}~gMsBjFqp6^&? zN~nd>lQ^BBE7msLUN}a)TI~okQM6Evy*xab?F(|mO)*5vz-Pr74N$Gt>C zIu!kQipkI)kNjND9cx!0dF9>mKTC@DmiY&SaAV5)({@V?pi5O=Sjr;cB*RHA(oy znjuss_zu~W-qPy3j|Clrf{sR7{7b3ZY_KV{>3^&@D9x;bN#8zAH||U&Y%km}R`e;j z#QQ$qV8OXZ3z-czk8fJYRUjfX;^OC)+CMIRr< zMLXZ=lHSmt422rYdydem3qp4XHP-FRUC2K^<(ESarvWF@faIgaRJRpu!mNa1S zI>8Fe_^yH>FaMCtZGgy5ph{XIG7Fs~E@0Ws+_8Aulj(AL-28$O;|N1?vn#n3=EBhv z&LrZpx!M5ex?Ozzt~7TOW0G@qMI8h?GH2iSD#WMvYl){B*7{e!gDCQ)2)A$x!kE)K zw;bx`jEln;P68mx*I@#guj?IwA@WdcSUa5amFZ2ALK*oo5HvNX>3?flokTLCjDbSQ zw~w_ZtO8lvhX-Deo0x8|;0(bGMWxf@=HUh+Pa#!OmWB;dCHs44XMrk-{vm+Adq!Kt z8v348){m13-!s4YmNfJ|Q&svDanSd~7TDObEiLVtp45Ljfw~-A1eVKL{zzX>Bv>w+ zdEj5~6@cZEa9wVHLF$*9qAG)nh`3;uC(wrpxZpk94la0We^r)iAjF?xw+#YN6Way> z-Xwzn*%#nRsdwx+gR$E%BWiLRX7nb(j1-C(us)TWH&6Nqa4g&7Zzq(@_x1)T_!pdz z*Sy~BRq`&ZG5jtftjHT+8=C#RcnI47LU0lR+WA||=}uMSDE?CE`P9NH5LS>-=yHJCd4dZdM(*jc6#!xcUR9l>O=`+Hwgv-29guMUJw~X}v_OaW zdBJ5v!V;XvaAcstFVxLN?VEbn(u3rLq~O>%6@$CaIC+KxZXD8S7pCI|yCLW+AD*i# zA$9IuIQW0~k^9f))!}j%(=kXs^x+%hOlA-X0UUhLC*b7(G#As0NmjuC^nP< z*p478cX!`fY=+2T334Y~6aPH8wlG%K>}P-n!I(~b4*)nwTb9@mvY^rC^RS$psy*P} zj&D@!8;&O|{+Mt(guZgzi~MM)IE>a&zdyQcTaZo0e%^WSymu9pf`R0?xJ1 z<|Gfh|IFuk>>#EPOH)Sg`>50>ArO(o@ma+B>{dHjV;6^pQY~PCWTvb%2Ot29>|6UWxJ;d!16+~V3zaVAqe}R;_lt|hA35Pw`i(e9F 
zamZYV-R?(zXYC0De8T3rKG@#*&&&7ytMNp?DFF#? z0TIdta&N2^9XU}~9=`f*gybv*rJ(;6n6j^~0I)!#F*q=Fa|<73p6udI*5>gqfS2=t zcp9$ff^~XA!{3vbDuR4X#D#jl7y&I+a42RQzFDDL>3;rJ9B@rfUpC0=RuOg-z zsTpz?JSVy)HDJk5c779OpPun;!QwPG#--@}_c0B^j2J}+K>3EQUqm&? zH{LCS-OH}PnaYPU$GH+}q)Ns-AT&}L^${AW>Y+xey+epw>b>K`U?x0E555Uzf+;)8 zOK2v99Ck1JI8XL++GWbLE15j>8&<7Cbm?$Aq-MX{4x<5BiTqDHj5p+n-`}`dMG1%)H7IM8}=to#FH})4VNjN>~!|^a?sc?F4yxUFJ;r0C{?GTDiYp z2Yx>k$(9`i|EiMSlxXuT$agOZTG2Ra*@HC1;5Rq1jvJhiBhv&jg$P+_m2N{8C#y)1 zg%Tl_;v>dVLH{cyw7WUPvy?neW*|g*Tp)v?KmG=HD=~@lv7rLaTW;3d`uMUm5^5 zH+JA-_cBvh|4Wb{rcJJamQIF^<&T7((d2AoSV?;1v#;alQyu`SU6z~3K^%h`qF&rh zvrnrdm|&fWf8~(equSn*Sx>}T!UDTu3ip8bXT{C!4sPip8)X}t0brgTG z0V%B4VJxagFvc1)d;~t5(2sfi#Dh<|P~VsDDRp8d>aQVYFtV2yw#RhkYWz}Pqj4@0 zaP`O(Y@lD6(2{la>ODsYtWT!}xK|I^)r?2sA5z`Rr@67D(*})de&21YSb7Ys@nGg^KcC82({&hvLjwE{% z0qUG!#U;4K9tG1$d)k+d@S)UIx66ljsP??|HVe{_K(ITg^V8~K^J}p~Beyl!Xh?0~ z6C##sb&uEx*O$*q*LX`iOZpzA zeqNVG@N0r@`EV?o*RfyjT8o1mX zGEc*gb4x%s<`{3vWkAbK{ka&6f4AJ6trrGemk?F-`g$i1&t0&YVcGpNoP^;Z*8r`- zOuGiqkJ@n*iwTa6)#i)@ma-s<4qxRS0n9ZXVAt9ie|K|`@7$0EOZkT>7B8|@P`E~f z(6KfNAZ7vf$Oe~IZh=eoz9=c#@CJ$l_CS4t7Er0~%NL%Lux{vY#D7Gt{6R2CB0Sm? 
zz~6e-7yK6*4Lzn1z^r&A*acNeP>!?`9dLljd>uWU)}|7O4UeCQ`H?zq(@*f2_%2gr z;VZu_Oy(!&7Zvq{%___qT?jW3tEB_Rgpa1vCMKP!Ut{Le8Y0thS)cO9Mem|mzdTyP zv}cT5|6I=R_*4At;8J9@FfalBlS-lw~@}BPf#jdb%Zz|JQe0SKZH!oMZzN^UE;;i#~Ujev>e+HmX_@d5Q0J;e#%}CT%t*ZPA!HwQRA?Epk9z)G3$l;;K6V7Mj~eJ{u$t zcBt%#;V^@zl=_h2(u_u}n4Uo6;|%MtESQn{u87^?FMK9`Rs^)q6x0BQ2Y~; zNiqSF@X^NoI%M0NS9@4;_81mBAVtIt>@0RF>RPaK?A2top_rxjC=X*g+sztPnOp0m(5!rUir~ViJ z7C3(7_!G*_{d-7{cJ6qNjebD7gcmhJ#ZPHFCaCxc3T{YH{QM$6s%a3|)CvZDiWN@4 zu!_Phzt*tMtf1XglEY_;;_?sf(>1$PJ$+XH@q%Bljlt0t_Z{{ydT&&4@fKwiX1xolAswxPfy4NU~J} z^VaCPPsa_oCapd}?=XcPZgP{Byml|CHvAV<;h#6F=sb#w*x&!W16<9dSrYD&Uh)#u zYXgRoL~M~beN(8$YQdp94NM;z0dWL}Xsj!XEAMj}pJ&kjhH*Glgv`6OIOv9jMNLYI z53EZZ{0-Yi?vHLaPz3DRYgxJ`>oBXPH*KIi=V;E6(`$Ir(!w%Rx5Ssx)fsr< zs=*me6dBn*RC7;(@L_Oqn3+*g6Wq0WgFnDs8@A!t;Cx7?KJ2IdFWs#R-+(YVjhH_ek#39a;dEF0j^9qDJ__( zWGGN?`mMh@9#DI3)DOYI73k0}$iaHeTJG4P_nL>KSTBd;Ma*?A0!pj20YMpb*LF?K zk48>jv!aG*!g`$=s_PgC63 zHkUeeX1fz~?ier||KCo)rsyYo|1X_#K#$mQx+dH1fa^@E|KC0eD|3=o`9y?ZPJT_{Q0T-%l^(r{O~o0yM7alp29rB((p3yQQTKXn)4KHnV0iYff>oMJshONUCKyI?$A4{LZQD5PqdCc)?ZB@C zdtrtx{-~W+NMDztLb{Yb;YqCF{U)(p;V0pqh5dJ)Yc@Z@wuR8@HG;M+eNAf)ux$lP zU8IM$Enu!PA@PJlAV~xbg`YPzSTdmSvwUp91B9RI@IP0%?QoD05b(3$ zAmvKn<^8#i!1fO*J!rT^vd^-t-AfVkA2}(Cm_+;B*yi4p&!W_dwRvI#?|oIKdjYnL zIN6U@bZiWGT(x3tLTKuM9HXJ84&WFKnmR^{pr#HQng7b*%bI=}&mife;tE-~BKf z`aqDF6yb%nisF|+OFSK@%^`5x#d1ADv7Fjcsi~FdzI7RLeLP4QKZ5xdaDB|UY6?K- z?Mn~r6(|x?J3^Tfq+Lf^n+c9Yq34hVzKU%8?DCZwC5LVv1}2s{GQh-AZKmPcs&oFN zWHA)B1LO}1+qFvy+jdYX8mWjCG~}pccMjMO6WR^DN~_QL5dDYm;GGyFqU9M)bdh{4 zwZ(uha!IlES${X&-`6lZfwT4*vqMKhHk;dA8z;H(<^P~Vg=Cs5Y9q~M%|7(aN+ovL zrg(a0Q7co5y{TR@>M3q#59G5+1m~Gc)xH=ck%EUm?UJDwmyQdq)Y%IY?3V6?9pUYV z_Zg|vCI^k_v2iKskQTh#jX}@LC3kdgvDVPU#rSkVMCX+FIZc0Fz*h$tdt$KfoVMpt z_f`>MA_)}V(7{9yZP1{Z6ekFbp`O2%Rsjj^;!Z+~`j>>3ReNAlWO|#8`ioqr*Mdp@ z5Bs?t51n4KSzw5+HLh$j$WGUTR^aqjlbYJ21{jnJ(+9|Oji;oQrxo^(Yn$vd9m<6bSc$LaBau@(lV zmVmL=6%y!!7;6v+0mhn3DIGQou;y(;AJd!&d-+C%&Z#L5$YS+dsL4#MrF1h+FJJY3 zEq3Yn!?)9tE_e}VVJf1MH!-xWYUGNlDXO!`nfucNCC9(o 
z>m}hPS0lai_z8z$0cZZ^Lf={G_9>GQ`qr8Gb0Zp;ob53?VCz9y(z7Qd&1Tixk$sfr zrHHEi1l9El9{6u$+`CH;_`QhetCF_O63&VauYn)0Qf+}E(_)KKNfBr#66L>GJmb3< z)fuJRE)(v2i1Kv$BWT$=GoPknC*|c)F`NN-o}z7_B*`x!mnTUmI4*vh7a0I^=2pLl4$#%S%9&NvkY3Yv6@UaU#zEGRYx0i_NM6x2o9=C+{m8m+MdL9#*cC< z9I_DvlXOBoo?wy=?9~V+>G9g+^7P?WLw$AI{5B$t7t~5Ht}1|Uk00YmB-wIFHtY^S zu?}HP%^wv^0s@CqQyi}b5jZ9JD($$7ueeMcm`B3_qj}C6iqmSi<2^u}b~BI{4#jDJ zXj)>Csd%tG^)LR2lGnBGPX_z}SIVn{n5bJFw@cChNK!e74ke84=PL5I{3qWj&+>oR zQuarEsHaOJ_{3R`xIjK}(i}veIGb&sxQHLXSCTNw?R~&z<@;>bAtT~2pAd%RE*^k+ z{M+WUzgXQqMsB;hNx5ogQBj(KEcmD7X=-fnfK*diP(x9MnT1w5{TeibI;QE=+fDf> z0U4yYpP;y_;)!4oE%5IuiD$o>o2Hm*A=GU!D3A1fwa8<>d`#mD`+YN8ZiSJ1sh;k` ziu?c(llI*Ir+y!as{Kgjdd^9zhM0p?1yfCd`gM}*J+ z%XX%3%jxZA62}%nf18pGkkEU;$W>MeGp5^GffRU**Pvmjm?v>S2>+8%2Y#d+WJi^& z0Qs@6>-c`J&4(kwY1dwUL9oS@Y+M+1QA=BHk^Z?_K>q>s2+jl!1-&?3Zc$PfULq-3 zy-14lq(~+H$Wd=l9uCnrog~(W)9o6!3&cU7KR>M;9CmXhUmmQa_}{QoceY9`?X(x8 z$j1wT!yY7TvXzh$MJz6Fl;FUdYKrp7|`yqmz zO3R9S{wXaw$z`slh>~%-Ba=SmwQw~+wdGaIuK?cx1Kvo;cR>GYCBb)KEpHz39k}G3 z)bixU&m{Hc2|jTIk<>$rcI{I)C~8E}9mIOH3{Q(Gs{#mbbWs+S6kVL_)a^boWcTK? 
zkD_#S8~iiD!)JQ>@w2S4te$$70|3OiZ6$j4nJ}^Tn+%Ks;adl+Tb$IO^DBG@ty)(J z=U33RX|}1?K<8I5j!DmGLg!ZiUDW?5r8h!F8NxF0qRt>7fC9IAU{M$f1#Sos5P-m~ zL@9%WR_r<=3Md~#DO-Y>zg+sqXGK)9P%AI+!U*LA{aiqe0dP~9y?5BA#J6_#BG68q zojnYoqi0P2!_hOa)*`)_Z0(fvAS(YRb&VQzb|X2Wtr9rQ=*8?cUa~vcb*F|y*c|?t zq$E$Jq~NaG)k8kmZ3h~0p!I&x7&yCQl6J;+7!=BIrl6$7mkSP&fJ$2WtDlZTC9T<; z+|^!9!#~bEIS@sDbDIsh9<4BZ;W1Fzj4iB!1D`CHc?EBkt~<-M@t!4A2IVJn?_@B5 zgQC*lc)~$Zk|WSTQEh~SqB8%pzmhxCZr?8Se>PiEen~z|idt|QIKw?C?$DV$FTfw^ z84C>im@fhx%mxR)#0GG%YHKGyRW0-WAy!Pw!c|3C7Fn z^#*mvqBSaQU;)*tZODHDctEAbVDr_arB$Sv0~CcU{2Nf6!}5y4>L74>udd{F54Yqb zsnD*wEU$M$VuGJ0;%$fcu~0>Y%*r+=k(=B&c>2vHoMuZ-@x$&CUC?7p*W9eefSo8K1H9 zZ@&fqgiz*0b3T*v_btbQ6Eo_-S>@kQy|aMMpsdW(VWbvuaKXm-TMnJ7Tu8Kj`}5Y} zq0tfI#3&u#MW%7hlFXSDt_w#7V=#Dl!)wb&h1cb=jmhN z(>O<@IZjb_IuA*oBXV*Y&qkGNy)l6@vO49^Mv!=8Yxfp0q>)-Yj3_# z9WZXll8`v8G&vdHF#M-NmUO2)yh04NM}2oN8TAsQV- zd_ab1{Uk%QIpOd*JSt6AU5yvGT-GMc*D(3lJ8L1hK_wXcJKO=z4O;#l0i7FETlhx_ zbZ!u^pzMUs4I(mF`$KlA8tFy*S`ky13)3)#+WeY-O7yTZorCjyM;3V<8>FxH2xm*b zEYEvuFYxt>H@F5Ym>{rDdF@IzsflqM6%p69;sy?IQUQJW&;d>-e^^+INj6wcC%Kt6 z{wUlqY1I|S=?HY_Fu@z5d@AEIF!$*cOD^^HjY_N61Q%f6tJ-NAv_6@8RoXTCSk(bL z1?+o9q=?7Mm2W!+Mw^+_nd{hUjq3u>X;0wgi=YI(6rj=F*rU`CuF=TCg3v zN=p3p&OG^tqX<8;eiNF`LzxpU*~@cwR8E;Kx4(Aj_?Cm?W3cbbV~0j$2{F@uR|*8h zz}Kl`p&{2M89g2Ezj8f&C3Tb`o5_e%?sv`hhTn zaQ#3S0$smL-We^?Dr4khZ@9^ae6chClhfG$#pH@YoFpP1Th|f68*0blf}jPz(I^Jk z-9ih#X`G!G2ejaq^8_)rHL5a3RE~aJ<^y43N-|(ep)m0}bQB^KCVqY|XM;64oE#N& zCG)SjjCI#PbGgj6r5_QRU1;pVMhyrp1@O5VFZljIqAK#4|m)muJ0oW30%7r z$iF*72Z#>4G-uZANTfa`WLydQVsbSh<4PRyL!hbR@^t3-{&A@yVu3wu{=~Z7wsV)^ z|5M$0$5Y+^|Nk@i7A6e}DYkZm#QehI7t)zTU6r<9WY7J?gY$IRtaQha*QzLCG+m$)18x zNRP^Di=7o-^@lf|4v@}mqq9O*gvcUR%H~tiuBA^R&ax)x76wc9 zdE2esWN*<4MxuHLBQdv-67PCK2nB{5EHB^7vtU$YMn`^3eyhOCE{Z(4P<>=x_$-xQ zz8YYtGAAb0dw4dy;H;L!^sA`7YQt!?e!#~R46-8vnqSFL>f~wgY@pK$(EaPI3 zEl$etj(YRsS|4b->ySJUfiobTbSo^tbw2z3sTc4>1J2EKRb~LhN_VegES&Vu>v*pp zK72xh59~OvEO)kj0|lHiZf8EWNcQKZmbGl4&>}^5rbLr(p_#~A3 zykl5+2;@F#^$)p6`VnI%w*Nbe38~y>_JOC@(3vuq 
zLjZ_?i)l5TgK3(CPYTvy^LXC}0CxpUghBxsD2|71AZl+{$2T6__>abe<{LY;Xd)tPRoLr-*6}_eP`X3;uhVFv$oR=CD7Qi* zuE7iea-UbqGuOWe}yi22D zG2O)latJhigik>bGB;C5$6uh0FIv(giB{M)i!FcAi`LS1#hwj#JPzB1Y~i}HXSZ=( zC|(?{D<4`LP6=pGFa>4)T&Q4rLCyJ&C+vkWIf7{(h^7lB z^ZtICCj(dBf5^BFs%1vJgGNL5}#3wbN$BMl1WK(GdIia@XiaEd^%25^e7!J4l;^1(nq z^|`}NAF$pO8omMrSVlfjfW54@HE-@B(En9Nk+uw!yn(+AEEKH2j2^S)FT13<<1agR zpzm|I)00QC z?HV%P?ixz{YuC{0W#61*fKR0W+)IE@l>@n#@;}h@_L%Z7FNQ5s9q{NJD!yS1azS8A zWi{oPT^{|>2y#JCMc>(OcvrvB_)aPP1q|AUcPxBO3V)KmYW#=v)z8%(BJN#x1;jmi zuoi&28W8tw(>s7BZqiyX%t8?AGqe zkK0}My<7AKQ+Sa7Lfc+~A_1C(mw@Kb4o#wS!5kWpUZy~EXvsk1!DzOl-P_JtFjKki zE)vrIXOVDj?l%Ef$3!;kBb$!VtT(&7TdSLy<*)Yc>sh?=ON>~JX3i?w>=fu{#g<1E zA+HNm9)%8I*z&04+t9g^V^`?GnoJL*G8KccaOFP=e|Vux$K1A#Tzl?hw*gYvkc=AW z>;x8BRWYcu6SBx=K%JeCMV9`T7FjObU{v1uD5(*}?^t4mf5Q^1n2RghNdMa!Wj-Z@ z%jPz!Nd4GJ=ine~`}ZDwqHYjLhjFxi9t4zdAAl0>*cO)zuA$>VE&Tin0V1myE|F~A z3eD|+oeG-U@uLaN5C(HQfaGICYW=9(YSbrJY0}ft1R|5m=P6bqEu^SP2wnl^bnx4IhMM6B zFLoUeduO#FZh(18G4Ys)j~3B-Nb|tCTPJbfDq`*d?F7`Y9)~)0MZ|ok5c2zhfWl zO0WEbrt`yO4Fw=Cl-xvALga-8PyvO=3l;#J0tLv}Dsk>k)vv4s?}E0YWPp0I9rMW@ zIP?TiT!ga1&toYrL;zbPkWPD;e>)Kik6F^b!>jQH$OlWu3rP}%tQF&eD z#5r%KOk!K(Hf6!JTJoboL4*6ZLYKzgC=QZU4O-4?2JiMgPw(1%nxkQOkBzfv3lIf` z1G~_m9D6ZON$W}8uDmFb2+rNupe=7gDm~fl^%4kL*w8)V%5nlX0*eV7dUvYi2Zow5Df?UfhkHc}q2cxZYODhA3!lEo z(UEzXF244b&94GIZL*8#<4(A>wcGmX{D$}6a9L?q6gwQYp~!t5%^_-wSf0~3lqtDT zuTSPT=$7b_Zx46^R{~0#9L}}d)ad~P4HS7bJub(+szFpxQ3V(@X9LS{3;$ot%usSj1n3DYCN}RMYGs<}vPD;q0aMB^XAO!2g zRFVrJpG5=Akw`na4lNrqy-Q>=m@D6i*M-fSbTyDIECXMwm$%}u{<4gs7{^ez2Qoy>Z$2cBMZngOdBkyDu2#ZhlrYa1bFW}o0HMM-j>7W=)o*+z6 zy2I{v*@0EL3{BRhkE>&M^L~Lqr1I+EvoL?Ft^FKJ{gTdvk=x>+4uBSfLpBJ)XpbH0 z+g7}#@|@%affy(*0Rl1VTL+6k5SIYcO$kzZMRfLC3d37_aQ%cXz&?v#=?Bcad}=X4 zK<(5vWtg?#apwDFxSbN<0zLEA(iaB(7>ZFtI5Q|%h-A-ylW_L`E4T|rwTV|U65Fp) zfNc~C=rXV?$%B9{>lTU&E7>AEp*`gM{Y2*&NYN&)Jbwg;pF51A#pM_bwO#_Bw>3sB z{c?os(PKwBWB_3Bjn1p^PtYXE1Rb$lD&|ziaLJWCi+Y*T_v@bkDow{>i2{J+>>ZrP z-gg)mRGYRqh-1{!ve%Qq)#7c!I<2l 
za6vr`f1z;YeY`eE**Zgzs7-yfvU~luL;4qskTf1YvsuaU?p2cfRM|`3K=$ah15opf^Jaa;3hlO$l0p^v}`m7_~Zae6E?!=?*&1`^a zF%maFNkNGy?y(xJH_cT?N-(88dGg_vd3tYOFgPaDMlZ4^T!X3P#H3(2-sGs5uJ|Y# z=eL$ipa>YAg-Pn#Uz+JNA^`;(MKGZop?8&T?<|gstgsKaQmKm%5DRDa%3YH<3x;Wo zABgw^ZB<%C0ob!0EQW*cpb_qW*;NID@XDzs1t;ntFQvv-X?!y|njzE}s6DN2y4rht-d?dq*yo{i( zlfa9^W15mq1Z^N%br8YmjnC8S~qG6v)%CYo^-K*oSfKJiMf6v!Bm=p$;OM}p+;in;-t;7#*@ zfa83Hu}cte9LVud`2eXF1^`cL`L*O(dG}JuEZS{@Q*H7m&PTA?Lg_I}VD{w&=`kz< z<^fd)7-aKPYtIMzPK~Yrh(2mvPiJkl`17fUkNMnlQl(-sy}(L|<)}T&Tm9mvRoHlG zDo~04_}r)%b?R8zR@#gSHJOec7}J^Hi7w@X<<&s z)Nq8LS{eYp0B*YtQ}IQ#M>xCu2c7HFU*@xqYJWk8oVGi5%)KUJdCbY-bT8;?bSON! zSq*$>)f``qJio~i%Vl;JS9rM0J*@x2d&hKAIMr}d$s1W407hm}$1JhUrP({prJ!Z{ zFU_T=v`S6X%v8G3R>dVAQ)JeoOzObJ6=_K4v2 zImtyMAyo*rBx8hWQU|alD3XUL2C$_*AUy2C3J)v63=MxwY6}!5vkoe#BgKZvC}XT% zL_=XR0A2#IGz2-do8{W?#Yx^L@G{)(o#ircL{Oc4$&$<|O=0Sjtt2^8(aOoM%PnZX@PI7C=OsRcOov~jh(vC6 z-z_pQDozV;%(1UwfwI<#EtVZn)*7-S9ykRy8GLhbUG3H;dPznovl#Ua$A4{veTCjWuECUd|56!?p@?wMebYO27}x`kBBr`SfS z?Nh+bhwK8H%M2X-jegWY-n6z0{~o>zUG(0PI&#|YFFEuC2NfE}-@gy{0e+vO6(oRb zd^k-C0{D2V0(Nd~w^d;Y*I#rTOt4M^0A7Q)L2~>^5j4R{?Oo4kSv0!YWxE`{F$c07 zscKB0mnjS(3xMqtc8AlD1weXcL?;Yb0D9bA(*+YGB*{LW2!||u4LcS->c3j}JYgAX zMqp}nJ}#A9+T%y9eoC}!w0I?tB-mVhlw>O)-h-cO8xJyf3t(rTzh#Y4?Ak6brDDrV zYV5c~C5?%W?#hfU+Q2lZpwRfDm}reHA6f#y2OWA}_UZumpv{U)8Lpj66pt!99}Z7o z<0CZ>P%?Bsm;46pZZhrDG2$=kSL08Mmh_VA5uJTF#v0b!7Xdg2LD_y8 zmd$EJ)2BoVoa=hFfP}^AvaHbwGKz1#*p@*;*}5P>Nqx#Rvt2dXx-3G_UB}AQ|D-B~ zuc$Z3O0Fhc_CT;O~I13L$hE*rA7+s%N;G(u1#JodJBPAWXbq*YL> zU(DL0V39O+X+cShQpY%ewxZQO7VP0DMkEP}+Jk@uKMiMx-<<)6YdeDEmN{brQs0oH z7fO9Yj$R~4eFp+(4@iCI!*aTr+H%=#DIHUq-A2 zuU4urtGAe?qVO3D+ZT9(6=^Asd*pfLz?vsfa+NzOCGRHBu&xzOT}858mUx7qw{Of zZf;?d=s!Rc7eBU%3*S+(XP9vd9n}9cyGudYpis*jD z7;TZ`i1xxUw&C^Aiqa)5zA;hlH26Cgl_ltU z-G_j-%L0n+5^xT3ueY!Euh7E?@?)4>v5{tX9oIwHL^GAQ5rsbni9`isb*}gT^R43q z<^T&4TL8@A01GmUdC7}9jKzZZOOARs!67V2;wTy$24mrzRxa_CxYt$tC^7kQr6$As z#mtZ~T1Cn6Hi0f*@#<&FXiRm47skQ;EN(*RDh)~R?VI*!5hLbP9KA`#{2>7HG-uN_ 
z(NzvmEoj{|@exv1Z=?oD=`Oq}z5KPQxfX%AaQ{6i*@6_EU#bp*3ym(Satm8!>D&pO;H+DexKc-K98 zsngTfD0bqd=CD2e)7yoK=P+Etj1i9wR3iGiOnu^a99M4}S=E*Gy@|vWjEcLB5=?6N3k22TrH9Q3m4KiTbw){cJrQ*itTs}oCw0pWx(mM8NGw0& zmgCch!@MXdOz+Ce^RNT)(Dwi8-?sk)cK@*I<7nF*ZyRc>wK|}7_k<2G&EeL~tN%s3 zVY4!2-3>#IAQ=;@nLC#izr>&n?ZcM3*-5QHWf;4$pJW??Q$gg7YNaZh| zYkqr{jU7JoloG zdZgWt0=WQ_yBlfW` zY%Jcq`iQqT6pI&C&L<;*@|lELczRkc?m+hC1z-c=YeL|SuxYJ}b^NB)N5^KzWsoVi7^Wyvn9t}7t zurw%qF*bPu*tC(~L(7#~xGiQyNCPj;R*W4x-D~M&;_%~;w$e=UEyG?oN+&%U(td*m z7g|s(O*d*L%mgiqLa-4Y-QyrWEUbVy3F5=`!Tu6-2oN7ujs{^$5UKnlPYaLnQa8w# zyIb@bfJiB~c-U1Qsi;d;07M!+#M1XP0l#Gs@$Wz0)LgEha1xvfNVb@4h>KwG08RxR zY)nsWx2MT2j2LKeX6L#^|4uO3rhi+4CIU+={d-UE_Z?couHp5$vwzd5{q7d+ zt{fi=iPP0xI!#}ku9F^-Er&4_W=Km@79?a-!Va4oP69W*yRQaX(!FV9TF~2fhGstd z7}fW-5LX(pcY_Pc!ukDQ^o8nerEt0(aW`~84E=?J`=RNEI!EN|7mJ!he6md2 z&+wEa`VdM}s00mEvwN!yt5PIU(584S^?pnnt*~%!=sT`US|@`b6T6&wYVcJ6isPk16#j_aTa!ZMvBN){%@$Kw;dGv!WTG%DEq$Vs40<(V~1&E$oFFx;}! zBqXy`p$k13J2Y1(Ol2Y55djpvifq`x;wlbK4JYbroTFqYYCtP`(WgYf*Xx8XFD4okqmQe z8GN;$I^%VFK@SKme__uJyVgI0gBTE)WfBq!MYN=}STDUmHdR;lyFO?V7dkGP1d!K* z2=ecz(8Oork_>4R6PlIp>AR1AANG07xV2^QGba|Uvd-=VW7P)^>^_-$a&+%&YWJ+O zZi+4e7rdSegqeAssrP}=zH zURYcAkp&@1ly*_fNZ70At3EdX62>oOgM)U6vU7Y|fOrSAVW7Xf?H%KKyKBJxj!O7Z z`w~Oukm5B}M^I|08NaJfA8xDFs$`*e=2=lgHNu@kIY_b(Jct?M1hm}(rZ3XZ+*IfY~N1x%#Bd2H3vS`@G<9vAc-K29YR>UH4 zMQ1QqKjOtkO-!$Im+7Yxk|NZ4`j^dX_aiNSN+4!joL}-0 z#Ejj6tgwu7a~shn_2l|DyZBVW;CWE=p<5jESz+0jLKnkPERgM(Z$Uuj) zYeyZ`(WgB7U8IGVT$^TLla~(I3j>~c6@yUGk5Zk}l?LrN`Ixuk?@GG{z$sLEzxteO zU4;FO6;@vx0+H05Gb29fX{B}&)o|3YMJi7KaH3RQ(-5tsMSwpzdg{w8 z@CRG-6Zw`^XjPi&TlZ&p>14m)^VQ9pF}m5@pDirnZSr!-J0{3+`F$a`)=ILio^rku zqc{iRQbY5ngtK3nvPFQXN?{H*%~=9s+E7>%#I)6=*tcTZ)5>!?*qHX4L%We(c$^Z^ zLs5$E+YwkO-^+&qf%kvHE>%GpU`%A(M=UQZ;o8?77*r+BqPABAci7l~iTFlMuejYpM^SD(8UfT?;8FZvd&u zh@Od!(}J4WziJ|n>AbH^Vf_h%)bkgtmq9dG7(xbK&hdwkfgl=8wjB+ArrME~lx4uK zr>gxh1+u606KR({47RVPh`#Lxxi$~?(KO9L`yg(J5IpA?3N8jstf{c z?jfJ?B5*n>Ib`5A#&>h?hbHP2@3!VluuWTsr3Jy2$wi)i|GFfKlocj{WT(^68TKYlN(4EX6yNNB1U!nn^qfRR 
zx2SOwCbqkLllvlfM+9F0WLR|JKXU_KO&ircq`oQkf z1yS)Ed%RDl?`@(m@+{`18<8+>+`s4D-<2Q)@RtKA=fLvDr2_WuIj{lW*lB;df6{GN z4gc!EWvYUmPtAr;Hh_D}(!VK1w4J-~6-KxpoWy?M0b|>$$-Pm|FqkTSf`+h+NEnwM z3497Z0{b#fQ8u7N?f$J;2hzo6;V!6$qJHahfxq-$rU&+@{5|4l4{jJoijRRf<&f{3(+m~A5MNuEW&y-YEsImH{!_kMp%1oioIpI<@DDm_mK2pJ!|o(6(`5VYzM0Gxle zJz&*R2=F^5inf^M&#K`E$c$U&3ncXbW#OA4>labkL7^8%93+Z+Y@`wmrUhwY%9U$8 z#M^2Q4(ay=H#m@WH^qcS22he7${QUYQ|tgfRbJ)PHYDUz1reV<$ftVpfxQ&uQw1IN z8}#sBLAfh!af^D7-qg6ktj%NyVb%AokWEoiQ6+6m082fDO>*0veT!nctqYJv%|UML z%E=9YChPDVAd7w!)}uIuUZaa9lbg&T=)=F$(f1{S!)%LMCH62|F9RKBeS~{wp~Eb9 zc-ztnHICIMu@?FhhT4=FPT1hCbXZTQC=Kfw7_zl|p6@lU88-GtQ0Rf9B8VewRmwbC zx8(00z}*Fv%Bqr@TBi5CXtLs25!Ta*O2IlpAD;<^zMf=@g&w?_Ir+1inTznYl2NH! zuEvOW)OA9>qgR+r)S4tm6uCv`A_MK~-t?=R%^g<4 z5+136C%sdGh$|Y3xg(a@a^oc6B8+^DB<6LE^|9aL(-gIMFwqk?JGzDSPV%1M<54n% zPfM^ef(c!%Z*Xs;OoxA0<{iq?ColAA!w`!-*HC)U)~O&lSrye|OqsFwepjodMXZ2Y_-$`)l(Gwj?GGV2NeO1o5Ok- zj5lF8i>tHL&N0UMC0~p~-b7tkFLVM`&?JCPpxTAS+~5Q%CN_$cf1Hh)anjV7Y2_h% zRlk*`w2&mV^AQ8%zF>%kW`y}4w&d*5l>bFoPX64ltIsY6Cn4}k?&+zyK**Tuf2#Cp z)z^TIB?u6(Tq~F zTUPWUI$f$`S8d{GG{f*S1GT9Md-|!~lk;)W;BClJ&&sj2NDnXU)lu6?@D~ zAnFyWz1^Q`SXx;o1>BlhROPr8Ejc!L5t}eksBWP;d*7Yz*~C5awOZ*7&uI zTX<#Xb~RpI#wpR9+q$}MPEVoQ4dhq=yW0QB^XjArmumA%DRpf}0P(_NCEu>q-u$!W zPZ%!AIm#;^0}^aLX{*{)B&|qOv&@6&OZ+Nr+itE01K>Iy_vCiayHT)((hK`Ll%5-m zs1O%bXCYzfvROqNFB?*73LgmmjA*V@pp11zZgH;YMe9lPVVJ9fdE=lu?6-3~KCuL1v2$jg1_HR7+yi^gT)H4&>V{q?uW!WrUkn(JDPdn#u zRf`4&-e_+xRg_E#DhvK&7&Zv5P|(^`4Z#)Q^g0>)A-DoLp-1GqqcMt3Q+ui(zWfP$ z^F6@n<@feQhjnAq+50PtKBQS;RtXCF$Iv!xNjyPe;PBhl1;j!?Yk2~Add4vBPgNSB z@QvTV;+-PZ%Q1lVQ1HaNI`1JmeBNxxGt3N3qbS53?KfH?+p5~KFafWL3!sdo_}Vae zqlo3L%V^WT?v3sOqCT^2QJ>*|iuzbsR`GH^c=4GSUOA3}=2kKa!^I;c8WWN9{x_xD zGLqN;&O!aDsQklMhYsC&0PgwfWgb)P)*+1Dlk^E5{NfYmMZIgvRY{QOR1MZ> z4x!^6_ypUHkOpx>Ff9)N8B49uv^+$}h=Qi&AqWmXn3i9?tbh|mom1$;ae?G&A==uP z9okyWUukQa4{;uHJrc1RTl0LJ0&M zKm%rbeiC-RF2y(ks;;s(wWFZwYA~?qNJ7%xHXL!XELCu*W zF{rO<#z&Q2iYkegHP-E8???HudW)pG-rH_jW#ODA72|NDwap>Kye&^bfbU5j$d~41%cqKpD<7m 
zjCVCv2L(Y?aYqj*2-acwMoiAYHjb}xP_6b(37If=0Drn>uR31U`&Zuf6 zCMNbkfTsD&xJ}GuW;#->+lny>=sM3OhaA2a38{?IfKwO9qr!kwx48IYWRlHlTsno? z+!i4a8-v9X0zGyJfrS6GES&I>*mwAx3dcCvbNmC9Q{lj13h|p1Z*1|K`mp?_t?y^) z21r`v53nRHq_qep6(bn-)Lu6*goZtU0*HZzJw2l_NXS^4sxji(*<0{2pDs-AD(+urXlJ-if0^MrgT~zjZqOL#c0Uw!B_bWM&|q?DC8^BIQ|7&& z9N`o3p;2Sdyt(5#Z;QiKxwmSsf28`E$`}Re__6f)@v(pF6El-86qxx5nxIaA9;ulF;8lFPojO(6B+o%mp@S%<bmtWKImLe&%b}9cQyt*(uwYLWT-5=W0@z~ z)UPzHiGPcpx}aPlh2VG->plmySIz04b_f(i95%YCTI8ggm}9Y4bChpxgVsZyD&>P; zzyIu~A)*CVmx;Z+-_I$?TRU}$Y>wk^oj{DX^{2M|C@*P6aK)K?a0>X?#unJ^{)%-F z9zdP^>7k0r;RNHG-EAc~#pn8wi75f@Yzrf{f z$ydt1**>?QB`z^niGE&;d%({YiS9g|2V`m_h?}lftz|RW)NzKwH*`vx)z4fzsl+oI z;>SwzHD4oNHlN>OytRM5|9n=|%vw%CDY6w|*u~0nVv$|=hH_VXjscyhSYT2~Xkq7= z{3D+ijKK^(7tHB(xEqV|sy(FF1aA8q7!K$nFj^}GqT(2kB|tWCG1*nmT#0zsOQvpF zdh@-?%ZtS=b%96`!Qoy*Mskd;0@ICqjtu(ECw&~L3(%7_2SyqsV$n0Bo93WJfz7V9 zm>Oo?r)HonmD2wc7D@g^*igDgZA}@8X!)S1z=-rOTXLXo@X?dk8@tZdY@>eQgDwrC zm#6L-8{m4~S}8YmcXR9A=wXDHG8~`E(Nn0-(NEB+;1;bGSu17R{KMKAMi8o7@_CYw zg>_~D{=CV{c=MW^S6G;8Z&0_>vA+jlX>KuvKWIrNlxm506#G-9< ztejvMMz19G^Qb&mE~|Up=h@kh{Co~_!pZm4+NyG80<9T{?&T3_-kJ#O{oXTXnO;-u z-r+1Io#=*6ekvcxb77@cZ`0j~9hre_j_IW+;T^X`5qkFignd=~d_Ebo5OCe3f)5d0 zX%FpEqPMHmO+xsa@iL z53TI!{=HOIr&uY=QX<11=jeKSi;AtM*%Y!x24wHIZ5{7~!{t;@`DN#vNv34uxDe>hg>KoD zHk}sIG!j2OGd(?VsQ zS;YqWD!&__Vi}KdV4)hPK##$ZWce1!foh&z=>3hV$=M?%K7s{>GX<=6s-;w(w?0Ys zEiZ1~dNjml&EVZA#^l4S)~REh(w}IA{A8h*{7eI+N+oO3gd9T_9SgV`&BB+%xn+1S zC|+&VHS}cNly(XYv-x8Y!&}gAlhh?#mh(8oFvLOlWcJ)c$&qLEu42eL>ElaG5*B*| zqGSBX$jHdU$&NK2IJP+#{}ZO$KYc@B_rd=6RVA~$>nUes59MhDH(CTU+=N}l{h2-U zoTQI8PtB29+wvE1clsYeznx;MZ!=NqdU8|DEZM*Pf;&SXnJ1M)jmJmJ`~`Gk{@C~? 
z{j~VTwkqVTJv z5R=o+YpA}%FVH)op@+gCl?E|EJ^^41jgZGgy4@{OC56BF^tdA%&k;pnzFVui!hfej;;9QH>Y2DEDDm*al`1y(P; z^M13hoO<7XC_2V~6JW%0$mgXHF(by@9u8|u1q$91gNo%wNUkIw-Acj5@SA%N|JE{k zV$nIAOIR<&8-b2Bl%O&o<9#$F!r<03WE*E5sMU&8{FHSgajyI3lbagZ(?RwWLdGvs zhm^P^q*sK=gDE?G1KxHee^(s|SP-FGkhEFU?q(qB`*z(`6|=e3TO`zYj;HN!e%*HmBJ)G^7NIH&ZDQyoSAh{q_u zXJ*r+MRv(@iF!FTqkm2=R_0W^qxT)ud|-BJsArf?oZ{)SjdNCict3pBb?hfBc+oNN z>=&BrGXy_jpH@q1eCPKtkdP7APgt~L+KnPSN@S~Y6WJS6h;=VsPj;tAS_W`9+0$++ zaIuD01ZEgg@*q=-4Q;jh`R4XC#2H_v(ibMouU8zutlO-9J=IdVP@}m?V$(|LR0W6A zYG3|UfwScICWYfCCxz<664MeAq?yNuVq^SuU6KxxiO|Iem?_8;QxhC~4jR;rmM~@U z;(hVb{9KRqV~>v%dRZilqb*0s!-eGgBKlLub6%ww87u!VqffD>jcw`WBXY#w0q> zHz=6&TT_9`km(~)qv1_~&9j_Q%>~*9sMl&;GVEz`0w04UyE zhW=h<-iwLh@Hy=lN4eO1E{e4?`_7pOFqFVwr#()c0K zPA~R3H zXH(y}*GsmY6w{)At??*m3PA>MeIzDypLYr!^Cq{YO*+xkZCX5N(&uzKkwhjkkSN(Lo~t!yZz04G?B;Hss3)u^zQD9M+^%ohW`y+Y$5GyA zzRLY3Q*TlWY(iei^L`fX4oyB?8tknyd+#Sqij3d9v2(#?{Ca4ilY4O&l5~zyj;WpI zGuUUliywz{J(rwhOKa09P@CoMV(!+HwnddwS(phkqVDHv6xVo$*-%m(lhwW;*st8G zIe9nDCxbiHhnRsfaGH`(ml?VY+|TeKr_@a1UBU>scf;1QW)%Skp;N zt=kmc#`9(UeAMP?F$IU49qV@Ss8@&&!cBy#nnXHC~zMB zr)mm=BHEI#oxJ~P&3j!@cyd^C*o@qoB3;YWv~RJad*70^Ui1dHHGNufu6dYC;H1qA~?8aCkJ(Q7R~$rS;*Dlcu977eytnaE0b;8C!A)9ql&g zvJr}Cm_yuDyOWqFTajXL@=zwcdqx9@*_xsZroUM^UY>C+Pr94eRxRE8oiW|&N8;<6 z7}S#84<8ZKdq@9444j$m-k01Q3RA@+pOgERhU@Bw{1k0NOGQ59+`)7uCK|8=5fa+l zsS~LsQ?VN+a9LWOr<$S;xY?lB@=5@0#!rdSq2|8hD4ncHr?_a+W5~F`A520nAi6KS zojf>Ew(!`x91$V*xL0&!NsxUqy4fg_=ndjDwvoX!3^WUKZE$iooZtBoeKVO2RxaPll;v<$t@RuWrZhq9b{ox7o#f2bS0SoY36mt=G+rsPJhZ@zM zLF_O0zGE!9oF$xKJd!fULyuZWV|E~Kg?#Lgo1G^snmz$L5_JQ!>uYOhomQKFYd-djsm@{nG(?!%mYo7=*k`mNX z7DTcv-%UPQ%&fyz``MND%9G8_xSUTSUlwA%Hxi{eNj67 z^B#haNvn*cLaQC|d*8vxe)W~M9{;_s|L$9D%j%WLHR*?exV5&A-lS$k^2+PpmIj06 zJttu>Eh}E{fPKwJZ@pjGy^Q|tbNtB@{7+BZlQ!3+_;o`oP=wH#;A69d0?}yTcvVNM zWf+8zKdTQeFq=jb=57@l&0sXxCP#B8I_-E#$<#3H8}KViv)3<#tXL)bY4cVj&1tnu zk7&y?n+k<}yPuVv4&n@*k;$4-%5tceT^xHQlDNM7{x)q^ko>5Kg7 z{y%rj{^_Az{~)%W0AJm9g5|r?p#;;A5cMmsTsvhqI!%u%7bEPNc*eBj?n{>&cW>y# 
zw<*2sTz4}-Ag&;hKS92-?4va7;+qy@AjhqXw|+)!q^taH;sm@qLjRK8E0ON?qZ%8A zv-xB~B*!CUTIXN9cgN+7y+G1)O)fYu>#VVMKrA?I*H0q z6JwA}d}D&8P2F3DoUqGE%#d^La`9V3&&MsF?{$;5vdyx(vu4;edVY>%yq~{%d_4PZ zt+AR~KZTqOnaO)au9lkKS*LFZ`{PwZ=f6HnkWzd(v6gcq!j{*rQN*3jK+fThwoB=r ztd07HdS9hJErTws?vQ5FME5%(t`U;07gMe7CEf@X@9dg*#rU>KsKxRYoIc`E`+}`Q zqXN%$bZZbA6I+jH+|43B-TbYpLqx2`|`iHzj6WAZO)9TnJX z4ZFo31xJT}?qpBXLeP;1cT8vpT7G&)GtzICBdFR-oaP!oSdjOH_adlc1gG8@+-K~s zr}+~Wn=tr| z_V!ryO;%SBW74#FuKvC(Gx!AYuvVaaKH4d)h2 z(Kp=@O#JT03-W^^UpLDt$d0FlCYVf>QEZ?Pt6Ci14^)4`comOqmftcwugQP2oAW8} znt#l~Lmi5*S(_GVXE)qpTh38@Tgqtbo@CK86ol(oQh9cHJRh+x^Y4_N3;~Lu0ka7y zy0`A@a@IvR<@7?v6p$xdyu2H}R17N*l17F_Q+M=QwG|5|Gb3w~uC7S-gULmajew@F zPHz;LOl?&~SfkNNYGa(z(X^b)=m^tF=DP(38|oUVO42KWd}A0}q|-)B^H---1tt!g zBoW5wkR*-KbZOdXrsZfqMHoS(c}Z95G4EFtYxXFawZm+Uh5dPn!#N*}JqEKJLi<`! z&AeIS#Wg;no8^jpwUhnoS)La%S(E*VKc%mslYYYJPPM1KKp5ALy~1cJwUwVDUkcRl ztXn6M4dsk=JXL=oL(;=Xh5u{m6l?nWN9qT#Cj8p`=UcybnDtOvPkH^RDPBEd=(|l zJG+Goz98n?MjLJE^v+{>F}5uT?ejz8AWPWC$4YlBFDbrxUZ2?Zww~)kuW_w8rq*^~*)Z@%>hBJDFo% zJwY0Z^T8WMmo@Wkb04YyQas+ys5l*!P9JE9%+xN1$&_9L3|d4}n; z`!I#65hcwCDT~B7HTdgPKl;E#k4P!yWV65m`{=ot6x6**<8N7Xl(gwwv}h|%eG7jp ztz#JlvPeV0{PPXt(Uxc@(nQnp>?arGbK>Y$!V*W4P>(EG7JkC2jy}0zkY|c`Hhcy-xKFghAyKzC;-oaS0 z?($83Yu{jtX&Xj4k2OM>aH^MqZ@C`O%ffwSqg_T+tOK7T8oA>8&0VU7$Gqpni%S8* zDuSljq`+b$&XBf1gN0?}Su(@rdO!ZxKVg1?s1>S__CJ&jCaHzvhfroHjt3?-YLSb) z;}6fKuHP+~uun<8!#UY_-*f${SN1pS?t3Ve>Fvli9$fJ zox2{Lp$&lqxW9$ zj;~4Hk#~C?xt=J`>RHmMO)VSm~3Ue7{pbEsafhls2+Z4BLInz8y1>G8QwK7klU>x*uqfPj+6RVF zioQ?#zHGSv~>CRP^dYMW(uMUgN%dQjT;-L6YV+_SMa{wJHv&R-&^4e_}##r(Xi%6-AbT`g71 z^6L-F)}D?lgwDJ6(Vb^Q|NozDI&U2n3e^f#BZ3-9rd&!5tFZ-CY6%4<0;Na5^NoySux)H`+i$Pv_k8 zoqPT}Gtb;}|GD>>sqWfUy?6KSy-L=*)?2G!^RRUQj+~UN6aWDM0FZ&d0I&^!wxp+( zIRK!j2w(;P0M7x42zUS_cntym0)$%ukpET(0O;W*0D$xz;oq-*NBrw5GXHm^zv`%+ zf4&Gi0q}pYbZ~WWv2<{xDSQ- zVeJ6i=ct{?{74AY07P5_BwPeoH-G~EL?{S znB)~5Jp&^XGdB+}AHRU0#Cu68X&G5LHFXV5Eo~iL6H_yD3rj0&7gslT4^J=ekk6rE z;a?&m6B3h>Q&PXBrRU`r6#ghGE-C%>ySk>fuD+qMv#YzOx37O-aB^yTW_E6VVR2(~ 
zYkOyRZ~x#Bd~tboeRB)ByZ<8>ygmO-Eco%?B>N9?;lkxYL`FtJM*Slf0;0zs!EupM zs5qYCiL0P~a>A$P40=xRE?_X9Rlk7hySnz*IvVREn z?{X~xBmjtiOGt?DUP3~G_Y^Wbp`fDtNvO|J|CXNrJ)!?Cz4((b|B+zun;^jN0srwc z_!|=q7409_{?irM3Vc|;hb;mykPzU52?-Y<26&9liv(UFU7qJfG6MefY{QwCVoZQl zkco~^ojiZI>vo2(yAS4MJF^f#FLmZRJJ3J-0yVoao=yLR^@tJj6*#nNbxLQo=X|Im7m|1bULY*O(MGw%t9jgsL5m5Ti|6LW2u!z zQkt}qF7lu>V_U-r9nv9u)Dn&>u6T24B&2JwJU`6JqTjn!e`c7U?>RVoUeU6DVJ6nC z+^>8E%IMi~DLRu-_YX-1V+vNOl%LSFDka^AF{pB4iEpdV?9}HdGr6NP=0#DV5U5Cg z{r~tOxt%dD@=f4>?x6V=E|C!BAL0ISNPHUr8~@)JFQ%d0wk9MWvBrI}rx7q`rg|4>L|!WaVwI_qAiM<3KjI>9IxhS;g#;+5#6^fe24-@xSXq1?@YT z@fy~^HMPs~TahwV2W(90YJB9!Q!C4Z*~sh3W8X&jBY*3RWc-f!CnpkAf0((#=Bga` zRrC0GfHLKk@+feazu02lr&8A%H(cj2`R8$*!MERE-$XDE>|%L14qgW;GVTw65SB3e^6+zO1E@& zVtuuWq(#Fdy!qJSj}rLboD=`_(_ep-{HJq2!cPF;83<63B>2|^_=!I! z%Mm0A)c!u#nP~s%!`~DD5^UmsqmLPpw%8)MG$6IxdA3pQm{*}?On3l%wxCa># zHl-vQl%5|L^oDMc*e!gVj)mN#o{H9Vs(!TK8TryBRc<>vOfFjcvZy-mcr3sC%np=? zuO~yRwye>-+l(qVd&V6`zw+zzPSO?6VLC;`n`=q5X4<||{87KMU9dLVDtAr0-}Aq>@t=17|3gNAP5y87_U2h63~(W8O>x=-1AL#$*2;xy!2t1> zAO5X_n=jgwhuT)G?%DdGi4;#@wV5Z=iA&8-jSbal=5@{m!hQgl`y~XN7Gb>$8wg9v_TZYL56H37{-#D{BYmFqgazrKeFJ^UkZ{_H!T72_LQ#(st}$^VD4NRQsHFXIdkAnrt~8$IT0$`32K|pMQVm z;Qw6y@tc{+gGxi3HXDH0N;~N<+glMj7S5dOfMNUta z&3xCnyN@2kQ!odDoVpvw+(u|R9S{*&1Oo&y+;JUP1Gjw6miRh;Zd={I%yrv0l-YQS zF&OIzh#LdD7*1b>PlSC-`9KR695y#ciTnx!FbDc^71KhKVZ`NnfvP@qe@p!3R{>)c^2w`y@%AWz|iv z-kBju{?_{7K5I6IAq~mYL#?0ZK+wLytT;4=)UO9*>p@O1lq@q zUo7v2alR6gwNER83rcky`cFvk&uZYmt4_`Xe(2fiXnVnvF~4Rua}}^K+Y*=Iik={X z-upVd9yA;fwuV;&3cVk`KMiuQ;hDbKUE-6`r{)6UeP{0wa>`Mdn`)~bE!@QZ0t4W1 zuZ59gSEoY{BwUBk6MmdJzOZ*_d`a6*o0F6>;umlZYOnGYJQq#f_%QJ1X5X|iSsG7Y z&O=7J82Lo)afa}rRwex-zftH0h8>~q52i~&-`QK1)4Ker_mj2FW530SMmU!paKW@Y zA*BauU91{HMr*Al)-^Lf&We~%<3idtezr2e0AoLrVE}&$7$8;!2EYsji#_>qgC5Rd z0H|}-4gw5d?To4nk&)g<@h`;T^gJ^Re`x}7pu4Yw(sugKq6ZLG_~*Q6tIQ6kz*zST z;k)mHGL4cC+EZYj*|*`<{U{2C2yOaPR)Ze>2x3R;2}v z7KN({5UJ54@yyP77?7nXao20Al}l8T zZc@IRd&&tzQ=Q9Lz=oy&l-T8*0SwTN3Il9u;P~g%1<(wGVWPbu3 
zOBEG?Qi9uJ*zBMZ>ms`HKA*pcyL`OG#!zKW(0or52;B%;k9Ou58)CR|gj{-1J-mM% z5UD6BBbJzGRQ%g3kAZhX*P!U5ODIb$0wo#7H7MM*ZPBM!lmZgN7uw2rkfQlgWtr3} z=9dh}R`!ne&1t7hKV@R*`nE}--yV{a7~U9)FujIS=3}&j6`TJ`a{6xF9cw?Nm2+d1 zB9@c5e9(>R!~jHeYBYMV2m|asWUCMzr-J>@oxyb!02 zG+#B4fmOm{7p)4X?htSwO`EaZ@~a6$w#{`clSw*v`b%uGND>_-2__O`ig&<0w#B+9 znH?D5(QhGOf2VSl46G;8XAZF?w3kF1<*<^@clT3Lc<2&NMNQG_QQdX^OgpnP!zQu0 z7E1B^sR67fnWH~-EwA^SO4n=;XmwA4z$2!;uvrpT``5;ibgZX)=%x9z3Elv~SzoJ7Wt{cjo(?XrwX&r-~a7v5^ zJ)Ata2)X+CPs9NdZ2JFQh|BZ+Uo6Vg|C2@ecZmDr_B2o@L;}8E&Ni|zbQ1}>*d}tl z5BQudJ=xGQy$%C}nrNn5Vq5;4uDE`VJV9nrn%G}~vD+*+d+#HQ{I!7?BsJ+PZR1l* z65zqKl^xc6_|3iMeYGnb#$8w>KjuZgbCT?4Aqd<}`8Ti5tJqL71|!5==Qw_swUcda`v_MA7x>8358G!;oh{ljB&#b4_>=&5Mu3$G@s5*4h^n zGz{jcnx1~0%K1>~H%UaMqC9FCJ;O_tGzPREx4oHJHWzD1xyUor5|`^Q{ijV+zjj#-a$OJ2EnUBC1T;+H0YhMMHg zRc{u0-%`CL++x^hJCXTb6|N1yNPH~Bc6jgvO*yY&|D11T5mvMvji#f`0VdcP<=>{X z%qhl=aX?+ZbZ%ON=cj698ZYjuxVs;=tTcHno5zr}JeI~2?J{(8`8>Ao-D@3+YmKdl zCHpgay2yZQ+cv=_8-?{b-J-UY7%t3;yY%m4c7HRD>1Qp^)xZFqHIF3$dtp^@#myLS zN8P5db&A<#PWW@8BCb$&s)%^ej{YRT%bJvD$SsiS`o9ood0ohdL8H7Y2$_1yQA&1CHyDZthmR}To_=&S!%JmzsEI>;;`S>dg8}+O2%m`V@t*qDE)-bXj+w-s0+X-Je2X3^0tVO{ zVSt|nFu*;c){KHS6Eo-Ab^PcY5O=fqpF5B|@rpi&JAcpcq0ilXLmt$j^bYuzQClZi zIBu`0IqRzOc8aQxnwEP8U;x(~xq;~bTOT|6r#<;;UlKQznF0{TM9qDeEDfDD#j-nf zvFwA@{zdPE{QBL>Yq?(;S5!&SIMVUsk@vB$)>qjEou+@kp?`Vv^e809<_5N%+aJr= z#A4adA45@N>oK+qI2}Ex$mwbf*oI^1FOOJXiA^>%#f=r?Sud7qyAq4ypO4G&WKY{w zhZ8rw{)Sl3_X-}%0=8p~V1T)m)Tdb83l^c9p7YswiKcm#+sAldHl^tH)f;h8P~P@pC}(H67?$`vm|1#%HnM0r7t-&3ZHmVGJvw(hOtYhwhWRb! 
z{hYbb=gUTY11*6tq$Sn8UD;9edmxXOeRk>ob_ZBWSu6BlhqL#AIkVRPIICvH>UH|W zLU!^4uP+e{@cD%lISUwBd#*-Cs(;zV!XA3OR}jl?H16A4b4};V9$*)xw2GYy13Z(5 zkFV*82b=fDcx+B37hi5o9v4nng22*1o}KGUwM)`#uxjbah@luQnu z5Y8$zLuP)q}93Dr^rHD|$2N_n*u})U$z+m89h#$eW_mt%%Dj__S-= zKI6O-a~qSJQGE_#Ro^ajm(V2io+{4gzQG^I)J)$cer9G%-bF%Rc1UzS-)Wj0d~e>| zrZG#`QosB;sn7d{bGXto!bQ<1JT0bg>c^d6dSh=J%eaqc*Bh@snrHPLS$&I@z|^Db z(ow-2H6y2xu6MyZL6`iIr}h`Xk0tBz=|@mpN~4!nq>ZHY$*s}tuXb!o&UM4UQt8Sz zpFdrrQa4@bhu{1k z$%Mn6a|R(@qByf|55b7q3(6b=y>hwjgN#2sLz|=OTOL0!Mfe zD(|qh-?mduUXF^9bf?a1g*D#Xpx}w>ogKpRoE69dIV%1h&fvx1{6LGU`(@$;Tidy) z>ti-lV`B`(^S26kE~>J*zT7s8CPwP6DElC=+h+PJLE=2ph<|~l?rHL-0ri~01S{U* z-~|V*YHmN}{XOn@Y;T*`6OsSu?2{-&$faB#*Q&y3x%yuHSX708@<%TWV653cX3V7z z>IjIupX*R2)Q52Oa>%92FMqRN_dWfU!_P%|S_BE)2p<+WdJrQsC03#XMRuEMkilt= z!HvR9)d!L5wlSqSXhz8IW-aUwT_1)IiZ{wC%|u)i18jC2Qd&;#Nx(EuvbPap$0Tr# zn>XUC_COfW-?iy*4~E0)M2wLY^nHRr>ub+2?Qepj$sgr`{X@A56cM1<02?uU zh>bSH!^En z&jpE2i}$ClMpd`a#226kbY(c-N$00y#PlDK(qt1Gn&deK{_Xz<3$fr>czK2cWrR3w zwusj_;6VIGFTm~CLQJw0NJvia3crwQOsbC;)38?lE&qbC8wHc8Lw-_Xe< zLP72HNT6vEZEBvvb?J7Rn`~n;`?<4E9dYfGyKu-80a3@;Jz+Tva8?ok+3_?wAv(|B znETvHY-nRhI$m+imfuh#na)M^5yV`Eo%Us$%oS99vLo`_jgxYF!;XXx!7(=~Lt2wS|z4)WBw|^^8a99zpI`F~eFX zc(P26@9a=1fQ&~*aDTNk7$N`zysOE1yk>&|$UqE_u>spzhekK}4vzq>+3Tskg&Pa$ zFHWw-n-;_+mPy+Q6Sj#0o(yzDrQ6!p$&TkqnQrzRc8B-XUZ_w;wve1FAe=f}Q-uM* zgH=}Nd>eEz(|on9#J^>~yCv|7n%KDspki#3QK%S#Bz=AB<9Myd2vyg%Qtxuqf00!Q zmGcO}0KsD0aIN9lSN_ksBLFu0UqA*EnUdRBk$rqCVAZvu*DS%Z425pb^mY9S*;40= zfk1hri|9GZ+uOPeF7D5Pj(kpWMKuoSSi0GoK=xq$pWZ3gOG^>+xHPm*a68CCtm1=m zngsrcB=hK!rZFih!|T36@tbO?=bU-Nr6AF&wd z_CASBurF7v=q=XLA%6-iT?(RXBfhTK@T|ZnB%~`@_oqKMIE|vpo+mlM7HWUU?yb-J zMl4cx{-pQ4Rpaw9P1KPo-5phght`s^5bs!n$nlQi0uzDMGkPvB8xN0EjP$mVCVB$ zMC8xg%B^?cSM?7f4}mLLjjz^aiHn#v8w#a!oJ8WBbn;yBP*q#MgLP89t6%2Etx`Io zx)@s61h~DbDdjLrBqn*NDVW1prxL0s+ihZR&^M!FI`*8;G>NKGR?GGo5Pk^bH|b3L z3KW!yca&ZMV0X{VQZzsB6FAV=O*Mb}wrly^=CDMUNeU>@UYuxeFo;syfOH@O=_iQo zS#w`$lasi_HJJF#Ux@4OgL_?>*ijj=Y4eOBNN^VLHvY5%gjQnb-X33_5xPYQ&TiJq 
z$moUWEL9!sph*)xd&u!BK{iz@vGzN(Zx<>JGDi8=O|6FV99ho}zsW08K;ONuT*qR1y9s65b(M$xTA`6lBb z(>LYj@U2N~kcMM%+gson8-f1_IXSp3q6o|uA5|e=5joxi=N{J;h^~>^9el&tv`S6+Lq| zN=>Y7t;%Z?>4$`nB%9+7|D3#?nuP(ZM5a!2ps&XoM>&OA&4N;lzQonfrabU2fRrJm z%A5tZk zG(E@K$HEqKI@g{JY@8S#zGzBwplx8WCmllHcuVpTP1vO!WS$oZ-QpUy*#c{D)Terl ztX$2FKS0|Mq*21c@UbeM16dnUG@F}ircUVFTH-&|)k!7jkcPfjN$8u$Qq=j;NLxH1 zmpjp+sR<}!BBl)KKz1YSBQ+D?}x|83q9GgA$y2O&~E3lvxt08JZBU0F-i< zs`%=Mkq#P$1nPJR--iA2XQ<#;Tg^NVq@KSv?|H$76=r6pv}VxwN6*xK)uf(o@VmwoUFmZ&BSdL2cNdwEThhxU%W6 zN&hBi{%O!BAWTb3wM4yfx{zb9Uh;$T6n$%Xxe*q`NA~@`n~Q8Jj-j%ktse4dQ>$ zAlR_Ee+{j^eto7)7*S^9-e`T|j`C$xXiCMwgn4E|*3fpmsU>WJPsrIDecMe@)us@) z_g$q|pDJ_oT$#HE2W@O{$;*!3DT6CZx-!uK+;oB3{GTq>Ap<-*>)ARcxQWYn$^eZo z-^da;GzwZ|XuB+sEvQ`LDqLw3g&TON6jCX;{l0Fwb1HI}B01LyU-vS1uDl1IsU`C$ za4ySDTCT)ZT*Tih;@#6<+YSL3~+t4?`0mkJy=%T6&;&w{66?_=6eIu zpnq`VIK)&YY zz%8y8mK)!8r-4(U-~0c9L#C}l&Pc0>vUG3j@c1NWrx0CCij=~iM6a7>3|_yh2F1!E zHlmqeAoxGk9DOpsv-N%AV4#m5%X`eTWnV;0OfRF&$YoY%``EQud-{PIm%_d#y)t;d z8-m#;AewxjmQp7?7&+L(KeKI+B`L|$^j={78p?cdk<9ND+efHHxhK$9QrTbE$iMSa z^Ab(5!T>u|=G9D?|7~{jqv9ude9Rxq+f{wfYPqFs{H#YzQ9V<*?jv{mdU@C{qmDm%1$F zp9s#33&y4NcKXM1#b&=N)61B?f3wu8R&P=?S&(dK(V3I+bCgS^hpAS`^h>bIs52v~ zXA`Ftm81;xC&5QVkpn|~ywkV}aSx6J8FEp87&{?bIzi7Reoo8S3vt@`Cc%?;z1vpS#!?M%eSV9izHf!M6lmtj0KBWu-HC$;G{*MyZyxJij~!q^`~n5bO8G?8Hz zVP;P^<)f?bipOG|hF_GQ(aSFPeorQg&)L%$3~ItV^@vX)_1o_YQr%W@0M0k z)`KqKG^}-#bW(Mcu#m1ueTQ99h&oB7#LMWD0ftH&U7n(6g{W1TB~o%D6g<}622XKf z3pLMm>7QPTx%Cy7AKvR}Or5wDzg+qdWAtg`%O1njYO_zbC|1)Xy+)toh1e*kvX@QyXEug*o1(>SgGI%50N$@AMM*jL&r8M# zp(lRIhtJvE^yL`yg5fO7zeUgeOC>N391v+MOp!ZMzUI5cFLR5++V`DeF0#=vX*(Ci zefV%DhF@T0fyU8rF(coYPQsDME1KdPTcwCVnZ}*$=2`SA?Rj0d4I7!R(!icf@9vua zb^^7d`0JZufa`(L4^a%TdFoaG<~$_leUhSuA6|b$yCBM8^< zkc+o=9b=yxB)>E-DcY?7^ya$rUlQYw=;^Cy_C`aY^0;$Ig{~=$0A$jXQ6H?Co`^oR zyc!F8QZ|sm;`5yuolzV-bP{9 zL^zGjOiB@)Cw=}ncNgaKs3jLnpjTTmsmQ@s^# zCep;Q@+(n!jz3-MYP`vg26O1pQztm(%s>!Wg7qR+Wvy8KjnWW z?l0JBw!~ny&xU+!Km@gdyYl^^6!UStj#dgk;gGfNMe&6l!m^${3?Mh!GGot5i|qSF 
z5J7v=#71DR#Z9$q&84H)952x+ETsT7{=wVubk}I)@iIXUav$o zQXjfV$ftMiBCtF`QXI0{U<2t(<}th7VLE(@H!)3~?P8&pw!u6Xb4ZqlG9$PJ3c}UI zA3RJ}tIXpIn=F?GNq>a|>XBdR)i*S#{GhO0G$p>`A3IR_mYaARru@x)L_Mq3?$cX>xq`N@1VzOYuYB@CC(Z)3m!x(vUB zUXsun_e9X%kk260)kcIb6tFNI#b;_5ZUa+xZHg!*$UBxZjlZYpeB=*2aqr!V#bW01 z%1^vacs4d+Ep7EaupU66i>{aYd|0mQOom(lhqQ^ ztYPvYtktd8R+1|VF^8IP3%g0iY@sSlf=PHb{VR5g6vrBMsU3Doy_D9dGMUMdiXP$^ z(^tgYpJ^o2Srt`$bMG#t5^VX90Yx0eM>(oi6MCK5rlovdfx+5)Vp%fOD9`PBs(OiD zd|*l-4WP;D2pX%(yN^{l^@2x*V>&p$+EbziCUAa8tZB_o-l}_fu{b=gPZcx09!3l* zD||e>3oK^Xxr-Il!~$)saOu=?7%{C)%aV`=-OrVT4(}-zkEuIrb^NIG9rm8o60d-G z!vK=?W?5*Tt8fPv!fo$gbu1kiNy}AR1G~X{Z zD(ncZOWY}*m!W8OLLz7;n!9yYdIPB0f%lI#b=b5u!pjN@gqPpHHs~kby2mHVQt}lH zixaQ!74sB{ROVvMC{~IjQ5@;)s+naCwpR_b_oE&eCRrP4XR9A9IhW%-TaJoja@LM* z_EsnTk-}-_5}y?+y^E=Q6Cm*U_-%+k8tQBRWX=VhKIUO_=j`yw8V%erW-W7F>9DTt z9A!|2QEk{N(K&Aqq7A9BT2YDS4a-m8{~ zTJmhu$ufb|PNS%MN7qDr3!XI0%NC+_--=KxKO$>HLXrBPQo}n){{*i8msI`pAL&5P zY_D&$PGv$|WB>xw?Q{s+=3KXcuvJ-xj}F3=?Lx|mR=x#7W&(mx)oM$?R$R9jwSX^4 zf$YQF2R0*1eu-cli(LLun{5f4Xh(if{f2D`lb?OwT<%>Rk%elqbV$MDOk3J|4)JiC z_0M^%34A>1x!IP>8z5vf`}K?&#A*`_+9aa%_U6qa-gaUV>O`;LImco%!{%`%T@A|o zJ1XO$Y_u_b@41MmJJ*s5H$}mBznJi(siQdww{d)M4}p`T?BEJdnTubUMl;LSmEl+R z^P^npa(BOP%pgNHV25`16A#A(n?>vAHN^4gSo}H!91K>(0|UFQiJ(#IN!RTSnrnS> zX5vV>@;_mozXjp`8XPx(TQC68)1<%X`C258B9b!1BA{f+>Q+^+$BXQOdGw1T8G>#5 zv!m%po27n;&+Clo96BQPK~#Gv;2 zF*!d{nKPd~l{`)l^rFI}`hctc zoPt^4qsxGaQ9#&v9Iuy~ttJ(n3V*4S29vJt!^lwjB1`g_yYGl0Ux%M{)+@z&od!{z z7585?*Z!Jf8vCGdv5m4;S~%BFH{EtlZS*9W|MaS9Q~43k0KV*pNC%Jm)&&P_5<3w= z@57%k;VYa2+2QwV$uNL$+c_FMikM;9EffUF>B=Vf{VF`dO~`CFl&=nG>x%6_9)(nD`<$iDmGh0jyspOR1TPtY&h3i=2tUUIA(+! z<=>ToX(0gY5n(HQbbn~f-i%Gi8zM!OMo3QD=FApHhT9~Vg00ez$-Y{f`zLOoP*H(k zmw@%V6k>Z?P3D{JuQaPtSj&i6gY3sBriXCU7qb>>(Y!cPgrr&6S-rUs#!1QG=pB|T z>8T>a5=_6fadZ zA?n(8k3Lc}xqIU#B82fWsTu|HPjclYooY^TA}53%HXUr;eUBz3=wCy9->etXk+$j? zoK&aX&X5p<=Yocef}dW%mnZTaHsJ+@A! 
z(dU;b@jLjF)#-z43Nl)54(igb{Rr>W#DX09dZQe|c-siwWV!367B9B6ZHhD^-~M9% z!nP<^>;8T8+}}P^YnlS1^*JaUYGSfUY806s7a^6cE6ct2wb{9hNjc|{UIC!wrPg0I z>PgBv^z*R#@pH;N4>zY+lZ#02WCYMN6BnC`H)B`cJ4RDKxj|N~(tTd26*WPzCpk0Aj<6TiApxAE zHrnOmw7^(VljZbGXRi`8%n?4ax&G0DrRJ=$x+3CE;VMmmr~p#WcSqfInV%bnumGrhZi4T*<_@>WHiC(5VtS}kWiXtU%w73D>~xkuwG=@Cc4pj(MTF) zrD$6DKEy)Qjrf{=@CBUNk@`%;QLH+D`OAYW^P1;=)M!K0E^cF3)Gc)~OD@@h zJD;FD_q9Mq%ju45qmDj=%!p#0J*`K-j!2dLGYygZ%5rwCp&V~p+C9P4o;5k~RnhZ; zzF~2R4w5~>i@mOZ#r$@$m%i$)#OF_`Ws8O{tWGuM=S^Z?|73DT(;?mt>Lc7-R6Yp@ z+jQ487Kma&G@L-PyJhAy$88)ktc)|6uc>Y(mv6NsBYUzi|6m^p zU7TrUewqbtod|8jTpoC{HkEZJE)_Vv-KTLrByQp!6o{!822Z75G5MqWPi0d_+k&LeS9NYU0HqblQWSBPqW9h-+5sx><%XoE$O?HH z;jBX4(;w_E0hC-KP`tA_7=#QTp?vP#%pxPS&}8ye!g_VB4>Axd@LBMuZn0+mL7hX} zX$=EtBy9K=hxLdkdGR=f zSg_I%zOr6XBwbMz1L8nO^qUg~guD{675TWo#{x@n<#X4KnFk%5`QA#tn}{}P75Mc63X07-vm1UFqU-*3Tm&Q*9N>PR3k4Uo1vgfyBT7# zy#N>j-=AJ{I8~R6q*3z^4BYS+Q`SXVWlL*VsP&@gQ@@u&4hF0nSB0lnYUh6RJ`wV) zz<;l-IAW?EeW#D~IFWOtUeuf*0Ieu4i%YT)KrCeM3@V7)b>yerdseno>_W3F6|A}a zaW_FhNwEz3a}UkkFe(jF;%dS{UJQFNY7vP*aS8)6emyyaOl_4z=#AI91$_mz3IV5H z{~EKy)~@-TdyWEm^+3Td4U~uhVT8rBRfKDdb9c^7>1`*QhkYd*Ct^ z1HfE5qo&B^cS^tlQnvN;(3b8=%%x-iu+QY^Ojcyz?!(7Gz_xiQTL#(ZQk?STy~(eY zQ5WK5%dwHq3FQWvK1p2yMr98cD-|6pckIw(@>zmbqYN&?E=g%Wa57;e;6#28>0=UYFew7rjN1P9-&VcM38_$eJqx}auSz3___0off9MV3$tI*nD zbGc`YJKx%e(~dMB^8UpQ*&s1U#8wm=ie&|NEapz5 zRbndD(NjkN#oWBJ&E;9&Dn~Oo_Sb#V8m*j6-RI2D3JCXGVlh4f^jdGs%y(P*&z_`5 zov@sLCJ8XM`DMoDmgpa*{QAvoP1CQrjXVbmAwf>w7C}i5Um=m zkswA_e7C*Fvap^*^1;4(4Bgmv9e=f%)vm(2)G3shjBKtIYWfK|g`e^)CMo)Iwi#v0 zQAk<&$dHxF(2i@ibAfhD0fG-UepsLDCN;uqS4j75*R#fXxtQ>-k3^WFA**W{{c&U5 ztp(WKC{o#!x6Cq#bgK7N|aod>$95Iqz@fc{AP7%3_!4xm8uRdl35CYt<_ zt$?^7luNSx+tltgJ4cUirAwea`66ll5hQ>shN+Q<*N37oNi75^9v*x)H>Cd%^tYvk zIQB5!6dhivNS(A;LUm_Zk-$RG*jVHmAyf}M)tvx+3E{%%>>K8=zR$&cRc*Osio8y| zuGWQrDdYcvqsxyC!?Bk}pqpR3ST(psc4*_XPn+nF`;(oQK&yj1f~=hEpz|b$-oTAf z*}fM=UrzLm*vlvS13|1D6R&blqqmJ8S>8BLSt~u~`~@K0p>i+jxO4WIo5b;-^-eCw zUDESq0t-96i;U094=l|GAl&!|YK1|Hy1dzrH^6c2HX9?HNBRj}$_YN-PJ$jZuDgSh 
zGXbDLuFgQm{6m{n7+|A<$LOB!i5>=Mg>U$WR-C_GBq zR}2Oqt_-+gX|a=Y__CPz?q>H)C)Ncc$DfNStCkOGSAQKmt<`U#&lFnpb&F%hc_utd z%wllHLKQkCdoVmX)#kF&ZsBH;_Mxkb@Wh1GrU*+~zwGtmg+skiNd4Du(! zHp<}<>jEBd58TL^pL>$XA!s}1u!HYQT-6#nUxOx+^c7-13VJVqpt0=aHDXsG^P)^o z6RoHa@lAsBy)SRh%o^(JORWUHCMLJqFUigNV&ajedfl5!vv~T|W!N?ipuRJf4(2q~V{C1MV?u zj;99B7qoEA@BDFjl!(1c3|64HKG0Fr*s_?xpZuk8r?2$g4LmwedI>cb)R*(Q;z8KU z`xeTo%)S<2Nl|s@Z!VmZ;qLBqTQ%FJ(HsCeGJJU+mJjmukV!O{wtGqQEDLM%GBp`D zaFL`{!+04hxb;mFq+&~6o4&Ob@{Br?&WbKYUI(!>R_EI{exwqfv5m|s*K_{*@u(Ut zlo9ml14Vv=gRwH@G#0`-sZ8BsdHi=8ubK(63YFBNRHu#)3Y&67p*ju=Tjp@50Nr62 z&a^KAZ^pQW`x29vZXpi~2S)Nwlda^FP#v)J246T&lGU3UJi9%0yK)ncXdKe2N*i&= z!X`GJwsbkfVck|RG=l@p#Rh4J#8h}A1&XZGJ+ny!o1 z1tnImoIl(CNHutUwp`?QZnTb>+G{7YzU1kk<`W~WY$xszmj5MQgWfJojLk``fkxm= zPYh8QI}ZZ9$p)u>7Up=-V`5eG{%3hyOQ2u0^Rh*v9)8Y=(USYX#z8*BYtq)c89yEd z$WLhu_-sN^%hrvj6<^p9{ZQ_9;nnIA`!v|C+O3tD^pm56j%iRnTZ0;8zS6Q}cc1;F zld@52a^S2MZ5QFFNZT@l`Xk#z^t!e-c9tNRs`K_#QO7|eHalpqP$YA!Jo9a@2~7xv^f_3gz>bv$^{OOpscU2e&BxD#1hEXs`o;l z;#*KGa9M0)gW`GHK4E#l8ZJDp&u!RKYIX7G1`5vfP@x@o%nY}UB(^wy+46JE8V~+PW^bFQRNy!lZ>(_nCa&1wR-I(r~28adURN@F#4Jc9W7; zhzEM5TgEZnjETGBI6g zva;n;(=4>AHbt{e{#s0Ou-If@)rP&Ybnu>sT#t3OsjcMvHqf8IbE;3QS~^0M3HoWu zf8<%@klptM*{CQh`1bYK@YCe?$eD4ikNkwazIKyWO>Ijw8(}dvR`y>DvzkUQ9P{!C z(+^d3-gex*Jp@+IJ|R9$-G|i_AHFi`eNiR^%xKuHt<$l0!r+t{Av3{xhAU&pzu78N z1+fE%^cYoVt%ZC7Z_gPrpReQ(`o243^2Fm-ca>=J?8q59TAsT~#gxX3?VTNl?_W%a z#dG}d*}8Z#j)B_$%3U>iB_q*6J|}9=zLgYP`Pokmfl^}{1}1(!zL60-3Ny1jSr+Aa zY=~5~kT}muR!lYi9c!!UhXT7`ft|OY@PJQVpCvX0DH?s>i75*FwTgNw_u|<2 z^FuORHWgp3k|yUU#qTjOB@n6ey8|x-F#Asoy&kYv^%j?)GWaEQP^s@YTrhx0*(Jnm zn%$QpbFyo5>x<&29B%al<;9_u+er`2Haq3=lBkL=qrVQ5iv?M)Q-M2e?KQW0MwOwU zrN;L@%Hh#1*9)@HTu>ujL&7tbSEpmv5Yn5Sw*$>$n>_KegWv|&WJx+fRsGZx{((!QcL6A5Zjl43-4@HI2I_@{U40IWmMa3yX_0biWexZ zMN5$4E~OMN4nd1k+}+(>i%W3{9^4_gyB7%V?(poq?>=X(z0MeWo%JCZ`J9p5_w}FG zoWJ=N-BxYzTH;GR2uu5J*}MSHV^sBkv(^c|Fn_TmoxQ-8F|CiLM&H}#*Ss4RC)ni{ z*2&aYs4rn!s3mSgmzn(hJ%)6mze+@xlFJ1K&B$SXEfK+d*t?dS6H?v$sqzGs$kNr4 
zo`^KE7k8zhNYchL8W+N_*C){=ldKD#hZ7vb|k241FfB7ZIK3!c7vBoJk-5HqZMiiEl3t4E!iYAs=wtXScx|7R*k_-}&zdDCKtUnyv+A67O- zA-^+NRtyc;Z5(>~!$FhVR>hI-C1SMJ#F{1_ob-jfflk;;x91L90zX2HMF!ES#4iIHwkDqHv>XT*e^ay=N*xRA7^7io*Zw8 zQ;xeV=Hz{;S1M?xd10)13@?;k-pN2=6kPtuYUVGqJ5Efe18~TwwFw=??k&L}4Qwt! z3Aa}iNwKNWjBR3E_sD9n)VyHN^5s@>S2x5RL;t`Ya(b?sVJToTjDfVKRm zps{${cw8)DiJz{h#0ip`7|>$N^1=uj>U|5^xoTRDUc;D9->?-B*aqX|Op-z_>l2cC zZL>?69?=XH+71?6aiN}it@N-4)n=}CB*C&(=T42OYWL2sLVnrV+Bmc}b!?bEVNu&> zzi*$Qg(9|5sFTYs#uSrh<_ds**XFn^Md{)L8PG>gqRD}s9@Qj#C%>eS@=VX+B`CqtUix`=fhCwd0DPy2nJEH9rEh;IybiU(bWy#-yN~!40 zW<S$k&52zg31n^tky6ShGtMEoFAWs00u99wP{pKjsrCWT1!BFhO{$Sr;161ORnp`z9tO=OQ3_*Jz`AcJ1>$#wIc;7sr74p$i%?Ph(W zoMr3^XHU9B*kPsk@Pu!*%x7CIOOJPSj^Jrta6ywb-_a4<4y13mujAn{=?jQvn{)vT zEx%7kWFu2uwp6=a@|=Wkmax#|bR?_=;fqly$eG+%K8FN;*`lomI80dKX}m8Sq29vruaj%4oP@ z&*AuhlV!^we9W^;qHeq!n-_|LXnbU)vUhd1U`$e)$p@DZ4fJgPts$a&VJ4KS2R8`v zbS^w~^>A5VPo#a_{vK&aO8B4=aJ!e8w3`mp|HAMP>X)KrabNbcS(yK>RHWxi3T-s} z-+LmmQ<3JPiA+u_Is7pV+)r#H(H+tQoQk#TvEQ-pqY#=7EDU4K=VGd1Q#e6Sx5(No zP$G7Z?MQtT4*qzd(Jq7ab69~krP-lPn?jT5&A4BRJ`6JUCYJj57WUbEaVrU!q-XsW zlK;Z@MD-^UApgJjjQ>M>PJBE(LtfFi`#q$KTr}jSw#1^}r}a{H=L(mzM^rV3$(o0v z$g@0!yMn(qEzR&y08C!#K{@axyg+SkGSnF~X#@Xyk{3iOEi8DM@%Z#cY|tMIKeQQ{ zY86wTPWu?a&v2|>{RL?7(af&Hw4~9?;CIbu}Ch!G<^={|J-(>>t`G2)LL815inlp41OpE zO4eih50xNQ?0Y>Id$^q$Ecf)0kzCd)%jTO&{0&@x+&qKW;~YkTECOlWh1m5L*wJiL^EYaZv7-AN-=5!u5n?s z{Kz$}X^x5*ng8qNW}BqlU)L7+Hi7=Efh*>OD~}f&9m`SZBjGJ#8kB<8bZ{psKI{+)#z(%DD2J)P*DGj{ znRfoX12^9L43S(rGm$dUm1>hvwiuzMfqR;=!6TU?89_B*LIoeBm!DHtFHjN+HOtBC^U7v8M+s{dj zYRZ0@jP5qLbaBvZQR?UWVt%t5^~}Dg3Z;Co^do{Tb(t3ZF3;b%HFIlCp+}tFl5=z+ zpp9d#NY{|J8~IP^krY{Fe4?`VxkPW%e(3Ee)q3t`{VQ2_tucuOGKBFbMA89^R#;f# z*n^Q5=S!WgbBo)A;f9eC^>eq7jP*5Rdt=UooZwQ2ENA)-wB*AoyfLQP(Hs&^H5&#- zFxOs2lSl@O^O=*RF%C1fI^m?X~_%+6FhyPOUt&iaE6Zx*s zn5RY`Wo`9Km<);wR8PXiU@xZGYbaK z7kJrV%QL@g)-Px^>uKwIbLruB@MG0)(B%r@hXVbO{ zo}AKfi6LGKQHKR{;g~k#0&+i`wcBi-M;=#L{U(VSC9S9slxx`l1`aP+6QvBVq#L}~ zLq-AnAUsPyoaw`R)6bt<&q#I5{eEU$E>Q7qVeMm#kH=cCNG!bdDd=#|$$lnzNcSZ) 
zCm9Sp_p23h!i4+S3Z}M<${9UR_QuAXoC@5i;;tQ(sNX;^f_9H}HZgno0akyf6 z_I~HDI>DOJ%F5HB)64`=q|*>h6V29l0ylZ)sN}4$mAAk`_{1ZwTQQ1~(3|06VN&WM zu21eS?&0*LU~xXe2+n#qa~>_Lh(5?i{?e@94^>Fe?vJ}&n6dbSFFU+~v-aZ$TdbuYWcrtgZ-+ug{rMksw{lMOX zt@{sLMCX^cF_F)gOAkR%?DtmtDjy{9bQC_W3ikC5Q=7`M>9TU5kOk@6W$b_*9&dGh zFfdZkC-i19x%q)A%c2|H_;g+ksi!(k0YQ*K%v33C4LnK<^D5a|2PLbfmI{T1w!@a$ z7b@6j4)Jm98>FP<%I3h^?!pVqzyu$YcbTrCCnI7meV5*bg%)QM;j3+5(|xJDMJ^|! zz}a%z)7W9Y09q!i%0aHW>XjL0Mt8%6k*b*jPb7?XN3n4y0k_l13lRv7^v$a_J6ic4 zx^^+8&)sk+Fgz`sGyb^6KX5Q<@Yf{Si}l5Q40F41MJ~P^v!_q{NZCh4mcI8hTI%|H z=5q4kM3GshC`Vi2%N}!{jqLW5ip#VGIn5(F*R=tP?XJy z4w$Y83prkK`$mObR76-!hj^;xL1{ByPw5IYwE5RPIicm3dvcP6WfO%}nTq!=6sxaE zf;|YLO)y3DAGi~et|u`I{rs3NI=omu?uXZWZ@Guug10G>#lbHRK$4RJjYfih;FkaX zYN61hip1P7OPGABeBF4c&i^-7>3l;3rWvM)TE61Ny$DV^0Uj7Pw@?A@mLGSY zS7c{{ZRjc&Cd;R9%!V#Xh)vxE+)KCV=q9xb7ti+2iDeHDDY`uFoAIdefA85Tt0~EF z)&;V0U<>Va!fY9#oew!xW=WfO?L<4y&8tBtHm!^Euw&T8?-Hda^^Jc1hBOZIudWe-a~E@} zY8$o5<<>_m8Oq=SpTFZ2rWbxtI%RdZ7jMmu?zQ9P9OMQ7+%Ob3$1`mrT7GL5C|MC8h#v2h-6~bZZ@o zB`Jv}O33ODl#CGXkc)~2SyF{EkwAx`6h%vi&xCfqbyuc9GXmhCXUgL6{*rtXCh zb7AD#lZe@<=eqH*;Wt-{Ue?IbIM2Nq7AJmIAL;M;{;hS{xpmD>U}EB~ zjKaC}HH-CWxrMWE_1$iiO(q z^2JRjQCssRD+rn_5YK`zNP{l%EqM4^5bCDb+og_QoIwee>oE4kI)gSc?ep!0!ooSr zl7t8mF)oyL9uj=#d>zJ2GTsY~ty8xTMve51c5&4ae-2x`SV$Gk!L6-+4Ed(E%&70y z&TRMj@gvI0&o=-Mp$nhqK@X~WjF+l+{tqq}b$6>@FDrki-t#W}wP6O>a{vRy5%p7a<)I#7-VUfvthu4adt(;e}gdnsT$U@;cq5ATTr<6fAEaP)+ zeAN4YILfjA8{ARe&7Vkk*y_n7<4ly|NV@R=D27EhNsEY(<5Mw~&k-Ii&-bZ?9sbp- z?O~dc630b)Iabv5>&@%RVw$9P!}B*Dzu;)6B>fswE)V;Lzlkq#yDxhc?oWsRa?J6# zWnf2gvml!!_R5!cdGZiTlJ~spOuU$kakl&LJqo?jP4_H3ntiYY z(KGl?WDUg!Lo`Ox=$ydNm?v3 zT&#PwXY#f} zPetb1fb9fRCBx&-CoQ#5A*8)B!Nlzo3DzVtd$-Xxkq*vCXFWC9OEy~gsVC35?v;se ztU#rLN{vu;uJ0DV+!d&_|9FUBZdKvNerYHlx?M10|*rq@ywH zvx$R6e68QxZ@DvVpYVq=I2ck7Y2h`4iHP759_MC;VsI<#l8yM3H>y#PcuG-(#cs-H zwaRga@LNdLvf>4EwWo1D*Cj7pYsXxMBAC-A#mn536*b0e5Su<`BTHg`^YO8i*87z< zNX?syf_U}$Gh+JiWY>C$kBqW0Z=G&D^j-o64Y_27`^}WmXp`!K-s0(^XW6yB`ofz8KdNyV=B$W(! 
zjD9`{=}MtlQ`|FB1EM(=^2|<_XJvp5KZ9p&hN$YljI`{fb4NSL?WH_UV4WnyEoAdE z9etl|3~`IGrN*@}C|5`Y64wS|FLF__dmQAi$CJVFLH~j^=WO)of&L%& zo8Vq8Pr;e;=s~Q#qp>L32TqZ39GpA0K+Y383ZtX1QI>(o9Gb5d6_|5PMjBzEOv6WP zR+hpf%WCzb*YG_9$bJHmy(3~UtsVjJK`Bh=Rl@jmK2xHVz;b_Hn-h7KW$2WMvd9uh z7`m&~;N_;5Y315Q**`FE(!Ts$g})I@nF#X{)$+pL{)bfQQd@@D6~Y{2Fbz_A+}DCc z#hEx-`WX!`q8$25%#u*&GO%1%8_}brLrWYS+Jhj3f~w;Z>1HK+Vn4%-7WyZ)`%UUHzukBHB?vB}N({MDXvASTTWTBs*=PGVgU^opIonM(4c*;%oZd1o6#O0GzJ zgRlE}eGZsBfJ|p>*x>uC^ikUMQGm?XrKyZu@#6!yl_`^Oa%4J554BX{qra|gjO3Yp z_w612joknri+oIFa7_js6)OiQX>n0x{@Nr`^TZ&hSs`~wBP7bLbX7PKU7Vcko$hCA ziqOhH(|xppSt|8DL^t?gcvbVY@o4Bu{bfK_ps?Rim1^~rgA$g4C@zw3`5WXf$)e6w z?p9a$7hafLp3NtX^=N1$^R@pDDV6{|zbP*C|soqdzOwF73=8QY3s?hvuEIESrMcEssmm~t2W5R zwWm(LR7Hkx>gp6G;_C%Dy|Whto`z-{7HJp7rrY2bwfnh#R+5amL4&JER-c0av+xsI%Q|{CY!&V%|u-lg~=L&i5TNK=N?n)+{>_rF6PfENvEiEF& zt!Mxm?LtFP%r8TR_L|~q>GU`jn4y~X4kehOMBr>&UUyuS-AdMhN{0G z%NK{nOMPPsV~3gZ!{*mAJrLLYd0c-z$f7XJgh|_!! zL&12z86COm^*a#RIXZ0Tbi(S>sab=Ixq*jL9>(fwQ={cL=p=D?SGcW|d{Xw&63n2* zYLoQ08f-=EkJRqL-w4ZaD= zAgBM)9;r+PmQnEoHI>*z`dP&)QX>gGimXF^BZ(C`r_dW|a+$nq#|TnTFJD=kJO_M; z(x`imGK@V_)N@eW5R44ui|WceV3$t|-k$|Imi|@E=W1fH{Y7SWUC!u70K4{sZ9qX; z><3ta*K(L+wrnLLU}feReiI{crp61NfS^}T7p_M_rR2Zgc9*gvJrvY-@PbzI=Js*K zz$-s6*srd}dv3_)c(Iw}h(i5Xq&9x|9Hog2BXp^-YY?#({n%U=|nWWaP%(qw5x{IG%b4^nl1YXe6| zraw#-Y+0+nenRnWrbJgT3WD{2+f@r6X%(iurm#R&2x%~Y@txhJDz9;7;SQ<~@JU{lMqWmK=g-jC6Zx8uRV) zL950mokQAw1;>Qk){+Gf*YA4i#mh<-+aS`Y5Bqj{?2*$rZa$K_7B-|z!!&11%~Co1 zm8;d54{rURVLDyHL8HdZw<05}G!@`i4?borPVL0Ml~E$yn0Ugg800-G%x5cY3V`!AWl{|1PbcPa7d@N3~orl_bd=c!%9ihokb z>w<44L3^iTogli)N~G0{FAr?iMyFKm+41-au3C$O%h^z-k^7}z#!bCA$R9J}2nJCr?k44V*D6D<^$n#eqztM3uO`-)2^hrPxMe9GJUK7%T^4hACj&{PD8~G|5CTB-b)Wos$Shxu*tmp zexeyh%r}U#(!$=1rkSCT{oQrbiDXH;9K=v|ACg*Rvi~d2v+imNd z*ouNQaYdTe!e$j|%lPV4*x^uL&D~GGZt*1bY&?u+@0+hF_v4*$(tNMr%3&^O=kXN7 z*@n;pG(C0t==4MBuSbh;M~F-x>srDYfY2os+AU2%{29G66+)dXX!8}GXyW`0%ouGA z=!O&KE8DwTGPWPA9shb}tc3F$rIxtFbv`ZQJ0r8DD~+8~H{!JntMt1cOwhBS3|{Qc 
z5f1*Lj};DhRhnd;Q2H#T%oDTKogw83B<*JbVouvOT^HkVyl&L&T4n6lo>n zSmuY%BgOfK9@Y@h;2oGQ;nt^DzVKM~OGzGJhC^0GNrUF)YZb#^3< zCE&5fn05BhW@56(qY6m90r~Ik#%{tXhz)pDl3HYZE^gpIj!`}>FfWX=3MGznbvncL zRK{7V+su!T#n#QpkU?1E`wS7I6sS_XD$gkDkKi)or4B(P(0c=S@N9(lI@Gt(y5~+T zQ)6~4#ioeaQWIjn8!3OWp#XPq!K{emG^nKJHY1`b#)z?wRZU-}c;3TBzu{NXpZ;=E z4_Vojv>82Ov8TqRWDzlg#uaHbCKpAd3`}W;VDM5FTW&Ugp0uGjj6k?gBYLsqN0n5g zA`f%WTLv}7M89S#oIEM^(2H+XaB^5%QwUd7RJrw<=8AzCo{L)nFHwa5?qV=IZASOe z&hAR_{nverqWM29tq9(3N8etIDGVik;V;M<8v<_4cOKB3K^Hn`1}W&H`ZB)9g5FX% z>beC($zhhPhm|Y2blZ|ShV-4(t*GIo`?P42#i<<+zlO5Yy#^ghhJNCTU<#6)bEk}} zfM}{CWPAR-dS4Ix6{5ZrK7Mj0AWDi_rB!03;=Veb%V65Klb**|_Q{#fH_Q&L$Lr$M zf$uE2)UY=WBSs_J!wp3wR%fALBZ7 zqxJ^A!qDXS=_((MyzVQe6LZ^a3Y~X7iucBnU4#Uo2;W6|5pEC@-S^rNl%Kg0P}Btn zNkhEuA;<2gIsHT8%%w{((cz~0L+ce(dirah!%a1AgU_AkjYg{@M`}Vg4YTW}f4hlS z>}@W!?l**XuaEY816EccbH9}G`6ykpQnz-gzmm&LycYsulB519O0L9NizPBjMMf2J zWSdwR7a5abNQWPoXRy&Eh_g1{Kse{yj?bDu@zH-~d$A1Lii<{0DE)yP2jUoXqI|iI zzo^6NL)?wCXS!pj;8!+{2pZVr5Xh=v!U)8+m!j^Exb-W#S^ z2T%5)Z?4;(`&Jh~o;zN+QVT-v=mb0$C(JZa>?+0gd?dv(KC3;oFT@$2Xfq-UI?3sC z`Lz9)2N0#oeM^j_DHZjo&pTHApTBM55_cXL-itluq@-9}_0-EMIVDO@f#oVteW&Z) zhm^93g68I`%hyPk$7IjmVMr>D6#4i7itef%n=IuEgUhdnRy4hyXYwqJQ=eX#eSLWf z%_u{?7RS|c1>57Q@`+~SXnD9v{()0oDW;HF8H}#toB&v`+S9iF{|7&I#yrHH9QcAZ%f}x*c@;S zvLwxUsUwv9%&hRh>02!)UmDu4g_sLUk(|cY!LLi2*A(zFl+(u^_WzRcHR6M_w`_ea zmr1?0P*7y!l?$b(gWQpZvHbNoJ5Fk@a^{>LuA z(_w9?zP0uH$MDh`!w4YTObGF4)A;0;a)eack8DA88_B&rewtW__>km3aLgCPUT@Ti zu?3EC%3Uk$vf__6fv!+;06z|Jt1wac;QfFOLaZ1>qb=!PHJA~P%L6chLSLKCZ5p^i z>MWs>1x76k%|2@7K^y8k zg!pANcAd#gzQIYYJhx?Sxh=%BF?K`gJ?1q<7QANSdhgMSt_o&sA(y4EQO8kENrl3l zovi|`{%?C|hkSt)^IiI{@QIccousC}_)10d>1x#;pZ|RFjIw$kFfH3Gd5x-pp3}k4 z^Yq! 
zq+f5UYKJN>x~re?Qk`QJSN+ECYi*YWr7&cf;OG9NOdCK5;`-rlZGN4z50cGS`F7yW z6gSX;H*CQb{5_$-nV&((Y$dDl-WugB;>9Ar5bf(InM+aZ7L-)3OXP(Fy~M1gnyk*l zGHG!4gj7<&s&Lx->l5ojQyXA&AFcUCV3C-%NdrlFxSI^?mOClDc+4MX>TFX33-B8p z#@r~#*4#3km?BK+bV7;bK0*0G8?eTwlQ8Ez?lvQkf)whYkH2)Jh;&qlxtqv=MtQ`v z?oA2Z>TvEbD69(&0l&F3&g#++F>zwB8 zh?iOl*L@p-y(3b0601O(7-SiS$jdIA7l}kWj!AL(a4ey_Bi|r04Lv+eV0*`W{-j=| zA!)W>_=|eUAwdx@sx6UUh3fT}xw~qtIhWB>tx??;ripoBxXLoxL@5hXBUlYzQldQD z2d3kiWgh{7Rn?w;qilJiEHkba`ozdn=^fZ%pWXehA0gv%@2t7;)poHL{F$f7(v7A` zl0P1O3D^~A6>S?T)=e(8C7EEccX(ITe}Yq`(e$o?b-M}2#v17*#J)9S=_t~5slXj# z3~X}rzr2d9Fe1@W#oV&+{e@V2`U;pksO=$+Nd64dIUowIL}>LGH3M6~^yC89F%O1l zHbkUo+&0eaz7lO^@{Su02|)OrKfInuitz~pI~Yd9ca*Wyw{(ca{(=!S#(>t1H@T$& z$i~4+`pyml{R#iD+v=53;bgwszAvK9gM8Mdz+}tBE&XQAmD60#JD`|4$NIWM<_7KH zQ@6;F#r~~&K#9_O@!+{^;>Wi6vNT|oFKm@{5hZZIb4b;rICm8r0_7>M8rV{BLBnOD~J~K$lIqG#uNJq0kVowF5DD* zafDb#dZR0*x_BLggvgmO3;T%?+DRoZFSW!z8z++8cSy7et*u-1V7?-Su_5O~M#Ka@ z6=Xi4nOc=M-E!S{Y`wzg`FIQ`4|@}Q;^F;B@V>$r?^di=!1r=1BQ-^|hoTbgRvj7Y z<|Jgk#>6GW{oH|e2IcyUA%^V0S2Z&}k=q+72;U4rLn8;GMUTB_%<}nxPel)H?Ob8j z0I8p-9}%1{_uk_D&xt3QqO#$l=(-<^s+;wGuBmR+>OX`1u~hjOE~{crs6*DIPbShQ zut#%?j6M>oq$|iqvkF5TkH%FyAw+J-s~Ca^ucw2|*tH-z^Zk|HZMJ;U&#+yxY{qBF z6P0#01*w%ngNXQYc%i3xw3Q=mX@B@2Mp`qt>z|YUFD{ktln??vx>{=9dc)oea_kMv z*u2P{n>{&or-l!b@Pu#?*A??uQ_Jx-yeO{R-bB_RXf>H#e+!eM}Zk?c_) zz)=`RN@f+rI9PBv@J&hHvma0auG@-4ey-x2|BtDSWj~wv&u6Ty7D`G$-OcdnsRWzH zbZJWrf=rvyz7WFVN{fSEpZRf*p!$yuLY}i)pK8RqM2Lriuq7A*DSB*#57d$De>*N2 zU8y&1ovBK;m?gaCOsX+qJRZWI0l)Apu>0iDnj5L^{gyApwtQS9@cEYIksT&cwmMGA z(n*W{x4AO>m(K9Y%9hs07f35+UtZ;XS8#t5@ScvPgq}|Q2g2HQWNlH=?%0xi-JfD~ zb_AjWhm9~9ItPI>g`rc4IiA#l38BxY8}+u_>Y+F;xMvz~WKNnIiXJwmoYFuo_hpi- z{oL7i1)+jI3LjL?4RG+?rH>SKUjg`n^=9Est+AmSIzF;E%u_FUJ9b1pcV(BBVqjc= z-4GmwgKlc99c!VNS~By8D#EO+ILn1Z(&l~1xcf}cIPD30Wmi*`YL*kb9`eRqt6gnM zPP0!^Dwb5eyYTkL>DS>f=Ez=MuFD_0X4MbE{H43CI+D7bmke~T@Fz@sGrTTU5r__2 z%vUQ?jnduNv@!Q_w`D;g;MjAOtAbx-Z2Q%+Dx}W>ij2`P{81hWs1yudue74!lJzI= 
zH%7SxC4b4!Dl$R4Cscj>iqt)Hnx%ICU5+;wHilrVTx}dqwTIWY={j!QeL~NvOYPCj&wkxgDff$Dk=Kb@kgkVlg9DHJ`ut}VEb1qf zt%^Hm=e2fF#T{f$-5HM@y5)L33?y^(Irc@p36A)25%DsQ;actIw9@>mC1jr7r|!}2 zYM=w%b3W5X;n*axkA#|$E+{vP(r_c|;>vQ0KE%$fGK!>fZ${j;XAC7o;&DPzHg-jy z$!5%92-#iNY20{K|Lc8(GubMVBVDY1HmWehV%&pwWrbVm6KzYfJ^!K3Gvnt{;?}a! z4Ct4t0!}%r5Bfrk7UC4@in`d^qm6Ap>CMf+F?%wqc?{7v<3gi$9_}s8{Vm-|w&KQ% zLqAZjgbgB(ixY>1>q?4~STY7NYR1!C-L0L93N5_N8i7W_1AX-hwqKu&&Im6X#|9rsR&0AuA z()*jn+XMbCbD|{b;0Urz_0NYMM*RqqX2ag_uTLM5+q0W$`9vAFyMB#i*EHN7bkRI& z2%Ta@EXy=vP~_c|*e-RNS%RHe7Kl{viS;@k%O^yBxN;w|xCrGaOY}-^|LVi* z(d$shR1helA_6YJXp-Hw zD4|olxEX}1K|^Qulx&vDRIg2LL5yz`g_3fVrN_Ep8y~DEmYj+$Q6gRQ5o7yBg2`BP z0;U`8a{*wPMTIuqctI$Tn5J1IwDW5yFqN|2Vhaj0TD9f7C#hVlO?$ZU!G}H!Z!20p zX1VH?^K7-;dFc751j&}SD^?N{7wh`0Io+1Q5L$ba6k+&d4_*);$}}Uz{mz;ZMv+C zQm&a~PDP4U1TiERJ??m_=Wzgdmftnh@t~+Eipl>x2n06fDFuX7sp?%Svl6AYjHp}NU) zclWF!y!!}u9ccNyI?U3O&=aBLk0nVd?0y7Dw2nN;ie|9;v8eVdqjsE6XXD$~tzyl6 zYAQts;K(Pe5&U<7PP4F4+9qUvEO87~30~#>_Mg-ZDPug8t4YLE6q<|jy%tuMXs~I3 z$n+;x+D)7(=uE}WpDihvbtYz6Rnjk3rq(o(h=EkD-`w?@&lN*8)0a&`X!dI$a-~h; z&O7tz#~XyC&f!uEtt9&|96b#UgI^Vxy#4!7vKIFr%JeF8Gewnpy4J*}Dd;XKx8G8> z^m%1xM%ItSq)o{x85++7o;Ai+zCb7m4>Q`1#y#i;GoA}0#jiN_`4F1GKGm1ln-pQt zY~n9Brcv_q)9AF}Xocg4RKpWq6wB2CLNZ(OZau9@7QUarUUC6h$j_kIqxJVs8p{{4 z3;>97%KM^Ifs(cxQFf>|k4G>BaC?4I)k5Z~*J-vcg7y%V_b>pLTKB#h--#B(*YM^R z{{TMVP(PhOJLc^-$HrmOu#J6IWkTZu{;gSj`iQhR5D zPp^n{vA1Hd$#{CJQy`Rn$=k0@xVp}_(`-gf%|SLvvTmR!Ayh3WUaX91ht)Sc!}0)j zq8b0kpz&Y!)Ri`F+Tf|1Wf$~BcA`fOK4vd?1$IYE_O-|qV+B?xPnwvAknP>66JLmB z|N0sh9N$U@hhLF*>*1cDhzzp~?LB=XIQ!SJ)*M}Dhjl~nDawOJ4W;ZbXMZIPKT7FM zb<^mDWiX*{$^LQ<|Mt-}@1G@qjzHC^?Y#J3Kky(r&MI<2T0KEyy1Zi1e%iOT8A4?h zxTk7TJ5E|4Cc~7wkU>Pej6|>00OByc`HvTw;k&AlXwRx1c?($H~7 zC>%MGu{`be&)O@oBP83Rzbo)SB>vTFhj9t}+7;W8t+ou4fs|VzQs{T(Vf>Qi3(b-- z&LJZ&jTA+caTT3(Wx0g567mp-1&jI; z9nJAMkg@YWaNk$lk?J76%~TYAfIp4O)L)Vn2+t8zV}kA-dDaIJ8WnF#WTW13jBRrc zs!ocmTcI>BWD-|oKO;}|XPIPWsoh-&t_{?|iP#QuR~mh3DE>&P2;XPusJT#p~k^;AR zzsro^X;~@g8L8vdlQ#neK1hk{m;1qW(how<(OwfT$}KZg3OS1%f3k--ochC`XS`IR 
zQjcCc3p366lDY3IJ@{?w{B#|Wx=QhHj!;0X-7%pUB)6Ivt8oBHtZoZi=ThEkzf5j< zg+A`A!q_WowkE8emm)yoqYCrCZeT!V zDyO383d{XP3!h!G@#+Rhou5^{QwB47pC<2~ro&i|H)1;UH}fp94VUC@?}--L<|$dy zS`v`k##tv*P6r4yOi9-?qk2~Jv`xIbM^H+6SwB(0Vb8kXYdP3!5V&3H0{0(mekJa{ zMsT~~-ccz*z2>OWH|B~GU(-}(Df z_}cD+mFBl=TN=aIKpp+6DDpw1{zsUcN)0VqNV`4PHakjcQvOZ{*dTt@X_4(19T#47 zOIZTJmXw#XHbM;lD*;vP<$@e0O}u>x!L0jn@e19IiPY&n{w8Xn5J&zyYnO^)LqCh+ z)YPpV4>+TTAtk>yS+y4BB7rK{k^K$1es%dxe&BYiqFd2wGavY{Xye z_@CH^&8dI~(ghxsTf0&P;Aio3n2~OS4!`Y)$neu%3D-`kn^@S;>SztodPhpx3j0j&*uOAG}_x-mHu-<8GOcC3t0MGwY$P({biPb1E>jH<{S zI91T+7f73TmC77BPu9!q{NPx8qjBB_0_* z9Vau$-tT%93WObQs)qQ$d-jgtmgLnx-xlI~XB=|bQeNEgWo6F$OQ!}&KRrK3AmS}j z^!{nA)zhN<5jzDK~g&NY&S~m#I7>?Y=nn z&{ft{+G6dvUo8)U%G05 zO}L+K95(%U&+ZTRP1s6L!g+d_fq`%$^1~y#L2c<@4t6y438%x@?N&?kAJs_BRgt#C*M6~ySdKz(w|9q|cmMmlYSZ8qQRN1(=7 z5?O3iY)U&EIb-dgfqW$ctiW8=kMxh(Fmq+ObP23&@jtso{yis8{eRH+-VW`cM)u}h z*>nZQpE2||(x#e?;GEOO?zTA3gN8{g8K#IY0cwM2*z|2tI)Yw&{lFu9seL^WPM#Ry zPi4njju}a^>ReJR##`=FC@t|xi{@n51Fa8+u^(~r;MEM!Kt4WZB|07JH^YdZIVSeu z&>r2kQBH}Ct!Zby3A)TvWMo)PRBQd#b$2^k@CO4%FI$Qfv{AwHH&T+K?A^Q0kH_4; z>-t0Y`h=F9uu$T&WRn9K2Pe{3B0Hl&IQWM}nolsMBd$iRNXi8;e;ZS$U%Qa-X?*Ut z>f`jO8SN?Q;0XUFw(Ro&d)jnYd)`~hZOWDC$xv@r$=I&f9q`0E2IKXY3xl$FDtpt! 
z>tuj02Mk;h5?5ZkAMPl+`L4ZDX6nmOGt5Aw`p{o28X?e7ScdTl4q3P83OXIjjUgQV4;H})1Xa;B6KX6Tpet&=OjXe^18l@!LU??J|>Srr#1${U14*v(v zMc&i=3Qnud=&NMUECZQJA@2A0249_0CX1MDCsVo&U#J$+lAG0!=indfEv(b zw6srr4CEOn+ue_@MfA_R!T*RS+&CJ3cWQ)3Jc@7}2^Jui5E@tudhWi3I4XK&H$W-V zo9xPveg;xIEg4f+=!PVtmc1T4kvJ@!w9Ipv6Zl{5{*w%kwc8fv+a*NzK`{3exr=w%D zoJ+dTtsLs;Kh+T7EVK*xBIht#6t?Bc{r;tH4{|)FD191ZE3&2#tERgcj55V3YAmDS zXF#WM^dsrowxiUxU=MHD{;ir76qKS5tB_LnUc9r>VEqmjSxL3|<~+AryJ96U9~xnG z7qy(inO%{a9)5$rnL~FuQ38^@tKkovk6Tl{CiBeLiz zIdSy7Upw*i1TI8ut)AMtXoe02kgj*?i7ww(TTH(Q7n|qO%5ctb$k98{BAHwJ5o!30ux5E~%2 zogm=-lFW~vl#(*tB`?=$R0AKU=Wwnbl3;|B%Jbt)$WgK5`*lP_C|~2InX8k^vNW%U zUA&qbEVEP+F%=HsEi(|?DL5lub4Qq|KNV}qknS@!oN&nPZ6$kGfUo&}J0+bEX+j3S z((=qgz=CTCovyWhXjxbR=_@hokYV>`Tz(0&2J&s@1L-44CyPHn4Epp^o}7qcHg%(j zQC^%@E{OLN(`keBXCvhW%}>-ref&t7xrF??8Ny+dh9N_WGq0%M*Wui%|Kcw8wh5A;;j0dMg2<}-6!B}z989*Q;-=DIlmX+o zwC6dmCb#2yzTkKZHhUHNHCa?H%M^%TRU2%Yp)7*kog_*3@Sknm%q+a@=s^FXA(d%J z!39>bNpW9dL%NphG=H?-2M~I)TQD@xVs>1mcXyMwjQjTVQGAm8qiGFU(I20Fddv-V za0_+pV>GG*yW2wNJ>8@tUPK=tFbdIV-2!p7^A7yC+}i~BS&KoMrNw0u>~QBU7&fK! zP?dJN88O5_mQoTS|7md#0;#;Km$7gTV%RUK<%Vk5C9H)b> zWKR0>t4k)_9b?%6OKJrStG~Qs#RrI@5f>~ftmF2{sXr6ue;-4FRRvp z5Ll>NK|YciO}=LDq(L(a+G|BSSyjA`{R;W<`-?ygYCR?BgU5lKK(4W@qg#uNj(DFh z`%>?HYExYsvUDT>;cAnjxf-q+Xg-IF@VwaZqSq3;Tbc3oyMK+67mNo{tw&lKLaH%p z$A{Gd6_wpjtBmd|DZrcwXn&^`l#qL<)Mxxly>tFkq6++{P|bu{rpJ%-$a| zS(8aJnas1+dhYwauHRKZOsM2pg%3xfjz_Ssz{_V&QlSHbId1ZkJ;B;4B#tS8_ioru z3uVsDGzYW6@59z&qN?%3d-n54ru4D`Y;0^7pV9A(9@1ZPz4(H8yeJjNkZY~@Q5l5N zbK07#+E1WL>weKpJME2bmFS5{6>~fj4fS5G zlW8hEB|mojok-~JSzcM~h`8A^9;+6b}muD0~Tm46lzled=(lCX7S47&Ma+MNKzy^_m~=I z>*wHYriucsNGpM_##a-z26a;(>e|99X;S^CI0Tjh02C}BTsytYNjcRfb;_|($ym{0 zI&W9~$%Mvj-}gCPaxt*=U??*)jJo*Y2X6dqrGZXgz#eDDhi5K~xXj=R#JX7gQv*CP zy$cZmjYojm$d-479YXcO{=?XB~OGh=uvIY?H^Dvu*T>T5C(|yw4K%dmIZSA5}GXBV}m%RF#Ey2~2z*JN&frA*Hm*N_C=UXBcMg0RN3#o^-ZMpa(hgWZQV? 
zzWJcw>+xLX*Nj5vk0U7$fFff2d3G zP)=>?d}*vzzp^=7c<6bCdiV}s!Q)G3B~I-bE>tEvT2sz{QTF7y?ura#J=O#2E9xTl zqq&f~Y+@f6{jBu2xC{)>FUS5=E| zh&hD~Tk}M|oyN+#;xmpg{H{WK2!_smxy-66v@LI14yjs`NYVNSaERNGd(0f%Acq-N zk(0n{ahYbW2&(T}g5R#ap0s1QI2N&F?C*0F2~tsN_@)Y)lJ4xJRrMdGQQ36~xI0r~sl72`b9quD$~^#TI?R@!U5 zh2w?HGA)DI0G$&jCzw0@q&?^fr>glyawi_+flDs#xu36Qg)#Pvl$a1X2B5>|%ot7i zLj++8MR}-e?pFb-3ZKqGkI#4;LAKtYEB(j8caO0Nr5*U#a@Wu-G1lJtj}tcelUWS7 zeV_BxG9R*YS3Po=pkaZ0P|JUSA7>TgIEL&R7_M@IyrB=lpyBsYvDF8=+i8=_YwX`q zrmcfjeuUID8FnaxJO9QIFaz^mNniAFxt)1{<^qpEAqp|h+2HHri!0D$RsYGekb0Ol&oV8{+HVB9?8t)SbBEli6sV6&gQT=UO|zp89dzO%bE? z#Nuq9RkSZz`*bHGEr|JDIX>eu$D}VfpB4pf*{r zZZ<@#hN59|4-G%@_53^JGn?Xg{=MCa1d9;HWw!TbAkWCeTc^a^%-@jqmUwCGhKK|} z;oP&ELM7M(#u{P5_@A8<#LzQcQvhbNUH^2`ms|c0_87 zG%onuDp^tq9rIqthNuBT@I4GL0lv-vyfcWl&FRbVfb{?ufs6iFk6J~{tKBwgQM_5T z|Lscmiw&@DzW#>TswFYHUEc)-nHEIjH=GCOxtW_uo4cptfKvnrCnr36oF5@iWX!mC zbRc-FMd6mcd*@DR?=J&Ms#VSDGvLM4+nY~Pm;D-pwAGf}{dO*7SxKx3MdE^J}*%e$8o zQMBeHeT@%G(jPU*Q!UR~=Gt+g@yby6%G}CQ+xrfVB-|f653S^PmE&5{-ojGvjAnDU zJ5+ZqS5@-FO07hQ3p9$Ag>4Hn8dve9_(9 zz)t(re0Fjy#V$#D=f=+xkv95O z@hH()*yGgxjt6aIKFN`#dE;T82yVanBqz@}wxuauBzMGJAte<4nJ#Z!1c{yFwlZVR3b!<8|Agc4 zCUfo&fAOAWqNo`k46RKnAf#heyJuDMcXtMrh=ms0^3V>OZlqC^_kLt(Pwo5H%}eA0 ziK_@It{_2eGhmCJkXBQh6yUqQz?oXD^gdxi8=fDW_85^rpHnJ}RAjDQm(BX_Qh_-o zo$h(YR|4FaIN$wxTnddcw*=XzX>*iEJuKctJ|`ho57#+KTF$IUT8SaIsdObM-ExGi6m34i6{>=*LT?mXjoY z&hs;`1V0z?U@un0gq@PLzTb$dvbR%{h`d^eTvHzrZE)oW^M1^Xcbkw+^7@cH(Qffo zN?JT-VTKe1Y?9|g!azV4{>xM!3Y&%ynaj5C%;CeqD{bCUMc9qAO>zdI$s}hL(YA<` z4o=p?Q4h|Y$P-=<>4AEC_uqC-?@A4)OkYlxZ>Zn~^14BBCB!X>)p9umZi~L)>ik{c zHZb9D<)m|8Cqn!D?`Mw(8=4bx3zK27-p-Q;WXy(~WT3M%sdP<9<4m0WL2=bNy$iuR zx-W>Ti`p~B@Ur&u_ouV)2D*Ec1_%X*&b<$j`n-iJbLxjkXNpCXA(_UxpW>Hc_^CoT zSlHt`92Dx9P=ozC{L9`dyLW#N#+Ny+atqv3MsO_*%Q>;lW&f;YIo}xwrhyV=nWf6o zRJxsBrg)N^K3CKyn_DvCtaN3xXJL3TP?S}7OmDWT9Z!&zjGDaUEym03(a+tUpn~Lq z2r~%J6_m;%(t6=iiW`}`e3~CE%AbGj`w7jrwEkQ;_dZ&Y^HMC)618GXOfyFK@y% zbab3Ik?Kknp^+I`=1@~rKPiVor{{R^L*D>~gL=uz=W(?*6-?qwNyT~kC 
z2-P`hvA;|o-kFs0NUm6NnwYwg^$tC|@R@9)SXx>AGeW!6X+%7tldW(z<)~%pd#b5*?a+1_3XAP2ywOD!B$b z_niwNT3;16|3qzTOOdsX-y1I593iel@bIe;%+Jw0hSqANR3WEU^UfbVqH+JYkWAud z!Pi^2YQ~@uWLQc6;iHLwdD^Z6%9Zv7wHFJ0$s>UusB#;H;Yr4Jty6e{Sy_Q>^McQ0x zg@^l&mOJdg!#2}!h2Ie1FBmtgslZK^F%D~Je}L1~rOXhFaEq5nxyLLi!3yop$TDT@ z)3gwIu^a*oljssD{z=5kvakq7jKEGzt3d?Zp6BTS%2i+ zxsT2(fcwGmFNldoIC7VlJ%}`=egIzzzC=hK3#FuDI?p(`h0?((l$??jfdbcaV3FJ2s8CY6v6SiFMbkNS?tD;j*>NS6SY%Vb3ja*r0PHblo z``-RwiV~LQf+tL|ZC<&e$g`jLfz0&VI52prn?m5kSo|oG5GSaWd49yvJ=T#?mLP(J zEGxdi-wB(5l;H7L0wkiHcuP{L&+3GhE$Cq=z`_l_{|BI*cp-xFi9_ARtP=++mL3&2 zZ-VZXC32M-rXH@hFe|&57PyfJ`ThZXs^#*-+$Xt0bpnMS=+a}Jy26Z4(@`@pXg|o3 zzcdR|eGvkSg>|x>h?2Jm62*VoB}38tRS8xBkcZyaL3nO7E=v`6IGL7LdM=zwyrhTB zYT^~Te%e+V*Qq*ev@=1`4rOAu7VQ}wZGMi{V~B<^`i%t$Tca?3 zWm5B1&IYi6BOv5gm_EM@JWZ3fJ5~neJ!bv^GH!^=dA0k!iO5PMZ}kV>VqO%xYFg*m zcB{L>H=BQ?daubiLi+vOct!(q zRpaPooXj^ve8?i9aEnxWFYC~uKA8IzSxqpo1y#fBn>wQq-HBdkgJJUr$&SH0L9<$Z@2B#*#0>4+kF;;$@+J!ewn1Ud(XaB79yu;P%yva7*-U z_UN^)Au{}oy!0yeHW^}Y!@+{70aZa_7I<5=>-ml|EHm9ym^E+6GuTg$*X1`cksJ zFWp#v+s!9xj+ukC$G#Hu2b}PIa6<*Sr6rGLcticXL~1^WG0xw8j`UsKwaDXD4JJR? 
zOs3M^ek%v;VoV2Gmf%WMD&zNhHZEu0Fi*r-!g&`-o$ut}yKoAW0FYt;-hVh-^k4!m zW-0pGbgB-NFV4;i+7W4KHc`!YJ5l$_v8YT*11bJ($`d(}a|D|+5;St?qy0?|FJ0dY z-4;$Y?Da!k3TY#dW3II)ij$E74gI3@^F8559V1)FHCR}pgCmjXi67$yZh!ZJDSW8T zg_|d5{i3Rfezl;I089vlBX~r*vzjKms1GFx*;hIGL^qXDkVRLg5f|~C;^VuuHGg}X zt3Y5MlNyFx2*Jk}cm@(&Vx)6a{wdk~y10b16%U#1Fj$ObTQhI4{C4;b!<=fel}W9= zLAbn+@(;S&>H&8=*0QaHWyhbdqSpzsVzKVY{so%968iY3$y0&iR;09_(4Lex<;#xlALw zVZ2uoqSS)5*ygca?VtUmoAkMfH>-q66GsgV88>hO?~)SS@f#jzo{Rsp6TB(KZpZh< zc3y>$rJ?;ZiIn7r))2w@ui%gHC`kO|?q&7r3I@f08>>r(CCLwdpJ)>W$IYLjF?OaA zF~dZP39i5YMlmuS|3{(xKLzvu>vPb5V86@PjNioP{sEfpU4eKt|T0 zpC8`eSWkWGqbjN5hDN@G<^2Oxnsu;V^*@8I#fXE3<;0%$S#HSjm&?W|hjvuL=d58v z5Wk(SAEegbE2=QFe8ZsqlUzN?QBTVK%TDRf!Y8f9eL-PwWQu-+poOArtfJyXme;fV zFaSWiSR{hWh~aW4e))Z$(LMO#<>cgoyfDZi7am5eL%VncJ-pEmDDHHGOM6R^uXd0f z1s=YNg;%Hi1Js!tf%dXW8Lw^qJR0s=c0obCxMki=GVQPV{>C5gn#zsn2-m4XOvDq) zh<6eg;Lu8hz9YDBoh<$X^z!`iR=?|qb<4NfxT~w{FL%qxabFj@(~APbJSrsy?tJ4q zo=pyMZSL?^8&iES_ijdu9N1a?ya@S;Nhq-Fo4O#}6V7)_(^wgtqhLYFf$U-Rgrst( zXSeAr+}U<=v(lCJ%8t}!eU=r8wXsx68`s|1JMguw(dq6RPtb_4yOJf=M2Z(=}3JYYN z4B8S}h><3CDT+4HJQ1?*ERJGdETKm}@cDl8I5Wt#6tqpmhSJ)*O)-5rY9 z-?BEnE>lKdiDh%SuZwm)>^;_&q}?>DJS+K}!O&`#8M@hvj8pjgbLW3FY$x~JQbh(7 zKP5y^3Hb=n(pFdQpSA3}w#7v8n|ETI8h(Ar$G8fxw>1-g&pl4`Ty&z_2*Yb;hXKd& z<65>W4D@7v(KsF*soe3#?MEDbGYyMSFwgSU0!d8}Wu3E&ngGu2Q=*tqm*ts#ccKny zWJwWpdfb{Zu(n(1K{MGkC0?%c?+pci{Q_vj@I8R@Ee5?>D=$n4OJoRHl76Qc{!WG< zKh0UDWAD~~Z{(jsEXyvVm|v>y^m~JjkQ{cDY-I;)6DYBCV`yy~$n>2*ZPzCVeV>@u z=FU#sQ1aE8id*42r`FZjOO`V#oJWsiQw9l~9% zFo~prW&fjvIHR*+H35~rxoqYw0Tyx%!EV`*ypabxZ3ZZaj_RS@(ACPG6qUkN!Qam*eGSdS(>8r5p)Nx-q-?crmV@>hYzxysgKsoL*c7Kv9tHbZmn@&TU^JaG{y&;gtQ*V=5?WHd-t zbnnxs=7Cx-OKKvgnE@p~)@@ctzlg+o%lSSp=Oh)3)-y}#&LQ9h6z6d5VTR?w&n;I7 zI~4gl2d@F;zPa{ON~2*D-WxV|-}v%M3} z?dJ`i#`aBh&=Tp%^diVta2V%|IvsIp7T6IGR?qW_e5KS>({F`pYbK$38^VBJwd})i zfuTI5HW3Y!)Nu%^OxCOXWZncywxG2x&+qYa^>YTuHtGAD4oDfST=~JHcmaafs(x*R zd6q7y+*^i$Dn8x93q=jrDoKkYBc&BYpkR6_HKEY>J?*pJQqwjK!!(F%MMoi+!3{W# 
zb<-MAgq4b%32eX#z2;LA()#?PU9<3zum|b@|07+!5{fN@#0JW&K9HIgdG*Lia$*a(;$?fC?jbSu>riPv7}m1U4CsE-gm%ELlb?c4YJ&uyIeCQepRxChi10dW z{r>^D4!u2^P_rqc{+PH3@iMb$6Z{CnD`q??em1Sw=M=LKKCC{=9c5v&Fj;W^op*!q8?@4R9*~- zB3|Cdd;*DlrM94D;e_k(KDD3d{P05B06|#piUicpv!lkC(k9PChaKF9e79e?pwdQ{ zlJkKi(9{;4KL8Rk`fn612QDIRpxbr4v=8M{>hwimkq)7}Jxh>AcU3VEL*S?b{|r_t z4J>8w`5MP|Vj7M36bwU_-GA+tSaeTe$o|!pC$HcR zF?H@}OD_707Wn0ZCpR|P-~{=Xc<#`BHrOn)UdX-ZL1KHdZtHpA@*wuNrM#;-7mh+G zGWrGrmaF~ZZ7!C#^0rb^5?4V#9bWP`m(5pS=#k<3GJR~s@QFWDvS%M#jg)Wr59j*K zEub0XYph?_bE4i91B>kQPe=nBEGh1?IVUi_<7k9P!L0KOpYgoSC80(cqKOsy<(Ep~ zsMJmd_v(vLk8&rHP)M)Hmt$Y^%n1SF7*h~u(Qgl8>P?TTfJ?Cf%=ZIOI?T79f&4a9 z4;}M2w6|5XBuNW@JDY<2Z@^9ZRYpTImaCG=1aCc{xjPR}VA|}>#i74G)c7uN;f09y zxRc`g^Gyt1ckJ9Z>0;Gu_*2;RCr8x&ZdPYj!y-|FiI_Sj1k?Wy)cMYR?)Je7B1{|t zQ8#F5Kw#ySOb6pdc=Ox^h5CE^c^HL^&lOVo!tM923MQW&8RsrnrWV^kW^H{d z+S4E^$;^2o zg#KZuuw46EJ6MyXy*~xd{ajUFzGAk@F>RY=jLH#Tbg#7UBhp}r-Qw$wN-0$zIciO@X$Ew zSsfVxlfrhMKi5dcSj#fUV2iS7X#8N!#faiQPYlv-ZIat7 zOJmsisrQ2B!l_kp^LJu1)5y)6w+4giCcLjn-*wCxkU{D!C?wAz91pYo;1bmA6z-m8 zDsU@V&EQuyOZ7cVa^GT}n}hBp1(4-5?Zbe7pQ{DPxW4rK(|t&ci67dN{^qyRJEGE3 z4c6~&;C3b0t->wwmYk%}7CYmHjUDkL(;*Yfb>97VSVaK%zx06r4>E9hw)2~KT~KpS zs=by}+hDS*v|%Jc0adJT*5cHr@=UG>77W0*-^l^HF-8SJIE>O~;wu$>iekie>2hz! 
zwKq2BH0KQC2RE1bsUpR$TyltDQoEC&17i(;3n)^?X6~|EZznGlV|zosditDDO2u$I zi35#Y#C|U*(jaiUlbGt}*(nGE0yZX}+d?YXVf2^ShUfeF*|B2a?~&(hurnEp6Q0Zt z#nbnpBDsgtr;xI2ekD%Z-l>#V`41H%iA=q~=vevZwY0Y)#2~2scJ2^*#%Umy4ZP$W z$;O%Q3Kfe#kVEZT(HO$+RmPOD+@GyQe}YS;ZAE*zPEmGfO!2hHb4X)a%j7|{i&_5x zX2T;q!PrCkc0?IZtu1hbGCV&*95MX+l{unDTuFrMYI2gReL5^I14`DPpx60xOK706 z18v4|)r^?-M=nSjv;KP?U0e~tj68EeWD!;6$XBeXR1xEcoQo@gkD1_pEm*pp2eWRa zpYCVL+Q(xX#Zo&`7^pg@grm8&xg~ZlIRTXo8*rn#=t*$jO8zzSHSwiv^SKIZYOBJE zg7QSIae*Pi#((6DAWWB4R5i&}+V{DOI9Mv^yW4}dG>%4*fNCmLaFJ7X48K#Vo?JbP z&lmKkb`qMV|{V}9}7gZ?;faueI z{c#)}jSqb~hvmX`a1EtshAP5p;?=brqRV$*>k~T?wXU+)L&EHfW^=uh9MH!i@%?MA zp3aBXA24ykZ*q*NkF`l2qTd{+JRPELES-vS$!bYbWZe^`eqs2!SCIMn(Bo0RLQ)om z;#@_F(?0zn3uca!5ssp1PGXu?A}>o!+o39On^gYOz}=djCo{uao(YA-Xn<=K9&u=& z;N$mXq@RSw@cm#WbKnU`U!x_5$KcU~JjX5T4#Bg0ucV!30_9U-+y{|9t{+z@f!eut znWr=!>(IxrtONey-&Qu1vzCZ6Q=_syHwP6q62;4>xP%-#ae~&+O4~My>3Iu13+jlT z(HjEq0bw0yF4Wfzy(D^V2HT_fAs#b}Ro(-h5tdsav-;_1Shbu3RFaA- zzuL*{#||QiM&vznB}c;Kb-q?bI`e7f^Ckf{TuBzr^(BA>t(enpWiI3o0mn<4>M`3X zh4`CS0n9DgKX#e&U>Zq-93vq$?w(p?Wusfhm@^tDjUZfp9u!8etE3-u75jcRD-GVI0Ag!S^iJlr zq|#@_FZ52poos&>@-En6xhx{i@{A6q%i@prYVC943s#L8Fp%`mNu;1K(2cu83}nFE zMlM_0?c9a<$9#U$8lE=Eb{E2-C~K3AhrQ0)U8-(KFwHAjrAS#VGB<7x3mWmySx|}W zQsv-73=i|o8Z%VsbDk(m^PI5trL($z8-3b|%;zK*1^u8_-!q;Ys926nSOyx`iC?1JC4L z0*3E+&bQt4h)3CU>R(HjDm#k?_g+L|M}e}Ia~(*xF*!BY$~S!Nnwui%1K=R*c^Y)) ztRX=RLEq~`zo^Gr7}o?k)bpw_m0u5ORd3w>nv8pk@%z&Lw7Rpw=opOtOOIqn7dHZ} zdYC89I7hQD@ci#V5%{xn;^Bcmmo)jBopMr>Yz5U+%icQXL;BVQ*-}8-nHqZSH)m&F ztZiS%DTAmjf9FdRR7=PUiEvtzYzE#kYn9l|I7|>OCv1CZftqP>=}=|uMdpn2;ZYVo zDYDpku-5-G)KBkn4|0dyk3uIzhVG(KMgt41`{fsPS3M6(2VePYv{SyN9mgjRnvuoN zH|Jz`rIiN>Rj7ZwR36ASmTrU5?fixkMsQ;QO?cV0@N@eJZnsqhtj?zmGKd3NCESWL zDG^A`pMrXg-an)l47tY?L?vU+(>WWicWv7DoSuo&_v7M!*H-<#UbImtyr?r1+_ZovP2 z$i$)GP5MW}i;a;yXf1J3bJnL~desn)<<7+irhsyTqxZ`ve3ZbZrTQd!z|!WM<|g#; zPEYe})w3kFZuonSOKCrx(aB{Bv>0wxDhQTX2B}(TPRA)V*%*Ei3N>feEjP;z>fu@$ zhA-ky2`%^(=o+|1p$j!0NZPo}Fnj_A9}W0OFiA~{k=`mtD`^jUNst3y%Epv)IjE&n 
zrQOzHdwH?{+RKamvX3fzqn8eVA49A5QX4`x?SVhh(Soi?v-s@nB8N<*iSBA`s378$DUS8dF zr;-EK|Kg6r|80HQghryc8g_%XFJG2M zy_4Kk(Wjn@vH-R7q5Yp@jK}#0wJ-#EN*Dfo2?_mnMOV(TjdnaMtBJ!jZq0MVl2NsdRp-hc)GPx&H?AA%^f5?U0+_Et z)ZE*A_+*(aABWY0bzP;4R+7f>qjvZQ741=ZY6kR}5^~r2%>v7t8RmEr-4JMq&yutl z;l^;A?;9ehzCsMy8H> zlHLTWBNcu<8~+{r2$?GLz-*u4Y270dy58Wq`P`%bgFaf-o2$P0WTnD;^~*}H5S4Pz z=u(yl)vn*xArm77W7QP)8qR+X8~zWE;r;`;TpsK9Caz6&4uX`&dcBGBQJsTRI(d%W z%nhrr1C_5t{hd2E{!3;<^@z&w>PB!BdBHai$#@tNTbBKAkxh8$8}p4AY3QMg$v}}E zdR1PgUzurY^H~0ouK-6t}PC4jlYHqm*Vj5b|Qq`>cUy203-+ zrQU72)t5w(jjvs5seEk*7OGBU8PFMEl43yoR#&_rL*JU#&{llJjeh1U=Q97zGg6ZV zm%%#B%MH*4;(QvRCWX4K&UTV&_4Zz!?82@=Tc>U4KVqSCd*P=IYdIo+{h1*S{_?*c zG6$C{liXjCq~>`tiGL}Wp#=faUC@PSk(j^i`e~Q)oqhBvJzA%qxm6VLk8bj=ug#^_ zx5a1DHeuKGF#%2~`<`-ViiJ|4zG%ATpJ+F6*>^I}!|L-{O}||F1A9jm8uf6v;l3n6 zKP}~f39dWO%Z(p>Cpn}6+S=*wCkRrm^5~V^0B9{o&(GJ5I>k8#}En^jFFb z0+#f+h_Ck2&ZT#jhK(Hl>9Ut${Et!KF9({+16BTk3ctfCL1;t&P`_UNw~}xxVK0Hf zAid4MHP0HgCV#_^n$L)Na*I~&NK>wj?I$hTn%!tAmqfbImQXQVxxK~O4X6qNz<8Mj zvP@Lb=2>={$@;wJeRxBkGifO@qk?nzFZ7`8gYV?RV}CA7t4HaWMyyse@iV}8=`{!_ z#@gaOPCvO%7#z5PSkleHOaIpVzjfV@PtC&_bfkN39`#SX;}&(Spf`Zj@PrAdOm z3Qkdzf;wfBv-d3LC7!4TpvQQ$n4c!}=6p+L8ziy$eN|woN(FoLBJFt~RQ)A_R($40IL(*B@MWkko_AL_KB18toN1rVyrop9bgWn3W1G{&~TraiZw#CPn?VC;9t zrVCog7yeSNHDN36pM2br@Y4bI@=hvI1y=y|xsuYaX}dt5Bs?p_a{lcKA5JF>a>Sbf zYD#Og#rr{997hKy^S4{hZ#nEcW2OnPs9zX}k%l8Ud`Mi~AraN_xo+{U*yu~HaQ18I zR&@oiahDTOMFUHKxFm!cF}oMrH!q+>*H$aYu`@xMa<)%INd(c``R9wDUm0>;t+}(E zGNKfn=^IQW$jEY)8gW>TSQ+_+s9J1}<9{Rb$Cew}$K+viVyGZZr( zf9fQT24++gu}$oN8^i~!bzk=@&>lw4r&_}t@qen`B}kitF5F*r&siA+`m3cTV3~3u zu<@fPg`H(fbCWJ&3;cwXL}DVW&wgO!$%j(*agCx`K|YTjq9zuR`+h1a=e$ovF`Y0U z?~&M~KX{N|Si3$yxSOWa_7W0)bAjnIIFWCi6?^({K=DweG|Mx>x@6Y;@z3qCRtr~5 zYi@%F`%LYrecw_9O%5S<&E%CfP*qRC8~Bh&fSx73Hdyz-#W`V9@wM$e}K zA$joR>K4XpP16v}O~bEGij-4|kV-s9Gjw`}m%bJJ1AJiFczR6#2l&bZb9SMK z%^y=;qP<=S-n*cR(LTyYXuq%yfsK=4b|WU+3B{^#=`)VihobE^Bxvl$e9#9YyHVc| z`Z~$oH4gv%U71Bu|RK21x?P{1qfL zSRqw4!mf+JSvf1^DllA1gg4~au&16Ni>Q$tb)zJ8TB+ONcIQAs$hm3(I!MN*VY@_e 
z&F4dWnAD!vTqy~J>eTP6VLP?jyMLIx*#yG##E1(h~M1& zMfa0`C`kK2BcD?V%O+_McrQcYZNf1(n-{hJg}fU-ArJ;jxcf~QHNX%jv-`!+EjzO& z23|Sc;ls|l#pabA-Efjrr-G|o@YGfDcC&uwAH4Hji`eFsQ$=_ZcZj_6d%9YT?N{h$l~!tjJ=M$;&c@@ z5sYyBVgo$~k6_K=_Gh~E&%dD4R!xKps=_V5TIk5{w-Z(-yvV0z*>MsZ(w%9c6?t?G z*1|jPz-eNuuT9KyMH4N7cgnvnlycUWH>C>Ihj(mN;)^HjUla7j@wrNOp+gK}dV^hQ z_S&nio)o2DkVz_2^`6PqW1Bb*G{bv`S|aytIO@c?%gcWmCsYq&&vod7uAa2k>Tn0) zFr?KVG>;rlhh?2+ys9zlYxrMcoe3v(*@}3asWNe{Q89dWDgsC@IqBlYRM7JMUnkwb zoxYZ-Y`=bid8S6+)8RhAq{ax@WXfHr_w6u(%Y;wHb&z`EjqnH9kcQt^Z1~=EHVRpe z<+|^l?*EVA<^SuwLI3_AHg&l%PS7y@&g=3r>}b>Gp4As&zrFxk<|P2!IA0L!h5eF) z=|HO|(6uuxOLtgO?BrLKEIh{gE(G*2^!AnMfA4L>-Z(GJp$#qj>U@hl#g#WdtTOfI z(R5b^?3202Ur@R!GP}f$m0y@<+TkyKx1EVz48WhZputVHc{NtoA8VT)n`1Tb3?mT^ zM9TJMJhHXBLA!4PvqrqFgz>h{`;`|T_Wl9V%2s?gO(3(Bl$R4lk(~r9{vNGi5Ag$n zp>&D7zY90N%*mKLe?-Ej#)8e|%%iRO7*#X(!fv#VUpBr4M)$H8j4@vt%tll`f1D{; zxL5?YG%qyxWlQw?ehd@En7)|0Xjd)+c-a2Va{335I@WOEjqApiJ8s9_stR8o>WhE> z0O><_t1zzDhKlBu1%H}p)~S9*rDD#Cfiq(8DR@Zkzm~3eQGz0X*pkW>tPX_?M^ME$ z^_MS?C0xoiidD~07h=3^(Mn+Rs%02%&(ucGAlu3%Bwe zxv-XekYiQ2eZy1TAqUBtu&}i@ev3KXqk0o}rv8#chl7t~S-US!>nI0xFnox>06@oDmJHM+U5~lz!Q#VH+j^8VkP%gX^wr9anruHoV#HXdG?~ur^ zl83|v5p8J1m2e{1R?3WvdE(bRh}~8&CdbMyL1B#=suU>BKuz#dPaWJLo|HD2uTL*= z@lA?k{?$Fzp@MTp_x`Pp_PS&r?N}qbr8Mn|5D{&2(52(v?^#8_$pD}qBADxIr%2`w zL(@M-@+dA7f3USqG}nP4pa=L1mt}DaPit5{lf@wdDQTevIp0Z4nSi{j% zribu-q?#Fh^bktec`12QMV%%Ml1kHJJ9U-aO*WPd9eFH2cJSdZ_l<9Zx7LRemQ}5y zR;@a!dH%IvL|(4R%(3N?O|m~cnhoO>_L)z~UcrkoOCeWLvRm<@miuqoVF5e?er97i z!hi@(k`^jny7@F*vYxMKd_cu2x*t9R^1c}sqF-c0J8hv2_FZ)C|6=VegW~GLZBG(h z0>PaoNN^8sAp|ElH16KGy9NRT8g~osH12MVLvVL@cX+4YId|rqxl(tgYCd#zb@l$% zUHkv+^{n+OYm}+QmE)X{jMb&dZ6EFp2EgD9xAEoX&)*-3gj|Lo zK6NvFC?%ASYH+YAd$(lgqqFj9tlDEM$8yA@jUB5;>b0E7L6x$H#}Xzq$EudOlbu|` zI;(y!#@e|D#_i|oHaikYT(n9nXrFo%oJeW=u&T4xrahD9t^@e-bCjB|c*NN_!a+&W zS#oKAPxwcuq3V1EPXd{$3EQHgP%Mjdta|dHjdI`fir94(tta> zS0S?5+?SP*P@&EKovENw^}##QL_d3v&OY3SPR*nbQu-~;TCQ5u>GHotRCK=Mg(~s$ z_nHW!jQK^&^5(JK2Ldwv96@<1Q;~NqKXRogLdw4oEQJg5oEe4fA@7-yM$-hIdE)Pw 
z#`h#`#lT2)L^iKB$B{rni1(ehdv5c!3hlGPUk5=8@aRR|5n8meOIQkH8~ohp%EQv z1`^TXGmQRNxRn9sl({W(76@M$o8|nCS20PZjw&yoL;GCgq`(Qv$?93U`r_HW4o(br zwP#Pnr!=XJ;hI5POXoWp*cB|YbYl6d{SkR|g)G~;zr#HZFA86h)a`5ci(-MT!8_*i zZ>Qps#ry+IJiCnG}(N4zPkssI;sp4COsj5RvCP zeB&m6ypN~E@G3!moSzg$uw4@7xuM9igL^q1p2&WtmYTdsb)x_LCyu*Z5eN2%e}DhhJhy82koP9>ynZiQ0TO?Ry_I4WUPf2% z-1U#$C?W%2c!vXExMJWRIpb4ZX-w1=yXPhX5e=Gi_c7sp0#rtKDp*ysTB9Q;Y1gqoKkRZslX z!9Ql>ug%OGz-ntUW;>hdm5fmp`-$aOK|WBp`vA5JHVdb~8+`m)dJhV`W*?Fc^rrx7G;=M`mns|B~L8Gsf zPF@*`=uQg!&6+vdkFP#sA1tvwXFajQ*_zS-OAneE&0s6J797yaob{W5DDF4e!my9b z1zGgP@W(mYtBPSBb7RmP0`@q3DBo%D{1xQ4=aM6|-HR@`gVLIWq%hSf${n5$1$m5; zuIlzl+2fw?*eGL3`bmXP(hVFuGd(_IsCFZC{Uema*OqKCj{FE5Wf7tA9Ac(oOv!B4(|Sk!aW)op^ubboVVkc4-Cu~jIDEwFOu&d+i! zPbYGRi2ZF-XjEaTKOdon@oqkNI%kQP4o+{T|JZQ>Nj+x?RELw0c_|>CU z!Kz6uuR!%>c3I%nqHoDB1Uv4u_A_TOKZ`!%78cQwK4n4~;4_?4HN*C$@Nq^WLGB@?!I9%7b8eq7c1;eJ95!P(6;%5!9)}hEEMRfHfx+5_Q&9)*V@%e^N zG>%2{oYoc-%&>h9hx$c&P1O_=OMX^n2Qxs16nB2d#!>&hQ__BIZeY%bsvy;C0t`Ol z!ygV4Y@>ei*bzBM`!xC4Y=(XpW!^hWBwtnI-e)zz$`%w8!(|VmPio@6VZazelUzFN zX2UgEn4U;wzJG{Kf%-t$Hr=QN$XLWr2RH2kX==~?-Kp9#UB74Xah2WXvNh)gR3?Kx zpcZ_y*QOztUDNuG-4IH5tRrCFPTNY1q=_9Q3`*)iFIUw0kX|=qec}~b+tlR@{#`2)k@zn_8CDjE|2a6R!DZEM`-h;^1oqLdCk@*7RghmH>Q;Hy z`1`WDlNMEr>pE+FG$Db5<6MJ~3=sw&foz4W>;5c?M(O4$Su#}%J(w8_z#>; z<>({t=6tm63w80*vh_7jNzMwH)qIFpTwfJpPC|C7V+^}5)5){oi1)EebAv0aCajDB zf$WuO-ykd-L9*E16@W5?Uzfb^x|+X!dCr9y$q`g>!v_($XbgZ=m|>xRpA#?}tJBEl zP)ib{Lb5yJqrTp1({xr^Bt?%|j^E#f&rmi*_bO|P zzWg9mkC4VQ%xq#@SE#@FMfDlEa_Ovxu=m@cXRvyJ# z+x*mzsHkXh?Z2&dDjLn#y5g$niXX(`LrYG{xtWG+Jp~utWtX#4)3RTFo{JuD8N6c5 z*Z9Cx9`$1XE~bH7(MQxZ|M}6*Ct`@{gZNvS(m!wkz#Al|z=xq_=f8b(Aea#-Z z7E6HVSQWI4uba1!5N|+jHkOZ^lMkr0_QSF{(M#EQn#ca9TpP}dbw$}5gK(9Q%Z`e` zf8bbPYPXQ*&l*oY{sAY#=<@Hu8sWwxbt&e^yb*ns97**t!vZsl(%|ozBI1@+P*td%kP9v;n;7w zs#VZMlce!>VWSbH*D+PE$E|eaM#y8VoWRxhH}r`Bp8H4+OG4(GM(e^}yZTPfZ)W2{pj55ed!*4xKS-zvzxVFE)G9Hv^p%}_NdnIl_I4K!i){qFjzyefdS+8 
z3i9MIS4j!Shj10{7;{Cm3%Q4zH=;9DC0u!wQ_a?~%BjeWfYQT)d+@4+PHi^4Uo{j+kWEjxz@Q6RHQ)c?(q(=3_z4JU84_oN;$9Fky6ygOVJ)c z`xY6i9;YHgxoR>y z^7uIC<&q=r^P zz*poTDFYZt2AS?8*N-?HSjOaTDua3OlK;94X!}}>Gs(b2xq2Qk>G4)BBB-!jj!;a~u6kiv@3=oO^l+%u%^jlW&IzCn+E_TwI zzzubp&zf-!s}y^hyBXOn_m(LBLPw7Iehcqm#66#J6g4X$g8Ll=^`TH)UkiIO&!0o< zix&~y2=&nO-*_>oHC(yY$=Xu{mbI#?wzlrNE^R@=51}bC8cTF8fFVDmRD{fL{A!I^ z8xmzO6xC%`r(p1gu(&NeAOK8G-%pWm0GvA2p3a{Mk0$?eTr7~lpkZz4_`T)_GBR8Qy5gQfvH!UqF%@Vu~oU6J*V|jYDs9%71#Qn5A^jfC3*Q zLtXDCeU)3z$qnWoElcJXavQS@q9W!azQr5N4Foz2Nb#3wJAPD=bjRe-Y`zgV?51_{ zXyn|zJdJ*9w_LP#*S0C0Bp+5}^hB!3NzAYyDid@!ZaSu&InU#%msE^=F)Gm;xkER$ z-7p&o5%ri)LlQpDx#SG#Ft`V0K-|$KzM=NpnQ1ts--nD5kI~}K!Ju>W=>I2~njQg` zcvl2jIeT+1FAl&uRq(~=j`e3Z4StoF`z=?Trs?{`V*b9pZR(72LS}TQ4Vj0IB=b;rwVL1NoQ0vL(o@WItBC9$--Ar)lWlaWe-h7j}99Zo^M$xmKja@1X_QFnUB89GFC zlwITA_RHmVZ*XG~D)8%alujKF6$tN@vEL7?``u}Y9iKTX5h0vi2PO?T z-W1R7WGzI~$~neq!b9z>r*yb5-F?vsd4n{bg?3_qBF66~s*5+I&5XmyY{{WSPqW_Y z;fa|kNIQ%%Fc!tJO2vM1>zFtZd{jk7_C=}VB;M><@34j-vK_lMA?I>uQa9W^%PS)f zm+nxL%$>*JppC`n#J)J4@+e9tOkT!QI*N|EQ`eI8_+~GLd7K6osFZ=&A?k)ZvGC~b{($Et&Ru-| zq5{YdQ_VWOTy}}IEhZ)N=`by8ubEU~iHyYaSDz8s6EVr1;!n||daMUTkF|p6)Blvv z3{X&Dd=a-rgVudpAvC{V*U103RCWqGjeMv@Ekp<&;y)>pS(jC1jNP~M;rC0_>v+DY zuf0fqUE`ds{Rd9?AGkjdVSh%9pq)4m4A9LBx?O@W;f|kpia&`TGtN4ADE>U;n^; zP3oWv%YI?qBzdbB>7<^f@2B1<9h$8`>~A(BcG)jdUaK!b!C?2kG_d`hUpnQouDWA6 zcju&19luE$f|Jg$OwAvqv($Qc7B}P;#4{AzwSo)_cj-O3)znD6u%bMX+|qZohnq*a z<3Bds90`NADIXaHe50KTOJrQxXq#KWT%3>Am`bKWz4Ktk)-PjUhQ3H&GAuQb?USPx zSXCy@krMreL)xDhMlD%Fl{aVU�z}kBy(lpECAp?2$3?2Ce?C@=QTLjJV^{=f7xK z=)a2^#182G)Wm-WF@ccc&LCmd?GY0dg6p34!?ODQKPllf54~8IM^b;Aptxv@1)3s8 zY~^tf+97WOalb@X)8OEzCn7j-q~(;YFY0L3?~bi6=0%#NQ#y#$Pk2l~mymxNISR#& zx&1k9oS*@(&0f+)`hBuqW6IKKdhJv2(=-vKr?@TuMwC1RZ| z3yOf5VKN8tL2qgD7S`4>qb*4B8Fg}!vY8(djy5q+@SAAp1Xay!Gb1hHhx(S~GLE_) zI>C0z7}`Uha5!iK%&A2o+E0Lu@bN?+Z_gKC`1iOz3d9I1tV?091&&CCBTD82~h!FUUZDu2bvOc$H zp8TQ09Z`5h5v70?wEuh~JkTdy78+)=yN;#F_<4)Qp&tt}vo%BO29fkdu;WX@;e#wI 
zb=_PIJ~pe?DcZ9R(i`O#-loh*DDgz99^|&Umz)o1TjR|M0dk5|Npqe^a7et4tFIKp zw-q0x*@z#Omc$!>?1#o=D~?*QZ}TZn7%8HwZb$SYGxUcKRdIE43m!$o^XNL;&UyK z8Yxm!_Hs!Ep@;E}2Zi+=wR6N4i8hC|$KoX`O#GDC+pPkyl*YhG@oDriuo{iw?7fFc z)c4_GEtCOaP}dtHg3bu#FKQFuWO6lkA9jbr1}CRC@R!H6_^j)yz2fv2Z!Nn@8Wj$b z9X9GDsMxvYOy9Q*a#`?@tKbD5okpAG^pByFxaKXr<5?lZi}-#DfhLOYlEWi>ppt}; z7i6y|w#|u?>!L$Bf|6~j#(Ak&Yt$>%qP4+CSbmY|aDm0PykfhdF8ou(r=QYY>dGgG zekOfFuJ~R=STE==(W`tWz5HtRKC@T1$~rF-!WVQsqUattv`NG79R1XgTVu=?qQ(CD zqurRx^H^<;GyIGLU9BiER#N4aX#eQsb-t7TjFUt{F&?QRS5@?FSW(+0%y!QSxC?y@S80CWPmo6{GlnP zxmNgD62^+@JrnRwH!<@@&?INyj~*r-ZoZv5<4<{Hz?Lykz{xznft-}Kqg*vEP6T}g zMo$EzrR`pG{7|Df&97W=VM3bh2~KM@mdz6^VBleVt{v;a@lBd^!I10;XtkLQCqnS7 zN$Q0k2J@rkmw-}EDU|`qk@V!hrrMa@Ebc68XL>iD2FE7bc#9r9-Io4bqx8x;X~Nd? zcDe+XFoH%eb@6TPm5hlbqofQ``6h4j=jD21GM@q@2@h4L>~^}61Beq(Sbvsfw<)|x zDvdJv%yga{gJz<~hrsxV)NfGS<~QtCgjw`uZ>`fD(oC)K6ZipoWer4c{hfyULUo6J zw~d&wjBFz?V?kR%;d`m2s8-6GyUzvBfcTHUodN`qY)HZ@wqrF37b)Qw=%NQ2{!*hV zC;*$D8BBwuRAE#x&Pq;ElElpw!bTKi$`njWF~sFT&iGAgrv*2qrxnMUwLB@+$q6?fB)l_hB~_U%*NSESh4ns8EcIf>{&-9D%r#tBv% zA34@qZuo*2T37Mn1y_;#ap0qF($_asDM&yA0I60MnAU3aP>_|W@0M+p<57m&I4hFMuhpS;v_ zR2Y^2bsl~$DDx}`nUhW_W21~Xb!qpJEkxf#>oeq)e+301MDUVq^=;&6X-nB+*c|%M zO=$b~wvJgSHMh3Kblco>*0`>WG`*>xlCTCE<@6X;|7hM!Z}kP)TapW^%-4rQ=O7d^ z?ScS~WK$0tvP@aW`>YcIm>}6Zt-9=X00X9fgh3Om$|7%?DBr#{F;GXtl)N(^D>$s2>%hd1FOZP>6SlzXzbXK)_{EIIr2B;R>X z64bGm5T6@+jq{K??*acNl?ngWL+}sW6A`4k(+cwV(dZwzstnO5g!;>g-Yd~f{z**# zJ!;$a*J=aXx5<{z)s-H2{dr3=p0xER?l?5!&S9}M7+tP{PF&#Giwx-*=Qir3--@Hp z@}aDr3XB{pFSi9H6rZCanm+wfl0>|a7<#g#J0V{4<(d$ zogvl3%tdidLIXw_-VwKHSAYLqpZDV&scGse5MY4!(ANB2a zA;mpzD9T#L^1(Y%Rm|-V637FDAW;H8~wrOGZz3sEvI% zRc9`PJu5|Q6gXxf@E2TTUUx9;4FGr~y?E9P-Jgy)nLF=oG2KI_{}l9%UvnEBx~m_*H7lFVA^@zuQoh(< z(yy>?YxH&%K@za5iu?KeVO;QSx3lM0eQSg0<>?pG+s{1P*ZB8GE6aLpOGe!)UbiJU zYdwZ*E;5wV0L~x=sVKUFgEv~((a4o!VSRokwO^dXuC*IIHeq+uu>Ac4%_WM1BATED zxc&HKOs7j9Z)VwIk$|&Y$AwbT$uFoax-T2`+$h333L`xw*C34n1&2VG$0Ya(SRc9- z$wM_Pmzd22-Vho=M^+YbX{F${!+ttve0YA~K76trs|=wS#8xjF{R-WBaFCw|R0ek1g=qZ}Y?#WiJ} 
zvwA_0{YOn3Bp~H}%cUNkvy|hEy~SGwvAc+^OzqWxG8!W%AC7h`ou_JbRX{Apk6TEiR zOs*(MH7Fy(gTDaaq27P&90UzGD)p~B^DVI7*GzU>zGkp*^Fmvll_trAIr0lDvd61; z7kOre9u@rRUs;mZk0xRF#25SY;x<}M?FUy9I*{)+s$U9i^x%c3pZzP^1jhhG!;Z>|?KRok~^ zB*`$8rqVm?O;+^lg`)&?eHw`NtKXaFsmi|s=&-F71Dh~@7P zy3s87qSKoqk-HBqM;`2so2b6+NnXmI3#P0QBf{KAYT72XwpkYc3@U5V`iMm8PQK!( z*dVh12P@`$ELWb#s4{=I${_uNxw#0;g} z0akt_JBGO%J~2jWik8D!ZWm5$CpJOm2+SLR#(ib~ida2C@oJTCo_|6!JvizvSvhvE z2@bv}tv-n}B>pWE?Ao;Oj@hHviNEY>z;=2}g%?xOI=vxSnL#%MwTb%d z=6x}vq|e}6vzQSST%4Dp*<6Q1*dzjz|mBbRr!R!ukJ2MB$K z+bMIS!2s^&&p#!uZqBft?WY~4duHyK$Gcaa>~mi)g1xHh_3+7+Zx?yd+guUDGETBa zCSRv?LYsbmRVp?{ouEl;rvX!eoGA$H;I}%t{BZqdNU}p%s3zNtA^sNjvEX)Qb+a4|882=xQz+6RNXv^_3QHe`oLFV%i5>5iQ!ect-u@Yd^QKG ziuTI3o@#6kvE`AY*jn5t=Q4ovp?v-0NBi)+MUV<3WY>BN7RDBU`P`}^xS6GCTTP}q zUSg9pdvyBnm0PNJ=5s=!Q~hGY**~zDxQ0Q77DO8L>&Cu4eA+0JSP47|SW_j%W=8c+ z?V7RoojpZK3}3a^3IjDZrwQoZ$e?dgvhc%Hb2N!IdnkzUxe2 z{vESDS$gPL0r;!#N5)Z8L+R%3LtWK^QLiQ&lRu};M^;o#!`F(HgbFj}4Jpj2mYU94 zf1ZJQ_MZ4KEZexCdnM|$qs5fbrDJK`WU~ahW%>BO*hRQMFaz0S*Mk)F0J80CHKFPx z^YZHfNOHbg4SfD-5;*SP(+U6-+mjrVf;pVC2Z59mcBGiS%nohtkQs+$wN1Z~D^c?P z<)!h}^*zr9h~3=M&+4iH1*yq7TNi!mkg%%+V6<}b>P3r|Rm5c;$1!rD8t?L-}h5x7Q&JR$=}=xCl}j@Qr%8uymB={PS-Y;GMUo zG?7pF5nl{e*LJ7uLpHP%tqr(A_$*lfm#yf|K)!?QVlnM-^z#nE^-yaK{o(gk^t+XC z7oE(zSBfd83AKvLAW2G>5@DBb4cE6b;^Pox;y{#R03` zrxKlc+^J zyRyp!{8^GBo2{2=easuHJinZyagGV_%r{XxVcq*m!*W`BvTxVw?Qw5U;>=eWwG_8H)?ot^)<(o# zrA4{Ck)G;{GC8S4gGAVe;6&7>80zxhy$lE!R`XXQAD4c|c+Js7aXICUP*++*Dxd{7 zoHPrX#qu$V!c3xt5;ijQ30eH`kai`HuX@W>-oYxd4$~^t`}Xy|z-PYlGEnIVK%8+u zS^|>DbhN(MFI_iFix3bO%Cy)7{W5y#v{&d$$6T%Ox1Wm@9Zrl7;`|^%UaoC>wY*Q^ z&QEj;$+GK#3Mfc^lt%x^74d|-$zWdH`B{-$=zO(--R(Kz>N@*ghOnoebaW%x;*p|L z0uR`qT@AzPe*NFS)Kn+%>wXB0Vt+JCxp|)k{EMxU`_nTe8MhyVzM{}g9VN#_5rFoH zzSY_1*(O2uBpG=nW%bd9le`dkq9Y6kgpco;xd={360kC8)2QIn}fg{fB!i zckgqs&0%vo<*V@V(UnhL!Q`R$w%Q;J zQeYzAF$vg+PWp;f$q!%Pu=jw7Do{6x^}n2Xr~kW( z;Qv55l^6xdg(Em_JKh#IA0Yj3)&5bsoIBwXNZgDNANv5L;KeEK9X;*Z*wxRKvn$ZF zVYNj8uwG9-0j28F4I_d6hbpqU4kRK{CgMM(-3Z=2oEtEx;xb03>112TAu43p!ha#! 
zKs;*+{wt+D$`kJBWNwP6xr5<^Xp8!-JbhAH+$4S(|6M|Et4?3Da$LdSD@b$Pj*i<) z`9OWSJ#AL0wD?gIli|>MvdKM_qb{nVi~q%1IxJPi&7LMpHkl7EnQJ-fue|yLks&Xp z^(LJ{%bd)9Atf?z@XIuum1kRgmua3Y{lbQp)Z68wP-&5R?jNwq(LUPn2gbYNc-6w=J@EyQuIHlwLBVQemxsE*^CW2)gOSMfMG13WR0^$G_cI zcz|;(oC|b?BfH%t-EQ~cbD-_M)EIZM-y0fPm_4?=qdR=VDoy&e@v|V-hJhwJb;KG= zydROBkU$d?2lE3)daN<6()Ufg#394i$PqQ|It*O<``y0do`)K65#Q;?7`t9qE~KuD zcN;x+J$8i#q+d$&>cSS5P;TMVUb;Uwisl&&X`neQ{Eb?vl3ws>mpRvVOTJ!iSV(6% z)!j&Jd}$^wGg|x9t`iS6&@RwdUKp<=$Vl#}^g_%I{}gYdfVsWSJ>AVIE2aT!KqilR z3EeNQDARdv1!_?`$(+t}ugc*w0jN_PZbg8M0$ky<1_JF*B5>D4SbcL0V9D52Odbm1 z51_U3IQ`-2;P!>;)1lJ8e>9e<4Jib!$NW{zYGFI{xMTcBK$q4ms>&$Sbk>oR!EYuh zyyUN3JOhV8AUV?*%CF+=#Rav|yQVbwhrud;{4joHSJ)~SRF9w}q2ar(y))$GP$>^cot9kZw($yxdLL( ziT&Ki57AuIJ<W5t{Djdz;)3Y^*Rp)TVu_=j=5iHuP#%4b-my1TO2&g!Er7f*Lc( zIPP9R(=0B>EIfz}RgqFS+eAl>=X$POW+bv7wUedb`2{CTLjDR-+vk1%*%PN1fG$0f zoxVKPYP?4j=)<&O`cWtTgwaa;9D#xnA3?#)xv2^Yk-ga0SEuuA&B_#@7}LVP3lB@F zGC7T*Pw63abX5_wNHe{{Xb#)BhvL)B{qk{=z~NxJHnq6SX$8v;nAVKY#BL*rtVhXl z>J0>ajm>ohjwA0_ol(AS2JFX)M4c+v6?&^hbvtA4VAfaEX>cq%{fwlo@(w4K-wdFn z=zUa+lVj?tY4J4T-`UID>qn(Gq$aXq9{|a2#(Dex!IYBtGQ@-u_Z_Uc>VC?PyjGj{ z^W!}sMk#6+1}Y#?yCK{$l#lXq!i!){r(_L*SpAL4;|tEb3C!1QIEZJGqa>l*oLFEc zNfQ9acV@kwNL!aBqa1`v^Oi8vsCD|Yd$=YOW^{z8<7I8tGO|puU*)Op@_xvAD6!3t z!%F>%K!HlUm9QB0Rgue)CK1hRwm%Z35wN-Q%5V?@vi5}bWg{M}W4i$Vx?a&mXpGvF z)t~MF27cb3-oWtr>d%xOwelJ*7k4>Gd8E}0=nP;9`3rR1x(r{Y8#}n#q3OuI3T9k= z-QONFjg<8F5MJ^`BS*HSIqk}#)&vW_QK~x+ch`lN==eL!hXF|a7tIw4v1fS`>Pzm! 
zERxqKAB!?7p3ai)d`ZVWwgd~?qDnL#PNqAy?+-?{&t(-xjXrtza?8Tum6tEnxujBc z5sTCq1>*0n**bhd8<$ffCHz91KC6J#r$QNwwYp6m(h(vzojM=Vi=qwQUvKyeqb@{6 zoh{AOzAx2&mjTdWGzfYO3e3{8K3N4Mk9}-5~k|T`K#_nA;ou#r_@Qv$$<`tu(KsFV`|R^rMRW5UDH{fT0^8 z`$~{Rm1%8V!0R&6f0-hi;x(SAZXT|VP;IW{uPn7-D=!AzH9J~_tpg_(DQPk95eEk} zcf8fEB}mBg0V$C&{@QlhXkQ*vGZ8B-)K>$LoF}v!T|Wi(xKTsewK($Wn3mh6IB7Pd z&w#dy+#`-vV?8Qh&23W0h3A>0Z;ZWlM&7d;V?B)BpV2Zx2RsW=&SLg}u&Qj5xow>z z{n&gBy~tMIW0heomI9n-r~1gt@>XN>oh$yEl046t&1K=H@?C_n92F(^--5?KzA?m{ z_t^;IYEuSJwQcPTUThKtXEj&63Idm6yBSuH-Wh`;Tnxlly3J8Lw4tgJl5zRL^kNiH zZdxOg`N(+Sze3W05Ah}1C$=cp5a*C*MbSj3XJyr0xRAr!%3n}LYL9hINztnd|3qgD zukQd5jws2i9LjPL`W;L&y<>fwb3cP&dZ~5mpY`I|0YXwRvX`>Hk1*3hzS?rvMx=3D z=^OTkmEvIZrfnF2Ca`?I#bcq^l>sIapJaTJhYG147L}AF^~!f&;{Kc-L{1VyJMp2l zOIo6mp%*NC7k1w7pxx?Q`9#V+;)H~Jte=I07m8xYnx*S4|I;x>_;{z9YrkK{MXFhW z^$XQ=ybhE^*3^JgJ|o1a=aoZQzoOOIk(h=IjD3lTHEW7xUSJpw_ZDb1g&z2eZSxYUC5a#FVA1X zTcEFA?VjApZEEU^7Y^_iyVBCit?&^J&$(r(c{YE>ffan|BOt1%!DnfDIAs2EoqEGA zTeU9g-2;&1o=JTycDHE%QrY4XQ(Y9>s7Lqbl(?xqwnY-Q%rL=bqSC2y|LYlm4aos3 z)o74NmvO0_{rd}r>BFz1T*Q3a6ssWE{JF-)Bs*&+{C&9TZUD45m=pSqT4Mww4s;Y9 z5L>RTQPdoOWQGtAT^&Q>iw?NG)6y~+V&04%g`)vZW}}Me@tk5I$Z`rZ4Sd%8OKqC| zYa@nZx)c928_jsx!k04D42zx;Vqf8z3VkjU;FBd7&3*N!o^Mrwc^Q${uT%J^TC^^4$mc(W1oQgVW#Cn+M<_rdDMaWc!V1UTt_mvq7B+IOfmWP>U3bTnk~55R??MQuRdWH=LZ(iM{&iZPBy_Z~0#OvLO% zU5#kuo`r_;F62YMXF@EphR^HYV|{gP_QaWb!NVDcxCD2pbGF{-Fag58Z1iQ1Y>8B~ zQ4VIYKuc37eQz=KbeR--?OYc=goS7)kfW z?X6y0&XR`Xhl`vd@i<|~L?x4~x4Vrl)!Tz(_cq|0qZXl#QB&wbUEJfS3^QE!Q^ZMd zr|M{*)MEaVjiMuYK_AOrWCtggX0BSz&y<(`sOBXR`)T5`Mx4RO;o#lpw(G9q%F`6v ztWgG>)4gwwdI{PpvWntKZ2;P>_N|bHT7m-p-#B*g2rZ{X_|`J8yUnIMPOrTU=41JB zYuL?Vq=E@)D9~<;>&#K+%N2L>xe_C#D36nl=u`_V(1@7RJwZqM41&!@I%=F z1cQ#3-CUXC>+p8k<6n170h03&1M3RN#J-)F3Ay#k|8L*%IE zHzm9|;bPC5&D`pJ1&M8tMYFLfcex9Pg+h zzOeKBMd1i)OsF~wc$Mj|^?nFbML1=WhAm`TNPVhEG(;K9w-sxE?D0}2L)4OFoowo2 zG<#1RJcAK9f2e03g_P`$HepLOEcTe3gWwJ1Nx_y0!W#Ah#*3F591`xG4g3J3B2Pwt zV3im{5nx#PX$@@`;YGQu5>wLHd3|@;01N|#GS0>^5a1ZiRhD7qC&sR 
zeLx7=(*hpKsmU|v59Z+lwTohOC*LF9R5JT;4Ey8Yh^aV9Z{OIGYj&={0jt)Rg3*dy z#~{b7I_F=XWU;to+Xk3*0=%YWK;CYF3N+>jjv7WBHAEEvzrS&g!9TupCTVfzzYC>q z2XfLS1hT9FWeVM|Rly7WLT7?Gbo9`7)#D%Zf_TF=G6&+v?YcgE=y)J&##EX2&#aF_DV{_v;q3y~o$>5Im(zA-SLgGHjPLGG3 z2G{=?%Y+L!bz1LDSJy;_eU-Iw$9;YKmdukW)KD8$^Jze;$e8Ky8<#F1>L$n8rtva5 z4f_|=5mwL@v%QLz5$6TK?}xRP;fd5Mh`hMp2x)%aB$bv+dEyw5x#ea$b&!7UvcRVt zm=bVS2NS5$!(wZzlDbPPFEvk=RG9||sp`V76V^USU6g33cXaVegC$I0R3%ZEY4EnR z_q$JUZ4eef_HR2kXDxNux6*}2w+xpvs{tOv}RTVuc$zl?8SLn78W7VGiM|?c9`MOUEob{{zRIoYpfd zq-bVZ&c`a?{-(Z9k{o6Skqg5d+@W)zNL>BZrB-%Mo3p!hEvx1`UnWeVNW6SlyNnJ* zkmJk?K2Ts;%YP6{Y5Oa+%+Vq2$L ztFSJVd~Drg#HETjsp4aU(#bcnf4z{rdW{D+c18kbKA zX0nDjzSp`LEkAYy-RW&#IbNL&B*e)T9OX*(W^ycYGGo=wS-0!LMm|CR+;UgHX1h-h z^WPsnsmrk-+Sx@uRf(92GhhSg8awBzpyupRyl7rSZ}0_TYbW+UzcA1x*0*NthMT^C zT3f)vIdnd!{0Mi2VV{%m)%z$hV_jaYs=9?FW(LvNA7iUz`aSUGm%Ov`?__K&gc=&I z%EaU2@DhapJq@3ehNQ+wI{|%dey*8-#k0yF#*irF;~!0NThdvJ!`NBxK171qZ}fP@Gb+-BwVtDFLNy$&W#K=)&x#X=0}j9=0}n_WE|NtnFx*j%uV8)4aDPjBl6T? z9%(}7J+7149*Ume`;#mF_;1-wS zJ#H3u?R*2%*2j0m`Jpn@2?B8i3S#79_(S2nJ@ve(d98?%C84FI;`@9467)x~5^iyf zAc2owB(6TH&x|k56l_yl7i{5`A3+4t1mjY4t}~3Nl<@;M@R#^^#fJu{`aes4^hg~M z3F!&_pw)<}Qy9j0KR z(r}A>T|J+^Qvia>yP%w~GmzM~8cw>Ah5v)EzYL1&i^9B72n2##AUK4^HMqNb z(8k@}-Q61}xVtp&?(P=c-5mn_-=2AA?#!*abw6~S>hs}Lbyc6e_gd?Devh|uKb{Kg z!OHkv17ee2+~_|(R85IcK~FvL!65A4Uzmu43KA)(EMPLiI5~PFP0ZZQ5Z zk7bH-E*^h=yEKf()yw8F3e|1A&4@fwH3;YC=ci?7b2?NvQSP6Ysv$Q)dMCZ{Yh*!L zouO)(FFlA^(hge_#+yb{Xsrh1CpXc_VmQpX8Be(D_m^+=Ht(?>eLW5Xu-oyGWTh(m zdaE$5d|`L}Cq(ZFxcXFUG@15Fh*q~4ikD!S3WtyDaL$Nby4=x#B1io@rI)d_`Q@wA zk;IN0Zq<|~BM!>=kb+m#MM#YUFf4{Lj;6Yw3-7lRL5;lA_$isZrGrUqz@SxW82qeJ zH2 zVDuUPnR?o&?=%#YgL81M5bu1~Ij>!qfTWs(@cvqj7WqoCR2Y9LY=awwm?)@{sG8MC zTU9l6q!7`7vwGnt@y*NE`e;*Jj#gPYe1-?&4~YW~=Tm*gmoT=Lr5dAY zA?GaM@U2lyIplA#g{$f?s8+BX%hcOT&PnU!tO9R`{!CT~2j%=tFqR*N6;pwEz8j^I zq=moQs2`8a`H+MAp&H4USKzJ#w1QjVSQR0P1H*r=MK$_wH?s}aUuG@YRWehpQkQV6 zi2QR_QFyGR40#=)?_6ip0DkdJm5j}!*ND=TG|L3|n4|rb5F4V-boG;b?aP_T 
zWF1uOJbaCVg8M14$BPxEzkT6+c+tC$lC_Y-KdGxQ@0i=4aum8sXAYPJ%BEz4SXKzW zNvPXZ6_$2s$Asf5DXrVwx7(sDW~3iEQl8w#rJ$+XqCnZiNCmoO7vbN^hE2}oUPDBi zD?bHUTgc024)v%UC-3~b{jC1@o@Q0eCQIQQEg9<}*ZSo?AaTPAb& z^_+fDpX+*Lu~`j?rz=&QPNduTDw%tiT1ho%k2XjQ)J?OVUu)5J>#3H}dcll>|FHRb z|DL_#JC}S_+!B3;?C&ll_?qVq731o7Bgi4oPvG>C!}+|$yc&>I980$RR;gZ%({GVX zG#y8Uc+~v5)4&XyaE=;aIRLIOa&@FJ&ewWU8CbX0M8AHiL|(t2H|$#}K=FE*WAPXI zfH=Jpg|H%gk`GN`uOt#FZ;b^_E~CaI((1yu`LV$gi7kB$l3n#sH5zQvIo7}*YeP>j zBqF7>2%|bF-zqxI%}td7Ah)y&v7W&QoB79YsR5^$jrbo_C-tf^De^%Mtwz$bU47za zAOA^m;u1#jr4gn%a~i4vJfE zI5{T}-`&jRXM%`Ao|HFCMxv~^S(rhEOjeG7fnSrItUt@|k6+H}bIb$k3T|5X73%nN zk!e)b-nqe5v8fR8G&bCFbP}( zU%HIjkR603j|4^xTJx24M;s-KH>Q70IQzXoQb&P&6}FP1#2nT&(D)|`f9;hz$F zMsNMmEvJgDutY)ebtVGCZFyi)mY^>!xL~u}qacISFxJ){`FPM3Rx z#vN(9zP}aTe*T=uLBcs*m#m;0Hj-SOhR5T6x|XTOIdKwbEZLL`?RzTFwjCnU$%BdS zn(|6qaqd+);|9TH9>#ZO*i6PXGo9Q6=f=gFPx_i`LDC+zA~#h8(@(3O&m<(rZY#PYUxY zqAB-qgonIeY-+w=999ZQ;^nB7WyB3Fkb{$`1YRrpCVk)L$3@ES?q{miVN+(j9+v3n z0D~z&DmSaYka83YNm=r}2@*MG+3$>Oz63EcM?VaSdRupVdX?UT8;yD+p*~MO)$TUL zmQKM`uepUcRPSA{n99UqsR`YuZ@?*%eAb;FT%ML_S9KzRS^pHLq4Xo$&fqw@qT674 zKgy+VBzHGU6Oo8eXWW!MssrZ=Xk#R?>YwxwwHoBex;)LSR%0S-@jlWEQ%M|DHR%QA z;FvMm9c#DGVSgWXkh3%^>_pe0Nsa+PIdu0UZW8?+e#!i*6uZhjdaB&~CyZT6V-RX& zBFMMAuF5TBgs+VY)_I!WQN(RE?YpXWYF(3*n1nPpgxpb6l(UB*q^dj*eUHwga+Kpq z1ve=%3VZ<-n{+EhY{8&)_BtpQQ`TH5%EI2`C8)1S3}03OW0bY|)LT?Uohh%NM5*|4 z8p$anUiM9458{Pk-)=J!y*&=PN(%(c!q@xIs%9Xa58|Scz6D1;WShXv zU!}CEM+U;@6F(mHAZkfnf^CnL6R%MRYNx#P>i&ZwoydJVf(T2qYb>SLAcnVFw15kC zYOS*&u^-S*Uy$iY9QCNYjonR#30gBa2%VVqHA*+JQz|UH%)lgR-iSSgE`VDwM-EM? znJ($M9ldR7h? 
zsSMBNv?Rl_O&DMnIA(j#acl-*F5jaRt=l>7$ zChP*?tLJsBk0o3vjOH3o*M871H0A@>x5X{CTOR?EMrNVj&58It=a#K*i_VVLo;L4} zQYHWduX|ydZe2;CUhxdv^Let*^k&+a_q&;>j0BrmM~6WaCbEPXLmlxy`*!x(D>Zq7 zDqE>)zJY5?zc*`<5NKQfaT+IlP2ji>9%;SQbZ=4Gu#v_x{>euS0i5c9=&ZmK9eKn} zDK5Lso>{NPpz>?ISZ;>7gGJkv2;>md-i2N0u({=US#(9Zmk)ktdF9dX7m#T|%<^S= z5s2?)s5sq+2BlZ-74yn%WD|aIWo*94IK_Sd`$=-p(3dI%>=PUsDk_C*?(jr(zMtN$ zLrFJTi6nDWi}4hJu>y9Kr!Qihd(68sU8BK3B6Jxtv2f9nX!6{hn9wzsPOrYUHPJLm zWD$i^$=L)sZU8UBduL+2{D(kDqE4D*OPA=9zBVNERli^YPd*&ixxj-rG1|l_Xa7MV zbdy@^BX`QGH0S)BGt)F|6`ITsv{-L%=D|4CHG~BY%P_JyrXHU-*=$l>BdfV8F4sST)4ogQ|B^j zo;-l_dKrvZM)!_yV<>ScRy3@`w2iDotjwn^)m#afcJSpNbfbQfo!QY&wl?NjDj7OZ zLQ|@isKmn0s8DaYRza(id9TH?I6F+GJza_bMiz;RR>PbWb;o0DihbJzHC;OPKIJ|xk)qY~ zx|7MKmk0d0>Z5iTI&4gq3GoMflPfhv-53q8Zqjz+jM0oW9!nUE#lvG&o4Q43vwuRw z2Bh+xtL;jZn;>KP37LR9KUA$pR#Td>`UGIY+(TvxVhYk$A&gjm$fSaffGan&-^+kYU|qzS~ugm z9}6L=QgEAL+Akvek5#J&3=tuQQtxLUjW+c``vKxE3qVU^Q;VYP7|$Zh5`(!EM90<# zcG&d5m9b9&grZJ2?_rEYEGocO`Z1dmF$TK9g= znO=6as{%I|N2n}1=s2r~J-SAWX%;kcnBOc~efUF|zVqK^Aa3mHl2SV5b_$I46pAq@ z=bWO2LnAP*3M8HGVF*(KZ*qJv?7c#*Pwp!o(Wz4gRoPzedbiikN~HW}+ka47?3b1I zV!LUUeb*VcviZ@GlK&(+YI#$XG|}Vbsc}mV=92Wvr_8Mp6|G#Hq+zP+nAMfP0h%P2 zlkwB(3U*#yC2Ksi_sHueh{9S$hd1a~p~|1Z#?qJILQPv;6VXxAQ+<+V+30?J=Ym+#u{9oZ>1C+tx^lS57B5s3DngnFH6ya?o^Z zJ{AmITS7&&Dfbgm6+9s8-$GHnq#@(}h~1LKaF3P;E^bBgo0t{;W&)X^1I>6`k7qqU zv&z@+t=O4K-a+cdv@*shc?r{yMxbLy?Z$v5e;18@UGbHr<(%hRXs2I2(>*vZ4Fg2) zLWp?Fgw*#GYXu#Le#L%SWCE)83CEC^X*6E_kPJMf2+i77S87RfJxF+S231=UH%+dM z!$UMbg7=etVeQVrQyv-_b}TIkkGS~V^O^P?@5A%>?%0Rh=-n*uoGU zUZwh_olonWB=_iGJF@Mh?OgpBL_SGc0nLzvSL0n0bQF&J(N=P8IY0X+jpzTM@Lzy? 
z|3STfmz8g?OOtrfEaIEisnk8AF%u} z^WNmPhP%bTarID<)LiBWGz&|@Y|gh)ROA-?1Flbo%0un%=3L;LxunxbWpRcmIKtVV zM1sDC{hfi?cV~=ZPcR42CArpMbss+Fs0voc4nVE~ z@fyiYNAxiF3N+1{nU9c9Uc?Qi_yhqh*1Vw4L0XIklJPnyWqoef04}_uR!PQK^c|9n zM5AFiSi&c~Vc#-1l{k275D!A48eI^qf%D^~=48+^&2tSB(Fhgk@#^wWWNZO2=U2TK zD)XgWd&cr<_xM=w3O_5z3wbiypZoGak@fl(SM4uoymWy)UXmVbSZ1C2i-@Ih+Bki^ zn8LyxhXb>wBE|Fw1IwOhQ-Uy|AO8uA1fCuKl_pYJ0Cb}U2-6@Mbntit3oZQzbwB{= z#pxQ)escLMqCJ8laiHZgRjim<$S@Gl%PZ;P# z7%;70SpBXU;Xd}GUVh1%l>>qI=wO65{%y5??+-WRXD#E`<{fkMTk6lIZAqNNA`li< z+Bp7z%FK+{8*nEQMrd&iZ>Zn#{4V3QHvdEd%~4<82Q~VWJOlLMQ~DWX-SoF`yrQZa zuom0i4)5PSF`kpPQsqN0OjLZ}D`l9ZC(ANH?h!j~+zQrs(eARn79|>FIrzjrI2!v< z*9Xk*diz^S&V5Q&LCkx%$7hfx?f-8Nx{iO;ySC--x+ETo%b8E(j91g1KIUH!r^zXY z^&-PN(zD@iqa|0mYNIFFe*^83e9&`}d0-XBiI2vvV#V-#KDl)c)gN_5A6Fky_;LB3 z1U}9bELD^ghnfFvMo>1 zshz-?^-#TAS}KelLfE_okgGK1!(X*C(_)}5qMCAIeHFd>bvl8EihOtg z`_`_mXac#$l+@PkrxU`tj_XvO(;SU~(7947Y4UZJ+Ny}8AOMVQWNl4Uam$vudZz$| zTcZLbpo^CZlv?vftW69Ggh%F`!w*wFRD_52&T=m$7qEa>h{I_|Ud3p8AC?$q7tl(&_&Ug@HV zXMf%@apkiY>x0l^QE@LRx)YiVbJVNJOk2BGKIB|%@V_BX{{IKqc`q?loQNIvo!&iZ zq{(K6tqM8?*#=(GPy^q0YAUlKA2>=%y|Xmj-R)}L5kqg>W*`OrGa?~VDY+5ZXd z04%p;!BSFPN84SD!ySd3dGPlz*=Y`|imZbc4NOcK z>TJA-_&J8z1>Ay z@CJMv!c$0>a*>@^KnzGWS5 z(Ie`y^*CQTy}_!$m;lGsH}!|6M9Gw>XG0oK!odzLO+r_Z)#4cX=u{YoYalYQ_T+)@ z5xKWfUQ3LP1HzV9lfSm%#~Oy7s0PQ!6WaJCxe6!~U&sW-L@ zjzn-~b8A2OHYtJtEb%=bq^^;9!B=ZcpOeJmv6sw$%78UR4q?d&aZZqJT$vOgx*e|0 zYZV#ZLp?d>B3^o6jNBvQ)D4g|r%fH!;(MQX0gf26yv}xP0#0iGtS+msuJskdW#JMh zHyzPztGaP~iK6|HW;yheTwaYl%{X*KC91A9Fw3k${vQ3JLyDg@!xDw)I55etoG#%R zvnyaf>TWxH4wiNDbs_-1%gZo>@p~;?_YeN)RGQTf|B(H$v|%Qfb@Qw<{N>#Im&CJY zwYIqBIWh+FdP3SzJo3*k-KH-o|GeIR-h^QNeGxIgB`(JODJRvx$i)ZoF8+N(T>B_< z*%Z~gk&L)-F__o&B&7G;P(`lAJ-%Am$(l>f&`5WfYg300`gWeN!cx#iGc!6tGi0rf zu)S^mcN@dsPCuqwuq92d*;^O2Z)`1<_WYO1tY^owqyEn`;PYG8!nReKik^<_#28nWvH&sY(bAOh{bFfxMOr`#iK5?(ej{qb&9#y~>N%F@8pWLI z7r6wxzrA){kQ|KjT#Z~!%3Cug!d1t(5aJNveyviX^|j#NesF4`Dvm#m(tT% zt!U3JnCAN{A2?Sr(E67`VKP#0>St| 
z;elKnyTdwReFvM(d;O@DeH6=EKKS^Jc;eft^|58n_vGaRX5K_^UzQPdr4FhRAfuqH zqaGCq)enw1e18?RzxYUw627*r<aYYg7{c|lPNk>}qXnoH&Y*>AZ>s*+->W)ecusLV=+=Z=Kl)h(@Y3F?MLzu#&iO_6bG%vAyh=EqguuwEU1mVY8$(wE~f9+rtI3W&dV>giG zw?6sMLb4ts*Y~3w6a;j83xUwmv8naezQrn%|SM`Pvbyx>$p2Ikq(N`iN z7zjZAM%|2cwLgdc+HhGMAP}JL(6xykBCrF8w%lY&a&je#vz~~`tmz<*9JpmTdIH@? z*}l(PIFq=TPhW3tFigvJMXH%%SO5K4Z>jie(g<^oZ2=3{G2fdi>%h0xd#taDg0U|8 z`9X0-Z!YulN1}-230K2XGjQFax3)>z9$}x%1Z1AqfpN^2`r3_FiPZD}h9K@?>tlUl z*(X~&*JK13tk_caF`biWz94HwhSdFvV*)S21@$HSOy`EmeJI2eW%9O}OlUP_T%w(W z4?5Z>nyuZE^}|8Mi6D4^@N=DFbWwYT&OVb*v~%FdpRTc5Z7{UkjQ6&9gXt#xMcw1TT z0fEnT8yu{V8f(m~&AVjNq8hDd_hMpgWK}h-6Qoj*_B~}USK+`@*#jEk48K z+&`b#*(ySH2enXyJ2?<8kjLsc!$dam6%2++dCI1PI*ZUnWsV~-@%j)QZoMTqQG#Yg zPj4~N=jeVt+B!bN3A!I(^jtl7P{;ukq;WVn7PIsQjW>! z=z<-|<3e5yn}a;cAE_cd?OnrV&B$?%!Np3^Dho{!KCSkonq+*W1~pSB;xa*l{r&IV zd(3sMqlK$uoG$EI?1nSao1WP7S9oIy6sGjQDoSCn*&7o8wAfp{N!f#ra;**j=;@wR zCryIF@p}2oZ~nq{sm1&<45Z@!{YLE~_%Gzsc2fq2w-W<5l(>DS6S)DNlu#y@v@=vwHN9P=^6nP=EW)AN*BNsK(CZ zJ)ZzVv6ebNiAqP~Jex;N(+_p7Lr%j9KkWO|O1G7?6;9afxb=9f?7Rn(P_4F(_MYUf z#n_*M%*f8FOc)$jdYsUDKL`ydLi&|~9ZBvF+9b9+N2bU#tb;OZBDC8arsdS`j0o7g zI1paI!J7&u^8(wlIuNOKCuZev?61ZrC|PBd;Fz237M*KsNNfZ8bA?dF2%ejp@0l$U zsKlnXU;-H`|8hRpMi(JZ@;Vec;kPE^*8eGQEB-Tz=yR{b!BL?|<;uge+L9JC>0(lu z=x@q@P~DQ}3L4o+v?HA`=Yg+_^|#IeeotD7Hl0P`KNW>|fRFs#9TW$kdoX63)y&eT zsipcu;+KkaE*&TJa!KTbKxe~YqTzm2Z zmrZ6ne3)9KPhF7WLR6F8UHP|t_qK8}e||r^==o%3@>|g*BE;u*%2eJEO>GB@KMq|B z@78_SaE!$eiQG!N*tAgsiw3&y`G9?z>A}RwQHRqjqcuiEQd3m2lDIb&05a1H{GUoR z>T_usuA(RCK`*rl*?*e?;;nTl1<5Xe^+opz?ee~1=BSas+k4^?3XCUj@cp^ywoPm} zZ#nDUDbVibhw*Rs3#WQ{iR1w+gu$a^-M>%;QnoBe(61ZW|kYO#6m^G{q3E{Y6s;y zF6Cl^0TQXhiM8j@R#fZCUj-@RVc%(Qfx(@kJF!}}KL(ebG&jU9Dm&51q4u3}LWq~9 zZQ0+zw<80o5P?9WbU|0F{8R<&-%%(Of;g{d_*U=)-cHW^k+y8gUwX#5_BWO+{8yc0 z%E}E_!nsxKGlQ9c71QYKq>a?+=lJ~ z8y~x;Gw2_+xPNlRCy@QfI^p<$l|FbzJ(!IKsL-Cm&mRl_L1|r%T05D2HzhgJj|w1ecS`6YIy|{V_i7mzH?$#H&XmbU^!+uWzu9W5`88puS8sGo#XG5JKf#NmniO&Su}8D<<@q7Omz>g z0;%XT~%CHKG&+;DD&KDJI1?vJ|ZmFigXOeYo 
zK7Ou2UtsPXwYLEKnqB<%rRzoq_o`Sl=3I#>zJExSjX#HWCA=cgrT zgJt#{!WR(?^M8w}Q1AjTS7QQn_Sr@|YyW*mSL7bS zW-Y|-k4I~rf-)COvy0`)BQ_+c<@vCxYnDy3)ZnT3SeV6Md3Ce3^R?Ow;HDVpvSMmJ8%#9(3__(%_poCk4H`&15 z$GzH#h`^xJCouSr7dbe`1FrTeKZk2Gor4!0S~1hjYq_K5j>V}kyXf{!x!dk%46Mcz z#|2E%5R{i>|9RjOchxBGo=x*^aFSkhB?R8evH#iDOW1GLWrVj25YR z2u~no!O{!E6S<{AWIuF9YkQ&5+M3H}`S$`wR*r+!#ub^W8y-JG&A5hN)$6;P`IN`( zYPLa3+-e_wsGRYaqKM&ytT?Fa-<4mGzM#z)rpFJS2$RSCa@4I;U%lCQZA#rGN8j|h z@>9BwqqWu**%#^jk>UO7L3*JV18>BC&a8{SC50zTOTB}fQDXZ9?{G0-kGSornE(A) zUV*QE6=cr?K`mVT%awR>AMrA4iy+~6#{Cxynew>nHs}+GdVa>$uOnb8Z_~gefDZN2 zc5@@rGJGP0AFI~yR7WSNB|MC=~)~N zd23q@@@9@)V{|cSd0@8gC|Mer>8fJi$`eTs|Hx#*+dz%C=5X^0?av#Z9ilj1{6~k? z75MQP+9raIT)`O^0%Io91%;!~pqs%15}Az^Ho7i`pbf(W(D=lB?VrgC9P-Va?xoB5 zvf>K9v+}KZR=b3~$isxr8P(I=@rJ`h!Bnm}Nxc1PO9cvCsH_}R+Dc)3j{O0QIN$ST zs_zLBRGJtGSur_qtW#?7f21QW1==d-wW}~ZqlEEB;JMxwzZ$oLo&(_Zp!N2*+U0bk zseZ>+zc&iqL5I>qBfYK!c-&EdG-9Ke?~N(`l&Q_^>YaXVl{&cRF$WcUJP#3|u~)?? zsB%EuqeLFVbogbhIY5SF z!}v%{1g(=|&<}41Xgrv$ zre~N160PZKcCJ=gqf^xs?uvLfi;s*OaOA`G4~x{ds}#~ighlflwT|-VnxGzTe{)6r z7tla^gJnNu>)f?z94AVa#AOz?Kxxl>kAzwawY^ftOO_wP?wNL>pxoF!mofGBWi>8D zBkhn=1~Fqo|2Oxn7?w+7Qiox&#hT;-6%uv8M3*xaH?Z72H`g?ny2T55CA>X*O-TvV^S@7 z?QeBC4QNLxWA1$foV49ER_J=wc=G=&G+k*YYx6|fn%%=clkj(^ zuqcp)iE<-e2uQuWIvPg(x=dF0Z#C6i9kh$ zk-Ab9B4Ksnyp>X!WP?P>1nw5MnT!(cKVY*|Qq@SEN|$5yQv2%jIVnoZPs7=d>tnh0 zWY2oW`Qu5$!1atk1x^IGxwY@(8%;b^QZFxjnq_%818<2~kxyQ6#{dzoVo9?njK?tM)& z+;T=uRew)gSAz3%<5ylN%ZxeSnAUL$>$kb0VDky^r?WW0yK{847^n-EfxNPXT>!k^J? 
zYDmP2C=ugLWL8l7*Z5X%?bxK}Xct$9Fki~6EVvt^)7J8>cZkk`N%wOAb2 z@Y1oHe=*a#8lkX*nk;fCRT(?3kGzcQ_q_)mE2Utq!b8s3LJCN@>7wg{MAuFdJy)-A(z`w#vw$;USvW8lI}^pYO}KV4(wo z|9f~U!!xfWgrA`R_Ak)!E$EjIc~p7UKBL=sXWzFba@=ojhePURadLr5*<~ujkwG|qUQjg6DzMnQ+81iOu2&gPM;Wgb9#Nl~!n2^-e(v z(uW+b@OKS)8=Y|^{YC2VRNLd?o~iapWra}=)n+eTeJ(dPROS?$>K2aOE8rEhSJ{%FGClIBuP^ z5_3!fc^%diixC8e&FDE<^XyroUplJlnG!MQJYLNWP!BH)0I_1THJv$ljx^1fgH zamRnsEav$otxBW6rRty1-ER>`r5Y+QReqqYwpJ*I0o5Hh$yZV`_@MXiN>c@|c+o~z zOmCi*KF4lz68y7l+SK@898~{<;+ecnDbRT7P;Ufm8+iZyoRRm~-`bM`pzk*iXsTRV-4G%8(T zdZ<4GhpW%vLLkLUXjN6|`E8Nww^~#xeEe5A!tq`S4&xGUb=zzjIt#Ic?>^j6YIP*U ziw)$VLTT_@*$Cq)pGRNKDKGPj02^He5HT{Aww~Z&1?1PgMYv$!OT0P=o(Sk4V#)^Hp+c^;MbyZO|A72#@7H`KC7KUg~&Tv|^P2D2C?!QPrd`XKRew*u zrW9}RBVG>Cr+N1>?}L2MrUPXi%bnzCm1LXuI6%V6cx2z)YW^Y|Ae&>u1_&WfBbI++ zACo9nt6>#h=CE(T`9|hR*7&fR-7zYQbUerXSZMOX`Tz9PkGD~AmQs(-Xb8OPgTiAw zy?}XE|3TfMK!P7@Z1r$hp554rZ<$m*MSSNTO4ZC;!(dV9z7itms_52$3Wvz0T^(Ss zMv20gJHf`3ZS5qQQ9}#G-d%E=bW*9Wohu}5M84-uEsZVp5k+(X3X0VhWMn0azw_K+ zu=SL0B@|fWtm|XRTLi3sM5Uk7Lf#%*hi2$CJkW(t8M-dKDwM9$pXLIT9Z zczfZ$0s6Pt=lNtYw9QO%e({YS^a`pSC+Jg-&`4?Rki9C$kNA@l@1XWKgf57Btl(RZ zA;AAkPSCUCq%4l?OpSS}&!HSL&{5TA<0*Y}6L^A!3^DH^k(~lG@^lau>Ps}SRsiO% z>LbZEA85Gi3ZbAalqxWIgcP}|lr(q)IJ|eG=CE*3)F z#=Qdn_hz9a|5@HOBr7#uRaHG3EL2=~oHT?NU$GODQC^qGp{^72-ny5|X8U!5>IQ4% zMa9nC!We$S41N|^RC8&(f6Ee;DdC}RyZg#Tz`}#?XQo4*>E(~=U(I6T9f&1zL>7{^ z&)fBt#X5Yuby$c<8W*lz`O2})O4Qf*>$|PJL*TvvbFS7s3Hl4eC1}lNu~W#T%>EC_ z8Qs#_9*vruIefy56F+TKr^={UJvLHS7$3%^E}QjY@U!NW7DmFo>ebJ7jqRTrJ2zHk zO)ZwV;(*c81~)+v`(+N`W+{1||0o$MVTBwuBW8Y((S)A1I9`QP6niI3r z{ddGT2gNab+#fK@OQCV!|bYe(cL%F%=a_9YppXxTEAwAval^y ziM@lxC8%?dTi|~G{ssGVEhnocH7=PjMt&GF|%Mn=H;@K;?oDiXpuWkpe`b%;m&`eo-gi4$tS2*LrAMIUmk!g_AnTMPl zj+rnL&l_W{9YSk((Xgn6p~%>Zyu+8&fzCN!Dfaq)hh%M0)ROo;0UFZ;O5}+|1{HBN zY`b-e32t@3ykd2IJulMsNMxsFD&`SFf|BLL2KC2ubWx}Uo@HbqZiZL>1(+JHvb>ot&O(jC}VsR=X~q=1n%15E6BlI9G%kbaZMYHlD~=kiCzEY1X7WQlro8hZ zFSmv&=hsv`Jb@dPsEj`l=VIWA@TV%n9M-z%v9q!LulRJr+JtC`iKn6*LHSEosZCby 
zaLst4up_IOLp8wEG3#E;1T_y5J)<_`d{5KQNw+y>x5u@*Q$7PA%OFcG*jWEcUur>B zf~H5iF-M+26inH08PePq8%RAhviMl3aHI_C))!9*WJMooC87nu8X^`{{hUV7Yod>t z5)!STVin*Tn~}lA5nljPMErO-k8fLoKPIq?(EoQ}z<*fpICJgnkxg zGuTcWXuOA8m4Us-xq4nea5CLq{+Z7;I`|23ODXMOs^_OvA0AN!WxWJ);UT%P!@<(f z@SKWR==ztQ!kb6@63~vPQtYJ50vYZ$TOx5dRrx!e$hH(93o7_U61&*mq;^yzQ5N+h zRui+#m_(UQTHRJ->bG=?yY!CkAn(?@?QmRd~<8r>rZP0_h*uy|A!oUN%bU-wOOm*NI7zKo*6 zZxJq0#%qM(#kT)GJNY*J-+bA*|LdESx z6j^!QC?0c;ysG__i|m}n?T3GFOS@6iW+W%hM$0(Mc+YYxChMiX@_@^Ip7IxJ0ymJ% z?&Ed%trZi+Ry+AsU zA$aHdf_`(&+qO4qodcxn8`mR?Mx6pQ1*Iu!Q5U%zGvzxO6*;cq_LVJL={LpRX%~-z zDYvLw8(9bmeeae11Hft@^sJ#=fT+24w+3V6HBPv{{iJ`uGN{ zas$4w-Q$gK5ujVLz5Ee|^OQG!#g9ak?+;MP+&%j}EUy}yl+?0iKk$gd1S9|3XG&a` zL2IGzS|1f$*mHvu&gFF16x!;>ze)=GnXx z!08h>&NwTA!l>`D9bnW_0-1o0ko##h+7o5Q#;!}12EU0!^rnMUi6r;u3u)L!jzAMo z4^-?xmHQM&(`W4!-xZq8kVbi#2Gdlnx9$@M zr>86;&sTY??)PXpX>;)GRV5>~@qZO7o~Q#eG|^i^L6Xk3Am?qhPuGXx0sUVV_qzJ$ z&1+_X))bHDJn?lkhcV{YQl4vti#xy;rikkYU+KnNY;E?G@n#s;$dv|wJ3J(qdWGwi z@ETioU7W_+_+bmBS6#y@#gxRg$+#APt#uyl$@?DrUNPsAAexViCPG`h{q8;SKPr;l zq6N!b;Yh{5V;(1F1ByQnj$dL`j1ux4 zio3$KK4jrN=B3`#i2QATN{MV!!{x!^W#AA7w))bT2sHqDzBy+#FT=MGeVpD&T!@lq z{zesoA^)&0IuaGN<@LgvuaGuJzQZGN<^i>%vI3!{Az=H>9c4O+p=QF`omp5V8BqO} zxXDIG#I)N#EIBNxqTvp=<7ZFqXn*&}wK|qI$Rlsn7C|@oQ)0)YaTKz_G!&>*q5*i^7PGtC09mhH;g%U)YGhV?zk{ z3WNj}js`mlM*w-pT^gyo>F>z_p@77J`MjXti8vP7&&MeztU@5*i!WU#5OJ;oY9R0U z9xS?8;CWFJM*qWw)VxIA(GOaP!Q5UQ`-I%A(?;~UitXacQ!wIztML+QQ^tK77c20t z{DDMf=i&Ot84uqX7Ryi{t#%o|5ZLIp9&|w!^YH(W^_FdIu;H>cR$Ph|cPJ9vEoh4t zC@w{cySrPFQXse$cMtCFQarc^cXxSq*4pp;Y5#-dLvq}6&CEIMfP?hia5NHK#E(vB zXNK9jsKRXWM(UVK-edZ+Vu>?;^2H~(7HFF0I}c_zdV7EOkrJw2fnk2Bwxz(Vk=Dj_ z%+{A)ka(Cca`4XEC9huh^S_|zg3^nOu`UGtwP~4;RaNOnkJ@T2$Ebvoq;k|qUt4y| z3^{=l^2TK=MlzTx3BDQvpLI!~a!cix@=va}eEm!c!aNEI`fZrFb-|yVdJgDjvvAeU zud>F3#EopKZ0&TiVogVBlDq77Hp*AW*;3Pxi8~@l&NSl3(nMta2~J4N<9&ByJ39d) zrw=L47m?*5oL(e+V7{ipxDm}o*QQfD3bDp;%QQNvCyR4m*wu%#j;CxrB}L1NbJI6L zjxg1m8!!k!I7a4K>{gKGUATmM`E9xi+c=P=Nz$WYIzx&jx1>^0SeF8*C%AU)x{lLd 
zR}6#^nC|k~^N{<>^<|hj>+^o;TQ#D_=4lrnYpgnB#0_?l#$O~jQ2k2brBA3Zd1NJN zelW@!*Bc2%WN2@0^mpGX!hQUq6dj0!UUCfByZ!ndA=;!X@6u{G{Ivc*I9Lk0%C9<5 z%*Hvu^6-@*RLAB<^*9!0o`6%tF8}|n6XpaP6?q+&(ox15!tHn7$LN79 zf&N*-ll?W*1R&Aei2<>l=!wF#;8n|*j+dc|;lMk~6|gCGRvH&yA6Z0Q)Di;gFS3iO zR&@4F&|fn-kCngWnggjyzGHaBc`rJoA z+W~2!e^aj0CE!|zcJ!{f;Y9c8@#7oo<7|710+TEo9jvn1Gloo>e~pu2^FT~z>#I@? zZoY&WC5&cI+^7AFBpwb-rCaQ@1g8^o?|yX61nK2wR5cX_t;V^CcvP+8Vx}nJ*RZvt zh?-u3#hiCNtmd_%H-5oyC$LrY2PFTqCDjnlfOicVebAM`a(6_iFeVmwDWT14jnPNQ zuTK6Ul5ab=4`SYaj{U=*MaNn`JK(KNmwOAXL6;n>yPM?hSe#C6%64xX5XpApmb)GF z_KbWL5;Z#Vh&n9|Fz+3qQnB@Hb3G?)ExvmGtAU5m7XV$EU-g4&n36C zb))2-LYVNG*v-B>vfO5BYGMNB#Z>BEKZ8}5Nc~h;z@Wh0Gx$m;#~;>+5li*LtH@WN>3ns8h2|$iFQ;V#+oJd4kY9pJXY7 z-Ewb^wf4149iW|_4N-y}XO*5THR#yDmYvzwba%9$vHN5DA2?Xs@%J$M4{hc~`HJ&H zgi@?_wO7fNC6FG(8fE%cFl17`#+!TEb=-`2eo=YA&wHLPHPGL6{PAV;(jcWc<1XJQ z>?7;gciFhdJ78e4{|hcMFWpOi7j|uXmZ)Umpd&{rXIrEL`KOck9G{n_r9=e$MXL$F zMcno=oRK%O;1aI|kVQAG4{RRD>}4AE%Ru~Z?k)5Pme95amPvqTaHa_C!g|1;A5$+Q z`o;0szNOAQktV4oHMc3FPFW1A8aB$XYzs-=d>LvahJaW>BRQ0Qf>#{Ma$3J=IJAY$*X95p`R^2RgMSwZ?JIBrVL&R&G(yGmsR zK;Cs#uErbIY=QA)1Z2F0j!{ah$2N`uX#??@A%LL@`QD}H48bvXqV`sc{Ip;){2sWQ zBei+oA}o8_+8!3~%7w#~iU%^;DWBwPsoAue)7g)qn|x{;-nvWrru10tl8ARzfN`to zkr8E?P`ab{-?0gxaS9>>U#MaAv2d{Fj6GlKXzP+6-YImgRc!Qi& z3X^j%BxAU%Id#e+Sz#9e#c+Ox9#_VkR$}n?nyiG_R9 z>oa4MWU0~u)Fh@DSBjZ>Zq&Z?G7O37IqQW^H;j^FME>;xzRXvc3u60Z0DdAcX1ap%sXlt z+dFJ#YN>i84z9{}@phm7Tqg*LD}c<@ECjXT+^H8vb=TA6n{H|F%7;m~0JX#>UP4v) zn1whjd}yz-f(Z4WP!qlvu$Eke3k?aC?PQ>t0>~&z_7EwvlM3&qt+CoPRbe*JL6kGg z5FP3VuHUHSiKjM6_xBgrnuUx@>d@a%(1XKv;0i*6ij>1e8MkCJdwbkL9)*oZd&+!A zE8Qp^bz<;@L^GGMF^_kmfSpDvlXXx5<%&O%-6gW1vu7ezMq7%!vRI_0?ZRt%(UgWFj4+GHwy11Rv>Q{E|B7y4Cp=t_c z)+hI}#uG}qON^xV>FELnoljn_u-um>y(2*kLAfeqt86RONHJnas(j|yo3Knv!WW@LQx8en@*qY*ZZ`WfUwK?}g?a z5VA1-W(G!h|33Pjm?*dAR)k2GA!FhW=6~!oWxMF_)D!XgSNCMQS03GTb}aY`n7w~? 
zRU3%f0x9edZtoaImyTOhdhVqyZ(V6zeb;vO6vMjIK3%^e8*)L2mo|3{`c^L}IQ6Ym zXwVgIP&>$ykGe+>vIetfPnwLRO^ zFiuM*(e)a1_;Dg|>6Czs-p%BY{xE1JXPN|;zBL~frpCD}1a7Yg%QNJ!LV8nu@-Rx@ z;eM`e!Y+u00TLck|ASkxis3b8R1+XPlql4Z>D!r$lK4$vqHh)$!vauPjRX)s3@VXw z7JN|1{!L=Sm0YWtdeOebEK|;ax45`4!=>+vIHz z4YTj&)5y4^ZVOe85@_vg7qLIvR=XB)?oquvpm)}tFC%qwNvxA;D4i@!vaV>d9_7_q zIODA({BD-G(bPe+)s6cqlSN-6&-+HG+0Jv#e76&wRh^ac9~x{43EhLRFDh>h^AL zwSbMVNHcz}$TrE7=I~s%^t0BQa8`Cj@u<5swcFD29l4M8^z~Qr5z}PN`Ec|aB;0c> zFREyrgU%Hi(&-R|0S(9w2fa!ZgCCuvLVs~-_ZG-=^ zjp39S$5$^%!?xrJ3PW8e3yvl%4GhQMntd%xr4~Fdm5-e58F9$}#!6C+Lw?4h@?HpA z>*lICM`voEEujr*m@>5&2jGCB3k+2g>c3V%`zVec!arzfw{U$#*13wxBC@e_knQG8 z>17Dy=SfDPO7_eQ{V7q3?BWXv&=1)iMZ|QA`{;c;2>8)KL}^AWX8g8)F9_#@Jx`Nv zYJ!jeN8jwggy6{KVIEQXTJgD$BtJzZDa=H!ii)pDZnp`2YyiJyXBRl!C2Wz&`2Ahy zxX%o89|a6HxK{6Wn3pxqUADq;(z1`GHRVFa6j485>(6bDk+&(1To{$3gTo z(V=GDk08bMOtX(dM>$8DTF~RW$!?YoGKHWi^{+XK>+JT0)m)mA9gyV9k|1uGe43iD{JTG|Z!XAT}A|x%TFOQv# zM8va1i*+u^D$IhPEV{xUf3;5Q2R1d{JR~+QNCB2dyTbqvg0(Vcv-MdMy zlkInj#Fg0(+hwyuPnkSdlz&~?G-hl@XsOty0jk;*4`|$1#Gd1+P*29o^6ne8 zlH@ie11S_QCeSkmtgk1#CWw=|*zJ*J#YXoSzn`$1Z>Xj?Z0M|Z$Tiq{x|LS%2 zTKHW!2);r(enL13npC*He4$>LTN!59cnhl)2wtwoiXUY8^>t)CEA~EzC1ofzWhazn z6Th3C0|qDO+zK2HrgsT70AuGX6k!AwQBR?#2>)|nt98%v{We`SG5^{tf%JmDXjV(DCPgf7n#C<{YB0M*99PeZl`Rg{IuVrd;2xQG76^wtp!7_JAlTs0w+r4HsQ4 zXuR%@g!;*Dr?xg{Ttggks&Up|BaHewir@S0Cy~SNG`~gmaK~nPO!(u?DwFu_eXr!^ zTBvt75cvkVm-Q!e!Y1)WzbJ@JA_52=8z*JWJe8HzfWEiH{AT=TmowBP%C@yGJD^?u zotOWgcRJ)0?AG`w&M?&MS$2XRzWI;sLmk&iHU?#Pm`}`oGG$ir9~l5b@HJ`=oaHq7 zEuGr&L2_i(*cpCU&E-YJtANAh(N)7WVgHzR=9@pud-lU9+6#?_gd#;h%f@_d*~+k` zyFDvDUP7N{vh0{ACZ3EiWZd2rhS6^OM!j0B=b45eA(~atDt6~7dNkVX;kr`&*Tl}g z`h0SEl$N21;kzk>@27tqs(I83^Y~gTQmiscpxzN@k54I_Omw(R7h zl1R#TIywajYJo9)px~jco=10wF;nM^(yq}N+kq<=Cc!OGWWULL2CGmJyKw)9_jFrW z4-DF9q~NwFFk^!^S=%#6z$ByEodYFz)dZ&;*`ariPw<3D>i@)F&Cp-LwFM+TS{n<$ z=MV@mELRsWpuViC=)Y(m^3~8HVJ#wb14YRSPgK2flzv2hDIAN9MkYq^MZ~^%EaP;^% z*_JY^TqOgYCnD&>>4|0as56uUS%&RmT@jcNR4dotcCY5U*ryj0TamZ<7bt^PS)J>k 
z+AD+PNx=;q4493NmDRg;{2hqqCv-)nWCg~xcPH27};25>T#j{D>|g&Xu8Q)B*5AIQc~r2v8583ffLL72uNN8J@J>Y7+Nw4I3yJTAtlqcF7>tmS2>@9veg?w#4vyN=cKZ_zsz^9;&5JIn;L zs>0YL>kHNT`e>p{e)TEz;b#--pfgkZwx4Av7ajqwW8Bl`h6aH$Gp5@LjMITR6K1`sA{d-6sBI__(m2zM<5 z$_c_4Aqx~e9_?}R(_v!E)y-~R@ahK_+4B)){AOIq-_^7FqeG3-9#hG_7aacwC;g1l z^JPJVFmMmF|8E%`6$BS215X0YkMwIrNS*MSPj-^LBP4eiuqS-NToEsme~5E|Y>~a$ zaALrG4xI!dai6RNQm^JIt;99wToo6efb&iaFy#_9dP|C9Calxa#E3 z`{@<08A_9~6U|p(&xrXOQjC|Y*Qxj&%h3drk`=ty^c`$DL}TjEh4v|QFX(}+WBu`$ zDfpfbzZt}z=X=z&@6PP_ghy`*Uaw0c%M`SWr>a;RMX+qv9a#6|3Ebfhkc+YucC4mU z7P<^(?ke8!cJ*}?zMBzA1ZX4Hf_2lt&Lyo zNhRI-7%+A4q+NtH1=UZNQFm-JcWNr~MJm#MIehsFqQ`_G&4x!2vGzY4b7`Z=?6&X^ zmDhdVHAnY3c-HdniWVN4X@QGV7NNW=pktCaKu_#B}anorC>1l@`0U!E6LXv>?Ht`-}$TC_0X>?|7c1eEn>IIAe#E&Q&^ z4j)!D{yG$xOF4l-$&t5wsi8)y@rLEDqyL1E$S)}A#`MnSDnhSZrj;n z6=@LFD+Lzchhg5?e&Q0Dm#eymxUSf@ZGi0Vq>~(5C1a>&tO#i_|=84Z1Dq~xQZBAAHsJ7nt@$Gv-a!>Ct zVN%uu|K1;8{Js$?Bv-Bd;S`0LM!#0B9?hM#Ipeoo5!cLz${$_B8g6U#h$6#H`ldBy ziB@B=Lgi;Cuc!&svJHs$Uee~F_mf*>UVWxV72?^+vc0^L_l!c0QoDp8cncBxBIDV>N_cYdm_ z?kwqV19#|wVL*aF;q%)V9K~t7{5f7*oysZ`V!1qna$rE=17o7zF0`9{;xVL&U%&Rj zc=dH^^_rcq&m00NOe%LbMR?Ij3feEou-b|#N)DlAwy+R{ml&nb4nn`2ZM}V_ms5h< z@8$`#%6Vk+4DX^mu6tD3)@O8Z|#Kql^-xMbwE-E1tDX}M~^)RkiU zkD~o4%J9J8sz@(AcOveWu5ZuJQNQUqrkzjJ7N@GJebGv4p*e(ND32;o`|cHXI?{mJ zPng0nZA_`4?ZlF_h!)1!0ZQAO?x78|E2F+Q^M(t86@td(u@xKK-PN_=&@j^xhkzZw z{xw%4uzM&}9Mj5>b#`y2BMW7dn%Nf#WqZ8nMk=?17Xy2;F;tx0`SMtp(x zIr^h;=t2j}b%e}`cv83w7eVu zT|2MkHP=mPtNRuXU*Y!^?Oqb8cd>bH%N;4V8IwHlr-Ns!O1Zw!#BzI!a_H$SsL5Qb zywJl(Np;Q~ZDO0Z_+k`hHQUF*Igdg+X<^GaJb+$TQkfnADCbdrSf$Dbmv3hgHkpKb zg)B`KVfK?Jg7BupwQaW!fOxWcWF+}2<>%$|?dAp>aF9q%?QFQAYN>2kg8W&&ofadr z-Ej?Wh?6D!g01y{&&RNnC3^az7n|p3qm!>O)}vAnFZ36>(6KKhVIVYk6uBbuiwI`S zLy)*aCLvNY_~hFcN1owmOzd>ZyIHFh$UR;$N?I0rA`BK3OrI`I8U%mxs5aat){1mn zY11qByFcw-5u;>YdE8Ay{~FmTzwJa(6~W#PAF@ccMc_HNXCXc$$hQd}b?MBpQ7882 zLr7Fq-2D(TbJBMqLGp}UtNV+w>b~Nr4c%D$y9%wQ&#GN^rKdn!0~4Ka0+alcykTv8 z;zP~wQB#$)^UqCo=f(B$7R@fzov2&$qRw){`>iKT6E5}t+-+Q48BLc&fzXj^h-7Ko 
zFuN0RCUW)R#~$1fz8f`sgY#hMvVCFH&{MOFQ~7Wy7=Jb|7etcEHx;EiNELl^N2FYliXrSduOm8E^eJ(H5fnYftobL&&2q>-nmkjJ zy3**ggm&FdyuoDEwrxcqx37l9FD@AD8%`VVWUo~XaKf}0^VKT{N9(UjQkfUkm^;^= zllR&ORTw-|GMgANyRpIpL&}NgOh-&Vir>;|MDP|YlVG4CR_8ojd5-w>btyme0Yvhw zEx?k*;Cwxr^1M^zM#OpH87DC2Bu?7bE^oF{d(#Y_ZO>g8rk|g5uiyW4^ut4V{XAp) ztjK)$@*wp4xYZ}*Bu<9F&`$GAt_gP~8RtD;a=&v9>BCKIzc2M&jfx`qT5dg}TjUR; zV5x={SGNShF?-Pi>Vu6gj+?=B9?@(a^8K!eN7eF)s>9Fk@))_v`G{q2o9aeF>08#b z5qykb(}kV|mw>0^vwIMa|Kz;%13@Mp%TzkCG))X~>GqF(ZmQoNPK<@ zJMui;NmRn@jf(GX6A_+>aFU6bn*}B8L7=6Tk!KrMQ^bGBvwFif%O?E6K|KM)bG**gdBR4toa7tf*a?^h$uJ# z2Q5*rm))jkkBnPY$K5m#*gW0xgCtB{p}>|Re!=9|t@O`>c_*FigpaMp`@PdKDqJzz4!PW-ha8R!Oi7`asBn`M;6{jhyag@!$gyitbXZ=)ACsy zVB6p58-&+()XtsQ5bbQK0{MftAvccmIbfqiEt^&#_F&xPvXNE8in!;TAUi&xG#NR_;HGZ1&(Zxs^3;}5_4ll `>)#!#P|5Ci-PrOOx&3?kH z3h%IigtJ~atn~NBr<~tL=ePd4FH^zld&4HQ-N^x#!lRjxh0AK}^F~(ZaMsr|)x_9_ zDPE!{p60ho^w-c-1C^sebt!rT(Kx}v8;mBN;3KuM06&jGTs;!`L{G#AuVTiJF=B*FYX<%XNVt zFedRx*(iRuJ%(NWLjFYFXo;w+dT90cPw;GrM9wpT6G$iZiiNm%*H&HaPOE3FI)US-iY=45>(wdFdCMZ+O>i zt3^y)?DX6DRlr{eQdJUM=iy3WjDfcO>e3-kygpbYCncTiQbwqZ>gK=`=_pf6O%~2~10i z+~l?w+TG!E_95x~2;oGX&ok(70UkwuvPR_WL1~?|@fIze+b1@?233i!+G|Qi2QTfH ztIbIR$5HpS`hwt&RTN49imqwesG~bDmcik`HLYn&NJ~^G4ZN#{X#DiM5(K(ZH0{uyq@k<-RAC?RdiDnBv;1GQxj zG97+5W@3xCZ;uQrK7^S_-E*#{|H~%OAL$w7^iwZc^%r?-_Y`Rjf^BxOF z78r-1ZMBnQ_uPPLUYWNxaUMC8-v3RreeJOG92rPSkR1{b$<}$Z-Kno;j-@o5Lcd_| z1kDIK2A4ytj7W;a){UMheYdDWg;gRLLiHBcY@P?$x|sD%0P070w>vKriP>IYJ5>0) zJZ-B%;ckIJ=rPnrI5}%o+_HD;(Ab(q(5rNKgt=0Jgtstw6#S>#I%id&#|WO_F<=xY z((ebal-%u!CmL2;E1BW3-N&F1T~%owQzWLJ^VLQvQ0K;ih`#;|mFFk?n4y`zS@()| zdViWLGN@-r0O4bR3ZB=2R?}?doRc)qnRh0F$CpsnQ|B2;APkK9AKV$vi<_U~;|$+s z_$#;BCHPlnN1$bsOLJ;Xiy^!DF{k8{=?bc#-Hnm3pW0*Z710iVM>g%_pYkl_bENB^ z7^KpeT)SDdLqRNeyE}}neEUTLc#`DU-rz$qg3Al6ROu7q+Y;@7D^N+G_I2&!dEgLV z?W9Ic)lka%-Dlan*{`%Seuc813GhIoq*Fa2RwHJY(H?l{fh!><96 z?^8LKDuG0P^3$|epn>)>QI29Gy~%rig;q%Vx<@bXNWYnC=*z^f3*(?wZ*WGJ*!#b3 
z+-^&MbNg2nqWjoC-T>K>6OY=Qx~T?-W$IB*rRE&ADbZ6)^a1}5*vw{@qfi0|EhFzQ(63-5n$z0%c{44}Ti2CSVmNjj6w0a`vIT0SjS z_5}L1C+ZLG`ZpsNM&M_yvXYYw`|7G9{XtJlUv&K+Qy9PLQkaod=Od6fcS+x6uM!}26^Z~7;r?%#3B;ZQI7$D%Y`hB zDD zL!GI}3)VeA3v2D}9eG8WncYX??X?~fE zmW9_pLUK(3T5)~}GdL%r z)-$6raojFqFBPa^Mw{pFD$#{ssZA|XYQCQ0gJjs*TbI6}Mp>JM*P87y4hjl*ufhxc#ne+p^0E-qSI#t3#@o9DNtJj2GfY5TkE&Z_fT<{5&$=|-@8s?=G$g|__TGuQyVgf}p z#O+e(@uxrx+WYi)%Z%jewR9~nV*O}{DU+u;v!NoCwU;mBr2f4!dxfKvcZZt&kDvII zgte$PtDzmRHXAWH$^fu?$*xFQ5!ztjt3e6)+;Ev>fJbC!_L;Mm*bS$hD@!7^RB^o}kCBlFfuTsxOEV=Zh$YiN5X>dFbfv zbbNxZ&k^4wTw&!NrPd%1NWPih?3gOb6Vsu=`y3xqj)&v3+|(eKlmh>EaW>~e%WugG zE$!$<@@N3`uY_RH$wER(;davb0B!6CzBQ)G)xUM{8Ydgc@eS0=Hb06Y{W@*P0I4|| z4Ol^#<(vSLw3esx^Q1tit>u4k1}W9nOg!^_#!qI2{-&q{awz7646;PejnC)_U1;Y4 zIV#(*H?!(pZO3g6!XK+_ zHT-h}X3lH;vUE4#a>o(oMlrkWuEtB)zb2mUtAQ1<3!C>F+`Hy&t68R{ZBF*i-dN_W zh={+XGUW8@MD!7D%_!V!zgT{mEQaXcQF?#d11zc^ozo5r$D{Z052++tl)p$HaBv>G zl@<|XbbhQ`9>lb$zaPQq@y|N-{h1dQZrW7@xX6Ys{Uudq#QSYdv@2^jp%ad%1@dxn z$SCm{V9!$P6%9vcE*-d?Y_w;s%fDa7&?++j)pV!z1F^B3_0EevW$N)UUGIY8cC|d3 zcaU2=K*}aj1AEL1OnsJA$cc7N`xd0dcitUp1n2jZ$7$%KRweSD`_` zl8@tQ;;%-fzdh0drz)qa%F1U5CE=~gV}JsdEC1|=4ajh4)v<_9NnOWRH!)`wXDvAiFm#pZ0cZTI92;(oMhVKgAdM^k2^pE)?1k($Lg zTMYdapLcSF1(|&IIt-lRyzte-Wz422KU4B+iMQ7>Oow_(F%U8SHbvdEE z%;#SXl2&|avDk8;j|DJS?PgfCrbWZY6cpQlV1hDT zA+ZB~fsGO+F!y_$IVJ5JiTO`<{wKt%q~KhHF2~3c?_Lkt#Y;tdEkoODD%fJ=B$XdbK*9yWrdR$SK?$p> z{g1FD9;sUBsD}#l1!DQh)mXWI`Dl>)HovC$n;^&u?L7A;>drX9>4U4@WVdaJ#43qd z(j@_<>_TI~)WkXYQjW#e+SJ$ufgkl%bGUnb{@N1nYT6Ue7f)GQ%h2pKcZ!NbP#EQh zj!T2TYla5xN*?8gYgFq)fn{L(?ygkT)QOe@k`)}gMWH~S6l?m3geW=0vU_lO9*Ks$ zyBKz(6<|EC*c>TNh4&2KFjT$OUkq<*FGO(%&a}yQG5$E4qcepD$%d9KTUOuSw&u~6 z1@%05?@{O?dQq7UUM-nKBSX+?=LiBOb+W7|q@iN+0tPhdEBA>+B!)%alZdDKG?KUU z(!tH*ZR?k!3nwmu^sdy;ea<z2pY9N} zDrPC0B`%c^NXa(~^-$*w)x4So^U4le5JKRtnApR#1xKN+7bLmDnC-rG@8MmkznE_< zi5;(oZ^Y<8$Ey*wot>H8bVK2Ch51qMZPrvhT=rJ%SPbk+4AhJNi(V-*^>;O9f{m~W zo&%O&xppYT)p1vB@VvEULvz}O|APY?;v^~*hPKLM3L&{ylWU<9nq0KbNPj%4Kaqj2 z8*^daG>=j=?abiHtW^&UzA&WNo+4Y

BleOu!O-<4iCWZ(G;i3}JnJvDT7{oZRkt zv)#KAv{swXULeCIaWLYNtu>3@a~?DsGfR<3_?oQV;=Axm`D=5^>(7rkclj%LzZ-Y0 zff%OZMa~sj+@3Ssg#tHgRIF!y?$p5`8#Ab>D@h~@s*sul`NFUGFD zyJN(rRq3Y;hbr^yMMpY_22Us;@ctEs7HGN(eIr!4u&5fwbdDKYcJZ;OwJ*pADAFYK zN!erE_@LA3$Dwn_FY@K>dbe7*6a|KdrHK)?2v&9~>3@7wm0-bkRkp&oS!L_{=H0O%m&E-Oo8OJ0;y)J1^=oJ~W0ifh;ok7XUl3n17qy8J_cY37xtouXR} z423LiwLOWN7t&~_(kZr%i}!^No&YWvE*BFHrARSxHtU-S3g2}MzyHlOMK+IaTNk** z%Ex`Es(i9(!6Uw*UWukJpL&%3?SmJ1m}-S%ySc{z8|JrvI>zlPy*~qngYf@@D>jT% zC{+!u#fT;$wkH-GB?k9)gK`>62E{?10Q(AkEHbGWboTg?_DVblVpA?gx1vtsWW-!> z?RGZCkj0#uVnXVTSXb8OmS30mppJeQJKBxLX`4@uWx!nE<@e$F%HdzIFZVR|!$9h4 z^Td337v3O9?P+yu2tRD14{C3rEw$orHj4a2!wSk5!?1XMlctjb<;IqAO_qGL5w)+)25x zbE#_eu@Dwnp$CmR>qHL&2aBk@Zxzko4NLxs<8;77FQUYl8!okj8y+ByzO!GPLmLTL zQY_-9#-a%DbR)^N&t*@$U$7sgw{AVe_^&!l^{#4Gg?WGdvFBRmH2sac%-}wXtTiIu zS1~HC@yhxrvc(bFMcH(D5PVz8S`e}!$k4WgH9zw$;buH$^51UlXb)o$GK?28zAA!Y z4R*AM-Pl+r)h$l$lO?y}{&za?K@*Wa0gmu-^OIL_)C5`ocITMU6VF}qE@;lnH`L1f ze!v8H+#T}Xo}%i+X_Hb^v}CiOu4BW=FhGQ=CbvtXr49+a8>VXv&<>a}x>Wr`g>55# z8S97sX=R|M_D84I@hASCin3M58+ayj%%Z3u;c73wTaMHD7h5l!oh0W+UxNfH?8gpH zqthoNJE9*;zh3BLH#_X?F(YtVUh~48w@g|T`TMu&V$0-9fo|*Gjcq_5zptrU108r7 z`>$eepg)1uRF8}S6}eN6)$db2{zC=Dr!Br`?9d}h=vvt()$;h6HEdJ~X_?x#bj6r}BT6*uo*&FRk}7Mt4I zj107dy|AMx(y7tWb!^~eP6y5hWIx0I`!=~I#vq!4HCjQtHi%PxwQC12^+Nj9Vo6=ywOH=Q8bvm8N;a(>qdf6DZs1aut_9LrJP>@pGU>Ix6QM)6P}va zY*K3L@X9_Jd@0)x&N^VJbPD(mxWDLm7rl%a2*l}!f{>0ER)+ra@R~rEf2-0`rIW~E zMsssl!uO&OQ|BC<({4vQsA$UKlJ%Z3z{+KeyHiVbhSYr)Zcn8m@{W`huZ!tRxEUp$ z=Jwu5Db1?%(_=;SF%a&yY*gw?Nyt;N%>L7+Dn{juT_ zzb_C^{p_$nVWB{cuB%y}Xv$jSr{JWJIOnPDxeFy%Rk%$o^9JyR3O?sjiB;YGvQ4o$ z@LF8hxxpl^O_1JI+fRKPOz;YXq)J^if|m_t+?rZZIQ7e&TgjbA4y^le6nb=s#r}f> zhEPfn4+g+8?W%*bnpR_nuYX8_!C|FG+A(P=`^r*<=Bhp^g8#wY%|b=|ROo+wc#=^j zNV&heEpnoExo{eglh8Ebder)ZnzGg~8Ygs=ZCkVBOPRR!*hWYG3sV2Voq<$ z+U5Rq_X6%9w|ZZ(G&x%Te3)Dtr1!dS2Nk1Pm*Pc{>UTB{-st$W{>iUSu7sju&Ce>h zL090+!vYR|5-^zAkD9O^U?_>1-x*ydZgI8-bqGOd~hVXk#ZXV20be zlJ@wL|L>ETl(}>8tJs|bz0=q)_an{Iy%ZNy*9InfDutAP6TCS*H9wUFkMz&lZQLph 
zbZ$ZDHcom9cIN?KlVYka4Er6_$I&x!jEsDq`o6|zP#f(l7h+npbutb#`?%#PADP-A zRLT#xfB}Ru&joFS=C+heJ?bh&mdiF;&C`|fBWQFClDh$ffz+6bNzgfnRy!tyM}ZDj zbEx1u9vb*eMwmzs2Bf+|ZcmwA>YGx?($|`S{qncv9q;axN+f*wQMHFs8IhEYt_uIb z#Wm)ZZ}H3bOq_=Di;@f`BaO0jo~t?TY;Oq}ROimOJDTKaemcQXd+_;m zq01M~iGL}ai>ildj`&;+A}0Fa=?CPJQdpFjaeD4YXfvSObb^tVD%Dz$Uc3K4xCW6> z=!=D&s|{@3pkAbXIC}x|w{AH| zimDCL4Q=n3REmT~nPQSEJezsR-L8{CfqP*!P2J70docWqpK<-FXO(l?LTeyc!Lq|I z)+2XY!ykYb)n)P&H;u}B$s_+&o?|4_l6-NA3SK_3w~wcfwN$Cxch#O#lWd#`fcRwS1jvhgu8KAKmVU=%P7?ZAn=LIbY5klH*N8A${*X*~e z`vVf1L`CMrp@epLx=pDuCap%^iTtk7{^@c?vK_IlwlZw(Qpd{GG)OOC7r)Tl^)GUh z2~|>v+C?^xFRm@Exy|cC>!+iK!b5J`)jtZtRBT%OkUX5f6h-}ddt0V)TsH`SpsVj$ z9az<}PSvMZpNOeADTkW?ohdr!)nmFAoI9ANV|6cEm#(*9fvdfyD$8v)mmljAqeQ7M zEtl}a5Y65-jR!G~1VNe=mxEXg`NSua+G@sl{rpToK%MX~qfOYxA8AQEfL zf2v>C=xdG(d~T*lD7?X~V6ztCpr$&@wwpy}r_2!Q?H5OlJlnZd*g$j(ed2d%tE)A8$e0lt6mw)&eEerlN*J8_iltf7`EHND-k(O ziK+_Y7eIMiw;hJ(o%sHP3k*2 z^AE-XQQ`~q*=l}(-|tw|oImH&CCN;Vrizy>>2#a!S17m^U6V0J2(+tt9(w-JY0*m+;kg0vYi1 zJHV+b!`QWdxfSKXn@-^R{yoT%x`M*#W2KC6k?uMWnWV5al`Z<7GNe;{8}e&izxtF( zHSuV|d%#t{&p;m#!b$v5?zLiGj3j8&%9_A0Ph-o2F=}@rxu#B-yJ>@k^&Z1~BRavs z+~HR=0?QIQ-E}BCSF~mGY&v=UD1a`APCe|dtawE;g(hMc>2|8q43Zu#{FDF#vvxY_(zcC%fkSTAmIX;{c? 
zK?vQb7B34iy&*`1L}R)Y(-MwQlz+khEW=6EN@0EInd0ZGz1yTEUI;ml%!}Qz6-_Xf z-*%q6%EPUb6l)(A}7^4F`AEk9lk7GVahN4-!1~IkAPCKQ)Pdo_XgvlatT?%%+D$08mr3knXs?c zwA%s{pu7euJiU_|)PmIUWal=X=y%G-<-NQx85j8>+uc>~!Vt}r&rIVZ)LT(^7ubFh zQF1$TcbWipsmkRM$11;b9zYVs(J@FJD@skj4yddnIxNfTMFH{&m!nURN*BCPZi?sH zXU50|XNxtw=w@Xoof<&mTjC^d!KA9TGCqHjLCdtsQ5=6oId97-IXDtSd`wArr`Y!l zEHQsiiNt!nGP9eTlZ7IS4U4MDkt@_mR1Yw%w2-@%pIGC@@q7@`2+p>#W>4|-05o!< zFeXNU?YKJ4U>Dv(Hy&qcwMAF+mO<;zv0jIGPf^kkL8xR8I?pS0&Q-am>e%&pLCEtuMT->}W#EyW833MD|G6u075+TsO@Yw;8bQlNMURy=rdE5$WX z+})wLyB7~0An@D#ci)|z&CK%wGMRaDU)Oz{$9a=gs8Mlvea(N%@8C7Gp$6OASZ!I{ z(@fV$7;W}z>^h5XS03SIi8j>K&nj#zt=p#$vIR+J9?s>4YR?vqSe#w)ldL@1{b<%e zJ51vDvdt`fn=V?EO^7`J(~VW8lAUt3Z-@b2KR2q08vR`ax?GF)ntqRFSqW&KYh}^{ za>>auv_IH=lQ&k8zz6-J$5N`hdK>*CX@bSlYtUBuE86Z^!mDPd`$^21(X?m#lCV`t z%Ky+%T&JKAaS->fjiEOF?&SS5hfUFh=7GInoc{nbrg~6Glw3YaU!mmiUP$$u)$KNr zgVaW&iQbR=wsf~Ti!nDSGGf*@=Fi3nY42B6rNWnfqmK$no}}xm#hYJ)jL#yJ3NXcU zQ({Vc8aIX(<>&15N4N+dHM#*UFVti-SL*wiysMiK*C=A$uVQqvUUqGK~ z0fWwNj8QH<93|U3sVV+|L6*&i95u~&IMtGrm>OTCi@V}MW)H?G%TS2jF5GT=y0j;U za&{I8lBRgZO9ojrdV0kxW?lKmcO+Q2Z>=ShFr$>L=-o1%C?e`VfUV!sQeQTO8^#Ft z49(S;{vPQSgNdnxyiz_3626t}Sjq{XbDYMqqCb&524=>p^M=4eh4 zLnlmFYS1rvZ64ho*0A$sqY^0QmwcN^07rk>^aG-@ZhBYwrweD?jP_QzM~9uDSiy2f zML}5FW&l1w8H8Bo^9&+q@-p4l$;t7HMd0GfOC@mJUn(7K3MDCgF@zb4A4#_f?Dr!t z2wgA5zn8QpUd^mQAE|I8efN1tTub!d0m{L(OlJq7ca3zwe7ua^M|tb z;l{JjBg~h=JrA&$LVKNUNY15Uy)~g>N*HjLVQ+DB$~ZnG5r%J^%Tr(Vt5?-bIWSdL zg)`RI%SFH0!qOm@r_v??V3+fKZv2M_CBD~wTr#0XNthRT%5>-n-_^%ZnV)ZZE^c-W zD0W5~|B*D0Nunb!4PDmG;8BIqnbM}6F^I&~vrEOJEbmx%Vdd(iN!uwf2j=E0>*;J7 zn4stL^_U-yEnl{LdZU=5qH2HIXG2YaCIKp+}rnmV(v9jCb8XBfK%0I0dy@6D;4Q=4Y zyTN+_zs?U=5YA>^T&P#9iQy=-tkWi+u+$%iMZ8UBppXl}X@hzE6%ArRzLMTw2K(rAei_VRd6v#28=mk5x_q8m~F0@8@ zyFE~cdajn!l+Y{VwxsGoS28-iPbdRa@_ER) zEd>k>b>otGGDiQJ+cjV#-_4LQ)c5#~IF&vA4}f)x=(*46XX&thD^+q>lYCW-4$A== zUbhDUfse1DL~}XQEmS4SN%(GQ7fOi>Wln$&a~6XRi}p9M=5s}ibNe^PAMOFRus2FA zU?lS-v7nKyp-MT?+i&_SUO%jsk`|Nx7INuH8)3iy!HAExo2}vY>3*5t7rv?TEK2Zm 
zP2BIS#a-cO#qAFvnlx@&oi~V;KP`Kln_QqmtIJt|+PUF#$b}5k$e%KIl?a{Zw%mDj z)|cRWvB%j}tcRSOhRVtHbh)`Q4XcJJUY4lHTx(v(gN~7Kf7&`G&3By9kb@nG5XdcX zqW0LS>#jFDtVP5}I=y)Rg6q28{jX&bJ9UmOLoQ=bH?}M;1?Sb$wb_4wz~JuUuol|0 zyTjq>6UTUrSR!P_vfi0JT6C+4ylVb_m_OO#BN@^96}mnWDqbG^vr5dur4Zdb=y7El zen(2`JSr(yi8^LQuK*EWVRafh+&nqW;cWGB;P5`3i6Hhn@?Imv(4y@^_V3qeO=2;W zo*%li_@`JVB~{Hya%|aNizT~1`KNuQDM5y@5CS7iv7qY6P5GHis`M|&eb@6o_?$3* zv_fjsss&a?L%)5rt7*ZHudyY65XM!1_#3@rdWwrmt`gmp3 z+XC3?Z1!*x8TTlwB@;eUQjo}h`^InT1hsrf<3`K!V@&PN_{k-2<5fNEw#!>rH?orf zw=NV?KNJ3n&vlR7h&x=S>9*QRXBNHhWvOd?AN@r1pLfiAQK5Shm7Wy)UPGSABN%)3 zw^cUoGB&J=~l27q4JCZ7?I@7Uyai!oFl%t)qga*F_{4l+>_Iv-;+5!&tFkC%5c zJecEx(7Cm;T%^yldyRO!7&zjbgo-Ow?aemue~YU4TPBrJD>BCR7USVBFY`Vt^nD-U4N}}7vr2&L znHSnXX!V+~TII|&OCrWg*DOJwCJI{cZDrgeJO>WH{Lr0vA7J@e%I;*mg%?0X2>FhX zu30|ntFgKkne&FGA7?vyBP>mV^D*AMOD|X0fyVr$Q@&RwZNC&}U>gI~i}b84YW`xm zweLROm1a*)RZK+k(eG-j=Nwx2xw;HaT&omVELU6M-x17h4Brj4jIy-?M{F$;zTY`= z>oqN0dx<=(6bf0Cekqlz32RkO>veS3iCUl7)fA8?>?Qxud`b?FKt*1MrpUmLFJe$v zI9G@|o(e-@<`1zXqPuN055JaGwj{isi{A4c@UONda|A`nuUTG{4c5!AT8D|@o_+cq zamV6m^Z$yTzDboHo%(xWQp?0`W_fGIMD?Gr`Aum^YOkF-Vo3v1y}zC~?Hi+_n7%v7 zvVoSMNdcdJYbQ&+7gj8~3IFB0Oeu-!XNa@`Jq@^t5-_rkG7{AWWIe%1@Luv7RnOY> zb29ZEAvrUf8i)m!R%t$R3E)3e_&zC5-8F!sl5D!kgO|Ty-Beg=PdW4?T`d0|Kt0J< z5@o|waS$$6Z||Ngk{R13C6C%nSg^(TET=wYk6u&0Sc^z{)C(4R%1l>2TID3@1vc|` zo@{;bA3!cp6gK(3YK!Am^v-N93##2}AW2=aBs<Afa#MmhTChEgB^~3*n&Dk%@C07zyVB1YoOLJYN?c2uO z3-=WC0=|xsl)Cp&2Evugq4g*Cu2R7PHMwWInlCe;>?X`=dAsiPUBkYq8xt#KRs$^p z{+-j4dFCx7;q`5y)xE4H;lS|PzNt0avgt2G#wOnV{2kD|Q-;_{=l}Mz?u|A5KWWzS z|NrFA{8cPk4Y*2C2=2=NO;~`mqk<^8ZcMkQjvTw_*CpXO4dp$Y6S4D-@j--~f#3gt?pU zr(IYNSp0AihwCka8<_*I6Pv!vNRCt=#6PSsH2*=TCFK-7=blD@7}T~hp6_eZDzX7G z?mt%yy`wT`jm?!Cmw{T%P4%*r#iaerxS?g((_(lxIBN2OT#g>RU_gH^aX+giKgdK| z92ZM;%-ZUs@n*T`pG|G=Pe3=oLXFR;)(6;i(I(NL96Lf4x(@02t;lC0w+vZ?k)_-0 zwOCsS1`|@o-Cj~{?R}sA7L_%tI5+z{_2`ERoJYhGJ{>3LeiqCVXnd|07n4&cdELFo zO$m}dgF^b} z!t`t5JH2k_AtxMKncWB4yZRq$x3%(f^L}@lf+v7y@$X8M4@AQ%#r6Us+L_COm1uh< 
zdkTMT&uvfSvrB;P)R*MRcN6|_G>7(>L3_6H^+WK(f?xTvTh=YPG9aB}AzwCvX{5Jk z*l@B)_e{+bz6E=qYxVNzKXRelBU?1Oz-QkD?zM~Bwg+Zc)J-02>lbx0j~G;YEIw|V zMYNQWKRt|2WG%fttG3MPg=HtM@0*z)Ri6Ek$r8-Cp-Y=~b53v?{(Y$s=$YO@{nFEC zh3$JK@5scJnN;a$_1`D`vu)imWG(Ri(Cdn4y)fL$+2rFpckUBDCBk1Up=;y!bJJ$7 z4#u3&)bqsi8<i2Op9nJLNC-U3PuIRmW! zqshTimh!>WC`>^t|BSbo7uo99HaJO7#Qe>}?qFzm-8!c*{k`xO5qCX1*jjtT^ z4+*E1(hQ;|vZlG(g^)f`#;r9B0eY3(XLqLHNJsT%&{gnY*o{kB5bVn#QEJm&6!32F zwloSk7R}`bH(HLVUddX+w8ko*^d}uB3=`fOP9JDwvrL@ImswS$e(uIn&xC>ZHS9IZ z_+wX!r}~axKTp`t(7IN_GT_4@LiSixNQCFzypdzw95|EzQEFBUDv;0xKO$bFGl_7y z9K3^jN8}(--9w}KDPQ`Tvd3eTFMQJPBM%YM>nEaAYayLOYUh$f-6C*aLZ7skb&YPm zDcfvjVj7cG%%p69nbtO&<2wvhc^nM7#LGX1B6BXz61`y2O!2=E4Upm`P9`A=$JWCh`*(3nVJP+ zQ3m6>k=Ss0YGXn=C7=9jiM!cTXmX{t84j=ZZr^Xl( zNHXtqXp9Pr*3HX2ngU>ZUn)3YP3mqB42gVUj{ z>g`TimmjXV9;#ssw%)Kzq-CGDTxd`dGxL-ioz{-Nu^_vb;nBcaQ<4&nF#4y(y18@0 z-47=X0l0#wPWbK5N5g|9sFQ{e;;(q%<`PDiH5m%t53( zyLD9rdAt^Z<}UVx4Qn?|6t>lQTC`O&s^U~KVoZ+j*kTnhcv5=IBHOhUD&bz{kY6Se z`@m4G*jZrVPB={9bLYDUUeutXid813R{P|Uyf4MZ<54)hCh(X+baWtv@+(Gc>s zB!-SFE<-@lg`IUQ}Cd;i~+EvEjJMCwPrz1Yn3+e6 z3P~^2Gv5-@@@pd(c4=5WHRk5jaL;uL>fS}cz}&E5!`xe1931>i)8g!!8}#!nL*$mJ z=iz!-vffl^rnB?L#f7O{W3rX|Q+mI)kDC3KKt2xt=J~mu(V6m@sOed~%0VY*Cy?A* zfgB}J+*4g$T|kH~E}6V6E(7F_lWCLw1w>(SgSBq|q)lP-woM~^*v?1tqsKyd+xg2k zzgwqI##mg&=l}Vn9YL{>_kG!vk_x0h(0F-cWp=KCx-{=}bZ}0UK#7XueqLd;Tat4p zen%7cgr3hgUM`@T`^nkDpk^TXkJ1*IKfL}Gubuk zl?{@~ID_nloVshezEqp1)Si%zEc&)H(@3X{$;(Xf*J+yvmceqSvVjIMayob^NGu`r z?c=;(O?C*zwQc%*e~y6$dA>iJ#+O7nT~X;Vhz`j8L5xOM(btNVpKzJ5|S?@8l zF+zRLv{31Ndd#9-VaGLx7Khdj6k9tVxfTX*Bl%ep($3oXI8I%z^L>0Tevzq zH)DOwqk&vQW{Hy2;9pgrL?JbH*-;o?c+Yf?PNH|kc|tSH{vm->p3=52QOKs8tVTo5 z%$|uBA&81ZrgyGU98Ge==BU3}Et!4@ZU_zog*>wEoz%&BJ5c3wOy#p`rdP`gAukU* zCrc?e?*uOlF=PSb!xS?jM5#G6iScTu482h%UkOOVoY8AD)mYLCa!)_doEoJ&Pr4U&e^YQqdkC zLxs$OnA4|Apl^b+lfboSKEn09jb+{74Fm4~V7E|Wbdf1H@zM3i2Sv^2zs?HWq4-RCSA9Y9L#KzI76dP1k4cxVCByq` z2|6qGXj7eFNOA0~WA|IYT8WUpS5 zaOh}O^Ixd`Ko`AI;O#o$r8>!{mosryvEJT#nF5_VyFv??w+mygRd)k(W0gj$@ck=K 
znN1e=+fzMmu7Z|W7Leq*FQ>w%iP3>IQLx&w-WLa_S53!oGhfcmVx96Br-C24-d|eN zh~DrxCta03UN%SbD;c{uZqStn59YDG)io(Ew{2oAU9}T@GJ*dMU;lx?km*@7W2R!S z$)}0!8#qEKF~A ztn<$V^uuKJ!|tQbovJBkZ_PA6U&K=KmGp4H&S+>U8+t8=Ro1or=-kWC$F^5p=cL{@ zOSKS}9g$>|4UuqZ&eC$DGurYRJb`Qi7YXJ3Xx|Z-R%6cLPG3@Ar<9WZ{4QP4Mu&Q+ ze|)arAz9u4EJoxF73}D&Ro(W1(QL52P==bHyN9ZXT!pZ}|D)c=dj&b>^>^-P3#?*f zc{bQUz>s(b_RU1Hc6m|-h;n=I0rOog5Q8}rq3&^~f_daI%Y74DZAUZ0NiTwLtcEC$ zukR*Yx%+?S%g^D^I0ts`_lj@#I+(Lm#c{z?&Aw&aSc|bCO)FPknWjTBp53H$tR;Z<()9lvF)6B0Qla*>I~v zStXsf*&5;EqIgS-{?z-ip}i$u|0HDr>+c3yrw6SVNZazPl)Eb060aHipY*p8Zw7I_ z0t}J+ToeQsSs)QsOzHN!I}Aca?H2mimiov)-R{vA%rbh!o1Z;TP&sxf`)ncNF_9apF#s@j}C|DM#{g+J|%w960Yd%D!ygVi@l zN)^VN5;_{SMs-F6G67{fnns`*Wq#8Z>rfO>VaCnZLp}XdKwk8-l$DhbDu9%F<*6xH zwXlw5vK(YUahPOlL@RZ@RjZnZ(j4j=J~~;md`B%oO{DlpW~Y+wb@bLY)iOLwK=Z{) zooZ;7{_M|5h3xR2d+p!3R-q`%kIg|ncRICqRDr8H=ei2J=G>63HH5A{n7>X`R}#7>GjL3*cE)$lJ(9c zUkr>d!p`6Gse^@f>#04c1!#Hb>tWu6A)6561uP1$<(Ix7|qn>S5CBr2nziqQ^ zaO>3R??@6y9o?N)`9ab(g4mw>rVc^FOdlM^e)R3SR;<*~{{=lP%hd(V3DIqwFEGWl zig{mribc;H7WvdkCUYJr{w=2{q5&WH+Syov-wTdvY9$gMY880nbpG@G_eajOibEi_ z^VY(9l9xU1P~%tBrgKZUSjQ_nYo6GW;BF6sRuxON_3}Q>g-43sCY3%Mo)pf6^NW5^ zt7nVGVB9WoBf7w_ok=KA|*CTdC z3AHgo_JJeNc;MvxoE)Q3r~mB0%Yr*?%i~2d8y>MG68oev^M4TJT)RW z?0YwcU(UkjATywK69VBvgz_~gjInwVJVyn@j3(r$FXt{eKIjjE;AHXw;@XZ?*@+zv zE$bJWk~TDecK&~MIdxxC^9Pr6zY6-xB65Al#htaL^ZSQ1Yt!`9*LMD@vsjcp^^mKT2kdJqQoML_+D%eWs^mso z|6ai!@j50_`%N2sWV+^yh(RU)0l6a!83O*tCjzPJ+Es5Tv7@g}{q>!<&)g)ZC_k+j zsLmI{Y^_9-7w2eA!${msFs)Y{(661_){oEe16*~!giXR;dnC<@{3t+Lv#$t(f^+v0 z4mby22XDAX%PkIlQ)u;-2U5K(`FB>L@zbOzB;rj)E*VD60r=mO|H}5wucx!}Q9oiE z<+Wlv)`LtEHrm6eh2cp})wHp(FYM97k2ggK-uzdP_%@PEozrsFVx4au3Nul~wx^JX z-(dOo;vpDaa=p=+&dl1KRE35f$X!kKTbgmqPD=f&)W6GT+4*+3JYGV(AFy*c=~K{u z^Rii9VNi&tuJJ}^#hM~oW6b4!FNLtwrVe$LFIuI+Cko|m{}O+Dkrkf$^toc;=1hd? 
zF`8WqmF_&re78pt%Okdac$O`Z;-}F;#Ou8Vwu$_n zNZlfy3yl6VXWS8kEEQjhQIl?7D6jqv{9{yKwYy4%bZp)#O>oGOx2VDmcVok|hPk+?|o)!?#Z^X8?YWJ6wxIpwZy~Ln!%~gE}2n@L#POR(?LUT&2n|;o^GDE@S)JQ zlC`{skV5n~od%^lPOT{|6{d+hbFN?ce3CQgzCgCCLGllEV9V+$x|tAwgAI30Q1m?g z#QS&3gjr!qIB7FMeqD4-==S(t6}Iw7h|(fH{Q+YzmfZwW<0j$T(D@dm3-`Xc5MGJT z&L9ixKZ)%jkGNI8_Qc#dMSJF|%&LW+UF*(VHZSCoA6$Gk3t()dc_0jCQ0~dCsG9Za(diNvO zWX^00f6?MW((@wzWrLAwc^9mM#?H=ddh*pe9wursC7c%;%#>U|N{QfB%&AxU)qVZlh7e{UQ`ZZ z$;yWzRj5?e&Xe{}2sz!^WutdVSMl@s-erXYek{HtWuWOC6L)U8N(9chMq$xs9`_}MVMqO1e(v)g+qN0v#Tm;Ft@y0U3 z&x?V68C!PoGJO5%lrfY86N7Y=-_1U7vUh5dG6r=L#6s@wz{CSW$o^xtdlIM&YP??h zHS&+#Mhx{RZ?@3U6V=uWGLL5|0XVMo&pG_>8yyzJ9zOhN`IPjkOxnSNA;Ab!-{*CD z00okLduvT?%7gjkd2fNCK%zE2=m*pX#0P{YwNcB|kg=RpZ<$N|f)nl+icJb-#e5~> zxI}^-#`PEsW|4c{ixlaP5*|~U0oRU8T63z<*^~=kXAi0K4caSe{?lSPOJoYQpIi6q zi3v6(gHb@@J$*Jw!kzP0IGg0hE;_|??vW$8RuSF1G!p)xZzhQy^9P2}3BE51b4`r( zP2MR*Y|iS1*bHMALUM>2HDx>hDI%K28nk%FkcZ@Q`BKw!rfd##n=$Y~(NMadI^XR~ zlkR#iFOU38=5=ZR%0ad6m0`W6q;2B84^?^g$o>dsC3lkrqrTWL&F9kd5jhzrI5+;u z8d-D!ru2^^n5;&tit-ETC#dZyJ0+>{2wrmHh}knGf9dInUrjlKJh|>9Pec*(54^fl zLd{RyR2dHFmL!7Lh}lw1ImAOReia!B(GX)f6lQO>XYr!um}_ z9)9s`&N@MlpLI49h3eb55$HFs0xwcaU_;F@Stq zP}y6GZ0JtO1K}30!=rOWMQI6eUVTk3ux%3hA?}y}!Hddlda_Yp7iAH`@`{Qapu^B% z(N<5jSt0Dn3sy^uQfO({Zj#a3<9lQnk7l1PmA3PxzO5dHk)cUYu1GD}NI9f%f|D)p z&$t!Sl+M-`&Y6OAx^%YPXI<=UHdF^5H`j(biG8`n>aS|c%LLTlPSy=ziv!?SY94*>7lkPciry9~EFl{hU!Rjzvt`K;oNwtUP4rTCw>huZDw3=T6#W6qMno^*LG`djsq?q&1a9kYkK4h(l=rskW(bCiree+jHS;tl*p~jJd++;Mf4>fq;9@Gyd_p&u( z?wl~0oGQ=dZt)%$gTdDLf9(}}0Vwq@2DY=+nk+h`W2$S}K|DLSrW!uXGyaad){F2N zQC5)9J>;Gr*3m|Z@*~cUlICKOW;!FP>rDQX#PmAT;zVgsNl)-ok&fEclk#S)HeH|r zl=30#krw)F;`bDqHm{?;ncubKQ^2Vsiy1C_GFG{ijHI{gUa#rI)DRsL36<;l%Dz#_ z-j(cE%WE<`@IV^cz)AXJr^ll8AhU@V#q#2M+;FKM0>S7e-cX2ceJ%>iB`nGH2drat zP+hEH#-T;ajkyj|(orwOa`|J8r9$-tH#p|Fp26~nIMj?nXeV_E$dO^S4$S4_JZHP^ zd8*Zqa74M#!HA%AQp3%!5y*9Tr6LH_~P3);!K+Zc`Q+Jq;#E1>L9#xUC? 
zHtH7)1`d)UTXkk?0x?jZOtI(b{f*I~BP6H~UKx$$Zk}YZD z(is-|z`_Q^j+W+>)1}Ws&4b~r4=2W5#w9{tqX{CT!B@ZvUJu0d%J$a;{uH|k(KVRJToXpc4Z5qxeQAhlzczkApn1=(yS}jL>VM~@TsR`NGIb z$at|$Hh=}*s-n97W(Gk*Vc2biEtRO0Ma2gvte715@6z$asaKe>i*}KU?w4JBgf2cJ z=i;W<$$ybL#(t-Dq9qe#Xkompz(BE{<;+KeiN&8?ka*ASVRorv2u}%3U%Ebb3fts} zk&?o_XdSJnvnF&|o>`y_J^kF#j&qg}k{}qyXzRBi{iaH#A%L5XLRS$p&rkRhEz=@pya(08_SjiI^;V!ZRLih4o z>E>tCI>wTSwI&&&(en&7?CRVw3_vNs&Ln{&BGo^OFf;*Ha|!N*v+c4K&L)N&4^V&8 ziYC_eQ6iay=gwzArX}6&3|ROxcU5mM325)8a#AD|mCwtZH4NGp#``6XOqHu~AzZxFl(fx#U@2PTxCmIxi}~Kq%Kbq6 zg_3sB*zj!gTIfv^ckA}q0gt9GM{#rTUg4IpQ;NSmfDpo@k*z#bCbqt{JR7B^#11s< z8Tcp6XC}q2*bDs+u+dIq_fnP>w=5jwxtB--n@w<2U7aTw-Ih!)4u z5`H$lCv@m4>=kw3?a?Jdd`6%qx-@JCqqCi+&%8- z%t(caS29{2_8I@(^{8R|%uDL!oqOclA;R(R#aJrT#i+K6 zC9)9QiGSbw3@h&Hv)t0Q{lh_2mbiS8LvQ-5UE!pG1m+xw-ZI%o`%P-WG(ay+jM zv9@1p1?ySx=`%DSbP1P$yl*(`hMxa@!OmJ=FVJ+3O;imuo|9mJFYacuw&m%R?^*82 z%YPs}iM4xJ>6tj-pPxUD=5CIB>Jv23>p1wFXZcec2q)H)CEI)9kG7_VSh@5wRUXfp z6`n~>h|S+g=68*ke+guwzE^|r!0fb+%8p{RS7l4hKEwLCJ|YHB{ZhyT6lPoQ`yPcS z-iBSPels~%xC=9i^;?uG(r6y9TUx$U%z`wuw8~n>X>SW;Y?pss`BWE0uZ9#iw*K~$ z{tdcxCrlayNvn_kZVr)5a-Uq0d-g7>TmB>`;A&vh*Vf(sCM8o_ltrFpo&OR)TZD7R zm#|bNp76`?B9ZQC$$n)&^}=Ukh5H^>9i$KWn9vmlJYlpkgq1Tin!BHq$#wCEdt+t! 
z&&_pge{FM;@hE-K41{!Q*imJtp@T8@zE0`&|KUTJHsBx@thAllY>Fj)_7Gqe^bxx; z%CgthSg;fP6Mlwa*KWa4WTW3U-W`lMr#+YwCl7g7pS3tTQ3|_eIxdozG%zKDfl@m!WiP{i74;b zK3>YSfyxUCuXO9G7ooE5HJe_0ET>9P1_Y1&s&Kp+abL;Yz9Z87^D|DiPk%#Uvf!(i zl_p`uYQDU5?G>im#1=2-T!=%voO9Yc^^93LpT#V6dB!97w?lJ1sfMB8lRUAgcR2oe z(C!1fyfXcwK4G}-nb?7B;je$@9{uf9^k*W&(fw(a2`S~POlWIFzI+tBO!Jrj02*l% zb-hC&T#^-HgQc2AO?8vfr^Y%Kh3@obS@WetZ1A|}W_Y+U#?#8W%kFeL@6}Ik9HsLt zVrALVn!CmK;?}5GJct>!pOgj-V~y<@x>N9_3(()s>TpQdMD0&aBcw89JEJ19KO~q; zAb)-!n$xxKO_L*$AQ8Xn^^$CZsDpC)jCMeittuON9Ctt4d;;#bh_VZX+>g-v41muPm(nvkCZPI= zuKOFaz+1~B-_lJRt?TH$#W@5A$6l3TnX7y?-u;T8o^n(Jf)RNGXXtw!c+X2i^PT{7 zXp#ypF61Gx@?IKn5l?SUJAQ}>}c;MD= z@%4W@G&_7&?YO~TqLu;b8dOYO%x1}hJF(4WO9PjfVE7W0Pb+v zzRM@~Vz?p#dKIX-0MI3)r|htowmMETE9_A zrLWEPrM;*t|824__~%oojRg*goTb^^1a<77$MgIi*2IQwy<`XjWfIX?VGy&&Cl{H% zHVlGVY%ADbD%YDK&#$C80FSB-z@;a%4N&zi{Q_r5kNSUCUWIQ?RSOZyG+F(sxpFw~ zPC>60)(PjMg=|I3qDTZRzXDe)V$AM`r}glZ3>DIK88CbL;82`=;b#W#DU1Ui7Hg@x z6GBb!6fa~cB)1by3O!_v|D=VKIY+lHQ1r!>3c}N7C*Edi^)vQbSBct)@?Rk}*vqU>4cBO_-m*$cl;d8OD}7Z$gip4c~B( zdRvY^!|f33J2UuIYGFQpq`#3R4Yhm``DQ{bntC-G7Kt94^o01AT9k+N-NzpP?@DxU zjr5W9Uls+w-D2z)1>>h7O>T6|Y+_^xB%Izl-cR`&@Jp)*F>*MUX%}6t=LM`WLw^5O zlSnV=WzkH$r1m>vV81e@!mn4FZVLTn<5;2FFjpdRUdm9Bv70@rCYqDWrqEMx<@qNfU(76P-C*a#_01T zFTxj3tmJ(=oXk?0VBb+nNUcp^AA7sOEI zTDoSZSw4C>a*|YF#-HL4*(4X-^};lDM$bbwrciK{iKbU`nu~FUt6=KnQy|?Bn>=zw zWTOHu84J8Lr*pj&TjC{piz=ctqMT!9Wrxjutf6~jBebAsLRZCdi*zyQdKAgl@z1e^ zwSZ|3YA%&4`9dR>yBs~PE9S@w26NCn->KM@__idA-!6sGG>|TJYamD_MGQT^ zD*5v&#>Weel6Olj^f3cc5`7Gu6EzMh-A(ccAM5->&X;Qooq~BH%&&>EEO0_PUwN-5 z-sc7RuL%U~tFs}ii9>qD8yfEw3r|XIolZS2m(D?gTSxNDf%&1v@jh+sJpG4bM{EB9 z@LnjIr~fZIEHv6P`L@VGXc%H;r9V_cvsYgqWg3Eq?jcb-qWb(Rd(KDYP~!f3)1Nf4 zjaWy$sGwqo9tvVVCN-?jGWou8_hPV_H8MYINci#FuqnfFCvdb;6vw%OwyqlO@jL}n zT*3~Ut!~!|f!$VmWQ$;-Xp5S5BY5@|u+T}hl>{CX!zx!o8?+m9;bzi0_D^K9z4Jc+ z0R0ttiUyg;Og-1Al^5t(g1dCGYvydl!N4FXF|B}D2=c>q^aH#NgvQPK@^!(qr|mgc zfLo;J?CXH-<&Rn&U@vMjt@{olD~H>lx}k@8l;GLn#$3NCX_RY6Q^Y8jMxT@YYmVe3 zO~t1RZ_VOK32?z`0#E7Z5M#I=9&uf0@%?z> 
ze*lcMqrkV$DRx~jf}^B%b<%Wi1~^~^#auD&`(e1K>tAQ5M>i#d!eR3HxnBn9)@%hU zpNfX@VT%zq22`$jY45>8gozP;f=f#Nfcbuorr2m&5ot1HkBet$Z{6CAwH> zjqV9eA?>5z+fjAh@l(0SNFUe2eOv;D06E~<=D@q5S(oAn? zqlL4xoBFs!+ohmZ19IW3cXfCv1eNVme|Gd!Gqa8NT;VrV0hPJs7isc3ySGhpdT08I z_9vrtSDr?NqGIl6inq8K9091gtTGA}=5E5b@e+6+)eKUQm}-YRy*wx%N@Hsc&hVxl zne6|4%GZmJ7gV)CM9w3k#P!2LhxV14nkRm{2gX}5HecTX#D=z0*pdDRcuE~<;5E%- zp&dQ41C&6hU%;eLTMm?lT-W0(`v1Ur)LOK%ZjktjEJdBgPh1uH{CZD@^tU0J-XMurg%&IhS2~$1B7I{#JO2eR8D!PL$R%7&oRL7@3XK zK!h8vMS&Vn9|v)k=mmyA0OS+w3AQ&u%)geYjz>XXOH;=X)~>NBh}8$6|1!rNFW5c*&Ivs5gslwp+~NPw%L@2Sc%! z2l_!hZ(~(B*sAC&vApT%#N9PIro7$ObpEizkHLYbHk~hx6mF$R!R?OcV{Y8+X05MA&Ny z{`^423mbBdTb?H`}VD57|VbiH+jUK1E2w`miSnWliKdbLgMLU-AjXT~drL)A-)6N(Mp?47vuaA46WtO7P7gZw=Fz}2}v7E>*? z5=$;;yGb+k9f^naq*abCUn^?WW%r$HgS4#akT4FtCZ~g>-r?;j$$GV+^cXczzTO0m z-owP6L5{3yj1!=)g$o4eq? zmxj#%pSN2t&DA0=2a7$u55P&M^3AkC?((;cj6sxTjkE+1`WFP)?S7AhXfKrS*;04E zDN^nx%{=Z`V@)cwGzI)FKS+Mwp}{-^v-`Rzr2OV6$d)a5voDpGytj~DV`Z%MV?;9$ zS}P$l)p)F}*V$f6aNM{-8fbCa1tW`JGClj_papJ7hrGI{a~m#HAa8IBBQq_~c9DLD z()iz>Bnq*CpjsX~4m`=|pHZ<%>C(>^Y zj?7hGQ9ig6^XBc((O=~nM%F$AD`EOl6;6eyI?zZIQ{ahI z7$Url_i-5FKIs{{VVwXLynhdzHHIE9Pp=0z5f7NaeJA0XVYeLyJInJ^r-d?wS6hFX})UOQV(ST^Lgjn#zc~)!XDsWT~(?6pJMH7{Je# zov%cT)h4$X(L>Krn@#sNqHz=9MUkSYFi)xlPrK)_S`F=cQ`^X{LFqtyKN%Dezw%P3 zTgMA`bUS4%y(YFdWSOwRT)^!7$rfdv7V~+=PLu#S=o8r`<4krMrn8<+>Oen5B5?iG z1XWCHT=FKi|H>F4+cAesTo(f4qtQ_eIK zl?`j9VpP5M8Ga^J@Z)wgD2qfSTgzDL$mJa%lF2BlJ1%XiHDOR)D0E1)OsLU?;a$TC z7QOCI+x$EoIAl7(F!k4;D!tf-S9W20`Ye&Lbh_7 zq&NMZNe&N3Q*o3AB$(o(-MUemA0DOwdTZ8u?-gkXg5bMtIH=tY)ULBX$;7w?Ci3M` z^g#Oe!f69KM&%r&TG(y~_N9=d{2wy(zlTR%`ToN?-@-aA4ZX#+Wmdaq1gb%AVGSGT z6zCrN1Tt8i5S>y1Rqt@ZaC5b}19^gNNS}a+Mpve!CH|;hugk))+)Toc2P_(i1dW#^ zF)VFbITHHLCLFd>>R0eTgmGk-U@gwMcj|V^9)sLF|f{G}w2>URiiYNpB)BCef z)h#r|Epp&H0%JkRWBwK9I>(5wHCIS(f~0Yrgi(NV-1?e^Lf5kqb9KH${{Ss8IX-}Y z2&^Wznk`OMxaBs2ta=<*^o}0`7<@?nTE*rvttR5Ehu)R6kIYV4jxpDo#B=1##w5YU z;>vJ&>spr=61M1F$7N1LB|6kt|-HrO_KeNGZd2yjhu|~4M0=6;eXx#0QJ?6GUH){ 
zACtZYY7rcx=GaKjekysM=De5Ko#Jw;sGxu|yS*0ZCz)B>3%6+=wV;c>7h(Z1u6BZR zO}ATlG310&Ily8`$E9Ycb68UMtY=;O=t&jgzBc{NzrGNwgxi8RKT7$0 z7sP!oX&G{Ab8hPbVY~BJ^nGq^E>AK7S|&dhm6 zwCJfvQ|0MZlwZ1g){)|S4NDP*DK0Wp@N24!t9$0YUyA+hU1x_XIM%L<;^&|Gu3x`p`o8RBZ`Hi9%lS=R#CUEcCzwEBhr+BY7@4IE>@5+QGt_H3adGd ziOZby;-~WGj%Sn1+>MQ~fYPUck%=@;(zgU{aU^{wJ^x2B~jJcwKOWu2UK$-?{8 zGO0-Sbw5yll?xrxpDA%FHzo0c2hyThk`yuQS0taG7$kP$x@o%{RW)ZjsOUDDurm+c zP6uJZuQj`|n#x9wP)5>5dsn($LXj#m$fu4#Jm#=(EUm8ZvQ7?23w*sKKafRPG?BuS#Vo>rIL5cK#d~ zwsW^RZgX832#y@!aCrLHJD@R+&72N1$?aZ~;NJpke-JKq*ngwxgK(Eoak?WDO351GGR#gks-uO6G?~an1?HuD7LTLbwdSF5pC$;t^JA$b z@Txnp;8)b2555T5X>r?YJ~GCksO$_aKjqd2KEwTE0fCmt^cCtJ3-AYryc1$Aw3q>% z)kODBH%~EM?A>wb1_10Ux&T+@xz`g`#jj^e-H+nW+Bj!{@cDhQIFEXxTRoSd5LEyG zxC5{l6(WpM_32COE9G^2pLUW%sWL0h{v&)&@F&7w55I~&AZqYw8gOEY@{(Rz0+#aP zjX{!G2vR~Kvm72VUy9!sf8e4&6+A`bn;lExu#dyq9*K7&+uM@Tok~1~{{T%5wOTe& zkcz5Pci!9s52VOAPcqIqrG$sNzSG+My?-vJhnsN@6E8_ue)H(J^*?*4uZ6#6zuSAj z9~rc}9TUb^c77W1w33Tg)18_-Tgh7!BXU? zIRlFQEd8cGX1y2ojquDmMgFaOqv{p~p2F%09qpBvjlz3f>lDD_sl%`Euo(yW@A$Lu zQ{oT7KM^;FynCg|V$4ajxyrr0++!GcU0H(;Mn-a{h5(BF8^esJCdE{8#lCvWXg#gp z``@Q-r^NB5WtrjEE*)~ye`#<300a8e{eb2lSy_5%jO*DmG}`IyM+|AmES2zWn{0{{U}a8hj;&KMDLm zj&BtBZ_JHinpxc6Y4Pxf%XShOVpJIs5!(vN39na+JTuke@{PK~{{WGHqxn9EpToR! 
zWU%><%3tUHXYbbs@T0Fv;(QVD55eCNbXfdD;GJ66S<|E#^X+9>0=WCgz!Fv0xIHVh zQ=V(_nv~^EPBT_(x%z!6!lXIi?w?dU8OODJANxvv!u|sI#cel;JQW?cg}hd~r1Rj1 z7T3&n4Yv`g0dN%UWB~4BTX#eEkgqNHsqvfP-@%^`zli*QtHY;3f*^ELle+%C z^ZD~IqlblgM+q)y-$!C9QP#ej{h$8;Wj~1j0JH|HZ8n>0Hk09hF~!cmaS{745#?DS zQpjhMcguu3RD->SFn>_IYySWQ*!WGWcoM@w@MZnpkFRNBLYlq6S=#xeAfL9Ph{J#e z6U*7NIRNqxhnsQMRhQR}9=w~A-rDl|{{X3>7e%MpG~<(ltC*gYkRn( zZN7E9ply~jka93rIA9wfd7_4m!^TRokW{Wg1RR10=t1dNXM8t+;tC$sz1O=nzt6hz zG|Kb5R!>&ZS}o%L0IkpK{DA9UF8xFHZs+@B!fS{ZRFz%t^yJUV%*mIyg*gREw4h)s zVM3qUZv^=B!T$gev@3rT_$OAi)->CW65`T9B8LDHj5uU<$lQzrDd2%$g=RS}X~Wp1 zS6JEmvs*te^GyA70h(p`<{80SORQe&`tFbBljE1{@9`7%e%BjL@twJSGHZz;)3t;F zZLfr2Hc)S7M%qCr*dv@1f@|ErW-r)}#lIZIuZ(weGsU66I9OtRq@{R&FX;QD&%POaF7PkH zPYpxiKLP1b>6Q^Hh6pn(&Z8UVkmaOua!F&&59v=0?V@+{gV#Nnlp zP!{uToQ3n_DhXf!c*r4!HnV+cg>}OWgX`Y1%QHO7hjEhST(a6xdq3-=MPl%oetm>i zmE+N_kNW7(3HK(6GK=yAbd5`X@vx__it!nx3=F50K}vs$UDE3Oj4~>CDwxPe zA1hP*-)H1~vEt9zN8$Ff@IJ%gK8>nh+g|9eadoHabIl}+_PdSbnXSVJjCo)-qNxEv z2PBjDpYfC6XT(o{ek6~>e-LcqiLVTjSY2DlC4%-C$OcIN0ElC73>X3l?m4gO55s;V z@b`;!du@Be8jY^2rrs|2F0AEQp^ySW+UQ6)AcN>Z2C}?w@bkfc5%d}SHREj_*I3i1 zQVdryhT2FYayQlGDN(dnS_0WJl4B@heyKP!D$zpMe^X<_eg5R}Y>__8&hZnlf z#9swX{k!2HX1}}hWX1NLfjr4tV&@XZtQhZ8xG7LnoV9*CbbiARb4 z9_o^5moMd|(kKlL+%dRu6v)7aQb$|{0Gtt%U%Y>_KkcF5kB-pUcw16O(R^yq;!Qq9 zEe+el8F{6)kwF_kMs1IRtAUn0*Yp1X;;#sJr@-3W{u}Y8mvf`)wxoHsmq=1Y3Z}MVM+6W-!K`R*ZfxByY~^?+jDnIz#a&3~K^*43`#s@2w>+S#!P;$mvtNbRRsAM> zj!(oGyrQd(uNAys^|}3bBW@J>{AvA`zu=&^)|VFk3HbYVl35iI+IWbeSA?hvk=>+G zw)JoM?r_V=kgz#l*c}&J(tJUtTI*UJ#EbsY{)6*8Zdm<*h8QIu?gSPyKGnsXArjcQ;jvqv6ZmOW|8 z$lM;ZM;YA3&OzzzNbr^!iyMK-`U(wOGqobls~+AI=hm_a^9PorX#+K@dn3-jAS;eI zKi0EEY=utju+CWLrEg7XbCRbwQ#%cBDu(%k4s%pyVkLdO3F5S_qIrO52$TcK2c=ZF zj6|r+r!V<&U6DyGPHEqvog=Zr`W)t*9OIrbO_&nFhfb6r^CR8$7^_a^Egk;=!#0op z6HPVb{{WuK0rw`qO$Xf{L0^~O4p=vh^r*WD)c!`lP)FS#L0_HmxBJB}Mt+&VpTMi? 
zg{rrzt!pB%$SP{pR=fWIk7)<{CcZv9qwXp@L9@z|{p89+mDor*85tGuf5!sNQ zr58M7+P>>WDwzjp$?eyg_&ei!gX7zb9Q=a5mnyTU?b!U=F_-Szea|9(w90dJrq|G; z$3aCDPytE-G-ABh#orS&zYYnktpK^c0H)vIMm@>(6%(3@xurT&o3v5V_?N{RXND|g zxV)5k>MC+Ad1sY32_rf2y*iiyCk1#{KVCDi~VZt z?(gI-%yY>mzVQ8m5;;6e;#k~dG#2BIy;{7yMj^{>k4~Nu!F$`Er^y&dRh7(**?1qV zO*FAV3z=|Ciat;|R{GQ;AjBj}(W>Np-F~$U_D|)K(&c;c)A6n(lX{!ItaFVX?DyuP z@|raRac)WHwra9TbqhRmg>WAcx}Uy>x%4@!LVU{66k)gck3;mYUN1xDZqUb#rH=%3 z^{+Shqa=gCTF4|hTXKvSouHA{y&8DaDBZzr}3jWoXb1TZbt-`9Q$UXl6DqYDc6k-5C2>fapWQyHDOiCQ!`x@wp zymmG`8dXMqE4Pyk|aUzz?5n|F=0LAU;2t^KQuM z#<{BdwWV(&>o46x{$-yI!yD_itso3Uz^+Di{SRvTYQA&O;tQ-=mE-q|Uq)BXXKwR_ z`yXGy-vw9ZzaXNDE5ts~D58o0|J3*BEWDQlq-;OB2~ql*lI5i(!5&;ufwg-K^}(WN zbMrV1zKNgfQJYqgl)JjbdaLnY)T`drpUG7t_EDPx8D+_Dkb_nJdv_%pYi`YHPN@?F z{ts%#mv(veJl9+*X=92|u!bdOCQOjWClzW~t(wovgcZXEJC1)ULimg3>OtG}6-p+Q z@&HyH@sLNRDxRhjX>L}LSPXh_D#9%A!z>_@8w3pZsljDC*ajn&wKrFkTgCEiNyDmWkHQ((5bw2(EFh0X}vF^{J;Bp6AIP1KG^ z@0yyyRv6mZ!wt?`Ipg!Fe(7or>NX@}zB`DCex#wn{OhB#M0l1nBkq2tPH;Vl;gF^NErlIuKYTUP0h*c zwFc8(UvC7Yw62?mFmsNT>Hh!@C57+qA&ng{gFCt&dsolq@BN-f(`5;LVkfRe8#0~j zNzWOt4e{Rg*7u0<7+eNDN%~j3yLntH7SE?@@SliMV_n#Lp#5v+@DB6P_H&t9o~IS( z1yC?DPBT$W^8AKC`Sk5cuy-{Xfn#4dbm?D0qU~mUg0+f=aBLoxVH^%KQ8r2G)|D0# zu5pK62iBJ=DOx(6c7x&>w2PE!T1SMCHU=Noy}L}-Y_&DrZ29g~k&$0B$s)3eY?(_7jP$Wn^{a5PzL;Vz6G(^CQy2W-^A7)gM#mAW>d_;m;aDeF{OSowpq~Z~?CE zrbGaf!LJ6k9(3NQ_bFF}Plf6uw>xoAPlZs#g(L2h&=d5lK&uSmp>QPa#bTMGNh2{O zo)i1_-=9uKPo+mCqB?n*DgnnF9C}uqjT#{1*i~t_$s0!i>H+*~YJJS6uB>7;vq%WH z2jx8E)mKG$oM3Mm0GifhlOa_=KgW}UjCRL-)_Rpdog1 zpO}-|+M{S7ak=E%=xXGxEC>A}J&H&1cgo>MJ5CA|;4fOMz9$ zeZ@~fR%0Rp?=O7fqq~fql0;4l@+t;2)98|*UX{Yz8mCgf>MS56#Ol6>_4pScnn9`~`MgvJBoFr(anUA0YlU)~t z{3qg{AL|XF_)-|;k}hs8X2`dNtOdv00Np{{U@&hra=&Zw}ZY@h6I& z0Ww%yKHGRh5)|BA0Lq6e_W)TMuT%RHy8kNPtvO(+H1G)7moZ%@YlswJ~Z%@(Lt(d z>{Wun=6Ef?cs^53i~)ty{So4)UNWAsz@ zRsDeVUlYls{7m?@D74VsW4yA_;&~G4C=w*w)zhVuh12m?9&vPIa6xy}F? 
zKGpDl?92Oacq`*NTlizevP0qj02vtfJ+zFmSzMnnV?k{!kCu``;}IX5t}+FE;toJH z{J+K+{MQqXf7-F-lG$5Cyu14KKYHPOE?-9ozq8%b-SuzJ{DrOy;vd6b1^iU- zT#{4`;wDz)1W>p*zyJ#UqlG+5Q&Ga_8|909l}dvYi)NRwzQ2= zNMe)T@17lv{^YE2O{4 zo{9TVe%!tj_$tzPL&7kB!b9Sln>iXcf)m3AR;0aK0 zr{@4EbNeHwYMOSdX%+UFaTV3H>%J+WaU^cJ##NM%MnM()h>$ap*1nne3Hw+4R`?S= zj+3NnO?TmFZv6Y58g)mBqjEgeg_(CkSPcB6F6^9Sl7AuM-wmUP=Edc%)7{6T^z8h$ z^gl)6&NX;wsu=6aJsP&2zDM*k@lWA5f&MV~W={+F>rIbQwIs<2a_aC*0Sh8DY>o=& zI1B@HJXhe??HT(R{6P2-yTlf&}#2#FW0&J!TbsRoxUM_fAJyF zJShj*^y_%U_qx5(vf9BBV37a+Bo&U z6|6g2=&)@+Pm)#2v$CJxI;#fq)PQ&l%U>FP(to$#jQ%8CYCaD5T_b2-54nNuq_}95 zYB$i#in1hyi-=b{fb#IgTa%m-`QL;9D8K`tudv}C4BVV?d8+>aw?FwV-iznh`EDZP znAzd7=dbgBm-(G9i+pF|&xk%Cy76C)^-1-8Uh@K5n}*3MoGPk}Dk6@EfG8x8IW>W6 zWp#IR9i@~K+|Ma0kwT_P9D$M*R1!`Ga0PnL!OwyJ01&=8c!BgE0a%HAg)MP=bG|z$ z2;?M_DmYhes=Y|&zj(d?e#l=6{s-IGYgYCPsrb4ZUnb{SxMp?|NEgogJZwrttXBZ2 z-M1q+CqHk*JYRsw=`1}pd99-!p1qgie7;M=ILz9ksV8Xllm4}5$Dgs^>@TTlT9%XX zi{j*x+f5Qff1_w{O7P7DtT#n9)UoYg%A`1nSYgg2#@0stc0d(C0|0P6F-Sn*_xjhs zpS1Vw%i%wS3#oV`z~ObD8bXMNAT^DXN)Ak}8|RS+K3MKXXRxuo_> zXs^+EpRUt}2P469x62jxxBh3=UlhJAcoX3FhORtS<>Tqf?(w@C*^ck*Zd{VuxeF^Xb z_LumT`#$R;e+^x=?uBu5E;S7f;m_HwzFmZ?VT=;(#vKPcS9-Fj75m}v6ZVz(d-3-` zj{g9|cLvu}i9o*6?pP(oyrg7+GyB!R$aKiZTpWS?;P?aK?~6Vrc$&|`ehky))HMs) zQZ*5y7~plnvAd9`%`{ z=sGutY$DP;KVfU7*ukGF*3J~Sfj1IP*y9SLB$9AC*1GU3=YNlXwBN&T+4E1flg0DP zaj9w^V6)R~!pU;`5VrRPhnE&WUF3B*0N3L994;djRyb@lT%|3N>VBh$uSX9DTDwUu zuc7of^Vh2X0Kr5*XsNtsCyBJ{>pNX2O}ZTq?9sibfMfS>Azw9%W0&2KNya#@ z*5BFN_OtkH`$6fZ4JLo=zY*G`X7fh7brXhUEX{6mJotfD1(aoR^C$!p^cl|u=6P); zTD)%_yt>=h`my5WoK=9v)O4x4HQ!76A5!4ajB`q-H0t?FpGbJ$#y^2y4}LiKaA-a^ z)7E#lSe}CiZXQLn z)~#Y9AQR>baDcJ5kg4dzsoL9FuuXqU6`QH)I&QOLai!|k(Oy~I#?ahb&Vop#jm8;W zSP(%SL9egjjxxq%)h!Qto8B+dK@KWIDI zZqWY#!ad=uLZahSyM(hyWCWtnhz-eiHmB@SnlI2HN;9!O%$!lm(?= z^UWxEr+wij^RSlhsh z_KSmUA(AOBowpXr+qxipv$O&ZPJYq-m49#j6XFNiyb1A|crZuFqPw+k$jb@FId&ry@UWMXli9QM>Y70&gkb6! 
zNFa=Y4nYQ;Ulx5?=l(tTYvE6b`f}KKn%B&DL;afOGqQVTZ~zOiuoNbH_Id~8^b&-Qdp&3uN#i4!1N&3 z?3pfqhsObKn7q2HjcEJ5A4Xn4RdVK$1a71-%APr@=_@q8Q+EXZ6=@I% z{I&iOUh2P7#L=TZ;Ib8jAVgD@E6#qkTIyjE22vHhd)2#;`BnprRar@wYK6!Gt{a%m z88Ddww!`<1TO+zl^E_fQKvraEA;QC z2cQ-CoACP5Rq>XRE(X#r5%KgnuhS56Pp4}9^Nl3_u}jgPqi`x}m0s6E)_$f{Z02bO zR3Xi1ReP|8+EF0Nl{ppiPq(t=U$c{_Ec(w?-H@1w|S!BN~z4W+x? zT)L!DjI%Mo6`QMT+E$4fZ9-tI6tGPE&0jD0ui_r5;slb*%-LupBr1W(9Xab<^()3Y z9W*e~sj54#AN)<83E{Eu^raVRGD@s6+4S_qcooIemlC}4F=unRn4FSNy+bU5L%1HC zQwXm*rDoozuS$$*uQE_c=D$FHV0PW&UlaL%d|NO0?VS^CqxBXyR|)5?;1435c%a+Nv2eQ&dbY8n4a1?TH<7Whx{r8JRE1&J z@veAlGYLZaqlfZ@ep12}QAyl6B!W1{zG^~|xyuZm)RC?b0&{`ZpNtV++;%=zHoFIp za30mdd`*N&;2mBk83qsPE6{6#_?%gM39GXI01O15pdz(^;(mDjj7oOw%78wVN?43e zNAM{G4z;(uNCa|&gU1IL&1Jd8%iEzWsouIGZ5@q21q}G(Lu?=T{NLPBI<4w^Iu5U&Sy{es@;#T;Qs*ORP_odqP%15f{G}h5C7Hr zFv)HL+O*8_s3Y!UpyMX1PQ|VKtA~)5IZ`@|Qcvbe-!KulKQSHss@g_nK^@X5QTKMA z>(;-hoRUA2n!9Mhf3x&?Bn3ab2eSNB-l1U%QmGE#t z4bwGXd6vH?+Dvf+g#c%h>0Lb97<)H;OzCk9j~P?bwOxWMhya=6A$FV-z%=HL1uh>qM(w_3zjJIY}#;0Jq=m1w-BVt8H%S&p0$-p7fvqvqE9_> z8je{{Vb?Y4zYftCP?(naigf&I$

1*>=a5&H={)y-VSw4<4g%(qQgPf)0Dvi<^JD zNcXavePT8}s=No{7*^Mt{gf5-2HrDX1Mw6H^}>6)3i;f#bbA@XUd;3IRoexzibrN# zBy#Ul)C2tLv#Bq^BAp`$wv{95E9lRUO|@d_+O7{}s*(V*GvMuGoa2gx-HtHS(iz7j zk@cWum)R<)g+XOMdp^F@1vc`M!zxD=3}j*VE^=x#Zf zo$P=p>CS35&eNU_Gmoua3=bWtlbFzM+!T@Cxnk_>&bFPdWR^fT79jd{J0vPJXHseqS8t(`ZpbC) zU_D9bIl%pE)Vw3`iuc8+x0>?3o|Q5NT#d8Nt^L8(6p!L=PhpO0)_e`{drr|=bw3d7 zzq~7f2^;O=KAIlUw6~O;x2Pz9v$XpERA>`aL+Ltb^W^xB!#d zqmoGCLnMq9R2E`-06Lod(fy_W0N|khANYGH@V~*aW8!^X<9|O=^SsL&Cjz|Gvv|qzNfZ8y=@tyu} z?jvWi(yhmtd3Choexrru*SlBX29Dp*qk-<_9L9SQFUl05x@gG9D@b`=K z%Uwp&<_Vx5Xo5LjA3X{)!qO{wasYCy20;M!uf=$4k8=#NxBEn#>AT%Gt^23k{XaS3 z3??$4HW?^R+pU^ElSX~AEU}zAFl9L9N}Pf{2t5x`U#MTR5BwAtUGSXtz7F`~b8)Na zg&7lED)%~!61E_#R&RV&;rC|UW+5_P z*sA2^$_=u{7a5RkV%hls8Lt}soquG{h@T&|dv{1D(L5~5&o;X}4$!bd0Z0eRB$VfY zkT~Sm>zS5e!};D+e0AJM;sd{yD! z8)z0@DDb8HI-ZrN#IsvnTej%rM)dR`@XC2o2p|webT#CE1%3{AQ{ZQUE&L7PA35z} zcA9DK3Yn*!@x9!lsOPsC0|O@&>UURG_E1T6cPc{+N?u7MV5o|tB$hk?0Ldo5n=uNP zwv=$~xZT?NHo9nj;YM?Gl&d72wbw=SED{G=XPFs&x#qtre`#O%D3*tzoBsd=d=Lq% zY6joSlf)Z=vA2>?bM{~}=gAnsEy4nD2_b;_Q}%QHyS^a&EYwDq@iOxJ#r_()n|#t- z%4gKA2?Q2`NgFPN;IwXgWeDhg#|h!~abE*0Y+#m`QF=-Geu>)W&0;vqE~ATBSxWsb zwoLu_HZ}(H`#{^oZi4LdwlA5t({?kDY}J-zQIo}RpANnycrW6whm*w~1=Vf!Z8@Z9 znrnC55>&?M+;vx6jDuaY_VL_A(aNEbfdmndL9dFbPH>Z_SvbjWbVH}=Bk(Fa*r?A0hoxxu={eZklp^f)$hyK5k8ZT--r(X>lc`>`!kzF44&pidE6j;gx~p5O^ZL zqj(1clT*T~xpeueKZ(DK&0C`U`?K)wFRz-{#VUB{^1)w~Exm8tv0i}J0Ia}yFqMj5)&@e>uC(!zhjbbodB zn)sLbF7NW|w+B(mr-|goJJ0N=@c#hA`kn#%PJh8W65<=b8vF%TH}hRQ)A)7#h;4*Y zoPlI<62;{ck15zIDac?~5BU?TXu8LTbt&|35Ln!4xAv}5dt11g8RB8t<8T=p*nn&L z75$}uZC{6f1a!--A48ho#C|ajU|lW=a+a(BEcZ^lBCbFPGVFK*0f_t+_?z)7#NQXZ zZK-(w0L9M}YI>U?b%i!X8b+ZbjTe#?5kTaUdXtk`cyo-->fx`AkM4fVo|5@@UVoXV z7-90d72I&Lw7R9L{{VsgY-0FR#hxPYKEG$;e+z0C8fK+sAd)MKX&NUO>ZOo^Re>r- zc?636$o-gqZ$AlqP=`w8LpcEnEEIkX z+uB{;+(l)l!uJ<5y2lhzsb_a$096Adlh{}4{{Za4{{RH`pHzoV@&5qFHlDyoaMHu! 
zrA?AZu{!x}cOz$NKGw`ixhTqat7HzZ5HNWx@P5}Ye6>EyUlV<;)px$#(a*uv^ElSF z4_I|i;r{@D{SVoQ_~H*cc&np>+I-8;^GI{wx0&`BzV zJL5krv62*&I6Qo;q~^YWg%%W8qZ?5aDyZxS0DcwD_`}Bj7Vr*-H;FuBs#@tfWxhPi zi>U-?3E%;qr1k@v{&m4t#8SjUrWWd=dRu#+wPLAJof%WaS;4JuvHaA0ZT*@)Ec^kz zf-f9cMa&jpsJV<4K=>5v0}IF>fU50*qydE2`L*U}%eKkOOfZ;BTd-YWR14d;eD zD$)7Y@u?8%SG@d~jzlIHo0GY~W?U5;STURSrTx0KPm4&t67UY29+jck-bZwo_Zug@ zo_lA^az;LA;!nN7-d$Uqau3$Ovse5SC&J!2SUf55iLHD)ue(0YNq=*uT<&nOMgzM% zQ(>?{$~N4F1-6gfGfdNpR!F9(r9ZURvP+}o>F2$>E}xNLFc}2%R|iqJeU#OceO3Pe zEsxMI1^6GrzYjb=YvFGPX_nenn`nU&3s{mRVh1k400r3cPf~fUFB15lK=5vZed2Eu z-bJTrdUP*6)zsWa9BI^?WRi1*QIf=f0VfsWzaIYpX`hEb05o5=c$RT{t4|n`%SX17 zF?Vu>f|pS2phl$Sn>{)Lc(1{4j-Rxb#Gj9zAiKZu4x=@mi0&5CLbj3C#t4w*H>Pnk zx#gsAa(?Jg03Vz1zXoG-$-48rY4wxq>EH4`??2*PCRI&RhO^m4boKtOeRum={@-_= zE{^Bn_rVK^EVOr2TdxoymfrHSgH zPpLGLMmp4L3X(l5`XeRC@L5hBVQ{gxch$c$@vPG}#AX$Jt|C|Lx+B!S82lpmh4GVG ze-HdS(Wcd|p+%N58B3nUIVDm!i-VKK3FRT&ts-S3Bgw1>g}0EoT|x$(x4;!6lEG`lFTb%^e6BS|!+ zk})A$ub6}>lB_{1xDW}i%zQ`3XOwEHbk{8ojyJo0cGYXsPnq_-6N1WXr#hbXRJBss zdUR*wSM4?X4|w0;wD&$L@M7F}m%(EV=Bk->3jh}`Zq*b9k0T;vP@|)T;J3n~1Y~qI z`o;TOe&11Q_PU?LFM=(;*5)>~zwr=Y=`3;qxoozv7{*nkVd@o6-XD|EoQ`_e+VE~C zIIDn9Heca?X|FZ?KIhGGMjruB6yujl@#?xK*W<^O5EZa~hbDxxucs2b3d>Z|rei?jn(Lc2O5voTm+z`ht_MIqb?(EEi zqoV~Zp@C5%6S>_4WjrK?oS7N7#5x()!~v@ zbqzvz*{p3IHz;F>?q5DoRFXh30gN*eK>SzMb^U+D+JxHAiM1=uUr@L?mhSpzxSh}Y zQaLPt6JK_I)L*d2ihdPe9};{mhW`M>z8ID}i;3Y#Ot*S!!b1@$L5U;SavT8PD`j{D z{(U5YhaC-le*<80S~xXREPT>?$^2h6)B4=-xT71GRK&_SF#`@oVk>vqetyG z?7ja01qji#^-l(Tc)PXHui;C0Y<0b?miLG5xx8{p!znm#GYb(Lixj{I^*YM>%Tlz4 z`%a!an`?;HSZ0O9k;xl?%F4<}Wne)B91&m0Lnv-*?BCh9_RjJD0Kkj1(lv&k#J&@d zDn6faJ2du+ncMazk@iM8JEI^gb;{$P#<;J;ILXTult#_!;nV)z{{SQOz8>SrMXWw$ zs{U@jGx{~8Cjz{?;djO#0eoZd+IXYD8mu~%x5~4|r)>7pvWD`fjQr8K2caKI^yMYf zk;$*cD^jUOqorCcNp706KU1Yz6%v&J)|#-A1ZQFY?q9O_f*H@6Z<(X6h`4Xkd(gf{5J zh@w)*YA*m+>fh~S`vpa>-0MFTz7tI_)h=J{SAHIj1!TFAimSD$K)W3M;*1Dmxr+nE zeqi`B;8(@39eB>;z84V^J=OszxK=&M1Le;Z`|km)gAaxJ z)e4`yOW$(QuJ-cu`_IR?!g1xFsNBSpfB+dJ 
zlV7JFvnTu$<6iMw$Kub9cGGCuKknwU(h=1!Zb%`sbfp+Y3ojdY33Hr;z%}-_?9uxX zcpKoIxAxzPg|~`)Q97pUi;HxU*3704`Xb7u)wgFdk{FBttImGq3J1_v&T)5%QmxH| z%GILubL#y&eDyt^9N<+=tVTaq(m&=>()7C>A5ON?bjVWL(#|A;2xAyX<3WX09e@KB zcn}UN_Q?2s`#xGWleFvJDw``7lWQV_22*g7>O+wfeE=9|IpIzYKN@X+Z4cTO z-{MrC6Ay`Io=GQpKe6U{4v?@gDj*iB%DYug1O5Ds#FLu&Jd=VmJl2!MQ&x`Y%S-;c z_t^Su(~598eMr%kudTX&U61K#9MNARe$Ah?=fmHR_6w%RAklnN3YMQoxs^o26Pei|Bqr zc&EYsDDVcY9*^T2L2Yv)vA3N*Xf6>>3r66P{{R7OFwY=XWLR!Nuhzd3_*X&kewj9p z;u|P$ZKhJCWl&1zx;IsCLI@(iJ^m?t2k}3{d0STS=$~HDUvYL+&Bc;0PcSbGfqJ+D zk;onY09D91(lw&6_{V8<-GKtMVsEg&roqr0R9fuVFc>6GqNx?^;2;|#|AAfC?(4o^an5<3 zheMW;_tR*3d^Pj=A8%B+X)TfGx&zNZqW*!I`nZqzjN557Ea+;{X%xJPBAY1v$FXOg z5Sip0+C8X6pPa|Z-4kn7ui@KDS)Y~aHxaB-#|z>erk{I;l*{1>*!}%U=~rc!iP4CS zl(!Fjw98MyPJUdEgG`Dm02H1v(%e6w({=8Ao5%^weM%8fWiw^V;9AkZ&0NiyFEx*+ z%0TlyLk$P$oQL{?VvWVYqETCmS*tN;Y&+bOZ!;x{8%Wuxd${u-;Gh`#*RklL7K6O5m8 zzTIDtP-|b-D37#HA+y)Ufw|GuWDRpSa)>klnlF6kxoodrijta1X$!qB{u!^wQx~!| za=>>!wa9piV2Ni4RhV-1zi$j8teoiG+5Zjw3tfD^F5RIB3*EOLGXs-pYkF63nX!~L z-4Y#-Sah~hWtTJ0zwi1|Ax(v?Q1FO~WDmxZ*vt{&z}a1n5uec9e)0BSZMS;*-TWTq zu6&a(NMP);3hxSb;yeLbJQ_OX3NXwwBf{?;x`TpWXUjmoe6ho(Uz%HD zirX%DDRi?gLl_0Vt15RRZ~eh-kRTFg(_**uhaF6aa_w7LV1_E(UgM*Z|ar8 zP{i8d2e6e2^&U1cf?+?v;La5LFFADDjZ5_WHj^J=0-Rf_$aqz767n?#+XcXj2=t2T z)Cz+u7cGd{d7q5i>(a3bzjik!EymIRUkQ8*La5*E`4H?FL??hQJtwQhp`cd{+5N^e z-w50)Qh4znV9NHvc{NDhbaaA6l9{{A*AMdF{JqmrPmjG#OEcF`<2O7ln}$^l8q5Q_ z3~2zmwFjpL1n!+H=_6QI$qTOavWtEg^o!l;3r*!bjSXr-m1fz?EbctRS$9=|H*^!f zU9e*6u}RTzltrNmy9aaYl=Q@xK!ZBzBYm^l3Z^={$iM>SF^ZW(gLMXJU0B}k%)ah% z{MRS3{T|Jo1aH5sytL?{?IhgCO?=?Z$US|RLIh2}PYl0Z7zYLfJ15OuqaG|qvXp9XR5lICb(`@!+K>xEp|`(V z&24H_Do$CRx~88j+|m$EXZbB~TqsWZLv7+maGrF+zN(vF$X{B5;K>YXGth#~P{R*I z44sO5e?HnM1Xk7Ml`npXUyj^8`8N<7Zm>%k8YYmsOUfjg)?T+HUTiAu(yz%Z`Acz1 zx}3KB{nzy0P438Jq(PR-o!wVK0_pc2kbq|S)K{7x^qqYq4!)A7rH)bLv6KWf_kJ=F zbj@-(JL(FKezf1{Kli6=nW@+E_9<_{$&5HVeMY2(h}7{ zG44g{P>l>>UHhL;-w4UhhB>3j0`47LV+P88F#P6@&yuEAfKTwIyIfL}41LvcQkht3 z5BHRk{SC}iCuYd;6>?pl6)8}Wf2>4w#t{49WM%IH4yKC6ep)v4Z$UR()Y 
z^dy}bL)Y86WEg9tN;?KG<_g7kf*`5k58E!Rk?8a`~$KX>)?T3nj(IvHM-TB2nqR& zwHh;e&^S!VU2ThBJ4AKR{qR#=8J@1`PR%qli{3L)KN~t;{Ai@_=0@G&NJ3HVQAku? zi1%IwKtNl802S={uXEGiIq<2~VH$y^BQ0J}vQ@9<_R&)z*;asFgu04C?)dW#gT>o# z-$NM}$PffDod^XQr2X+gfy5l^%omjL(j|L}h`laEkZRbG@y1{TEe(Z)eA-yXTDu$F zgg<8li@uv9p00XoxRowtt}u@STuZpgtDbc~%G=!Xm;e)_9g&9!ob9OuhW>TQ4$@lu z=cX=IHkH`bt#177R*XntOgkQY|oNBJeD+T&O zH4dsLxvpDZ4z4?&Z7e}<=n?w^6K^;AE#>Q5;ZqU+BBM1Ug)KKi{nXh$i+x(%Q^iSi zAz)33^x=rr?dI*nH9B%TS;5|#p3b5r#T1?)4@^<&2FZLD_VbIur3-s2O@2kJ8bm(` zzBuo_)RP!CcDWN2E^uS)ynCY`955nu_G|yI`v5;~tlu=eEHp2^LVt%uFkuN%+G*P zHEJXcN);Q%B=2&ln9=L-YwUWuHVI#L^7*t`x~}V5#7;iEeHJ=i6A{sabFK6AM_Su| zfK$^2l-&I8o&P;LWy)`BFFC*5QLV}hjqWN9Dh_=#5KI7F)g6(p|EaCV>$J?RN2<(9HD(@DunE zW=a9~0U{@{EE%z#e|E0-d``$AXYJgT4cEU^WZm2$4WPmz_-Gi1`JPbD7fL)0{B$0X zJJrVkO_&F>H~$BSv>#wxGarUB^l=2y=F%8sjG8?n=W*cp{It~zi9+dvR|9Sbppv~n z65IBFa-4E+1u_2^H3R**PL~82fZ6n0K2e`{7z^gmr&;iM=X~qk@!ZUZ!?*W9Advm0 z+BGoS&e*S9Y_37KX=4X`4<}&a+kz~s-Mm10Aqu8+5j4>*Mey?^SH{((w;~vW)vo?* zVFyRDl+MCea+hgjBiJTT0yLH2tqz5s?Dlp4?Fh&3J{zbCy<4ILdBWLyDoff)ku}Zz zbFzOriP%n&uHLTj0-8pIP@FeT;6t&`;Z3fyn*6{5onUC&uK`?F&u&k^h*YJ&U|`yw zZdpu%0mH4i>ugO|37*d7m2n1xug$5m_84jRboEe&`V`b%Rblt zW19){7I-DqP2fbadZ8eWj6?aNy%1%IKM_JPjkxfEq{!-f@OKw_?bIqh?(>~gFIj52 z9o{oQWd#mP1y+eWz&IpfW?6m%W6&N?&y&Cu zW_`teVQg)!tuu9}%r##o_Hh1eP`ulzI|K4?dL?Ut1(1*)3@==;*<%Yz=jdyyQ0DY@ z#Lbs3Db~=IzwE5d)$q}5=4Zxdpg*97?9-{ukcgxTq*~9gIM#ewzEijITS3YED<<0L z%rqg1e1S?8?hC3ORis9mVD}2LL=XLFN?+>a#_IlZW%H;_=cngbeX_iAU*n66QRAqH+#k1diVI8%GCIjN*tmSP zMRb?pA(s>6MuPykAqiGN4MwLf@P(V;dI{6WGE)TqsKQi{af?WW-73^`jWsdxV_vxSTd~+1~9dXBBk>(qpADP{xRW03Hp$fK6ies*-93mLt5_qcXxsU>%WKYW% z>6F-_)gO;>X zD66n+NhUV4z2r=d!wHooNlcH@9V3^9enxVlM1npbPixj3!8E_k_~b>bNXyEP=;GHW zCOs2dDK&%$gn!l&oPTCqICnTXM~Nb7Zv5dW$90y=#;1D6PeUp#zRqZO*IHCoR?xrn ztfW?4Ng7DuLRKZe@_%PRB$27JYbIo1R0 zXJc;*Z5T&j@RJ082~yfLd)pu_F5&Jw8pxmd3d~FhTt|T0ur8rLq<%f$t@OwvhyGGM zYioLaOSnahWVhdQ$A!>6sm1n}D5%OrcS4GB0Y_Jq1J1pJ z9&H$Wm}`H4nnA`E`a+hfw#oW87`sBhOp#e0Hv5eC>9)=mrme4cv*PAhWZ1>pEhc0I 
zcc*0Y10BGW;9eHI4zf*uIeFNcaFAxzk;ER8nQpGh77dZ@<#Vw(4g>YSk=^^8ILyvx zI$W>Dj|xTeYH`xTl69sl*4IrOeWoopCYTyKUN_XgJ4*eTd@}y}@gGvwZS<6wZW+M| ztfts(?yhTuu@y-8(ero&b$Xrcc3gpEKp9~A3wV%bmRehLmjLqu{50oIjg z%pvQJ>{7r#0KB1zV@ofTBxaa}qElgYsB+}l0rNB5IPt(pY3%6o%BM+^e6^`tf(q97 z`>C4>EC)2SwARH7j@mTuHvkX(^d83rFS&zOFYQ;QFy}=XM;NvH*z!;)3bWG}9q<3d{#z9eak(&(uZMv=`qG zYCr^iKkFI2@Dxp4sJ1d&?-0UHO*CX(JA7&mH#0nSRX`n54`Vv_f0 z=w9?_+i!h3g7KAxZA8S)*N0~8KICQ*Kh=GywRKK(cq?)1lQqDGGGCCWdty`I@TlqQ zrnNXHfJNd2ap$VvN7v+Q=~+Jm%Yb&dp5C#9s7R4c5PuesehnJJa_hFY%DMw-PtQwT zK$d^PO6z9$>7^Ny3X3LJYN@ou#H?-fTaEWSc=Ip@|)Iu>SxtU{*+Pf$X%an3naDZb?H+&&N8FDttZN%otTe_VP)^ zUshEs7)f*>G6u14kz#~soDMGdw<{s>{GbA_ax5_{lx`Bhke4+SfOwG}im$OrW{OGg z{?H$6DmutfX|`Vf7cEJ1V&NEUh|U*k>3+ick^`dMc(#E9ba0E^V7^8+{0AU9yx>Zu zg2_Hu+wnPmBH$-x^iL*0*S~90pL~QRE};Z5-34sOIIjHdu7rP0sar-|pBg$cX2q#! zdG<)`*VpbwhBHGqS$X$w+ItHqu&IS`F=8Mw(sF|T%NSw*dqrmV%0JQ^X~7yCx#;tF zw0cpv8(CJPOgrdg2)&F-Y1+aNRmJJ{TR$u+K9JN;HaCxfA1_A%t<%O?q5cd|800jCN{#I2-Yc_un3#6fmK@OAP1e?!582o_Flfp5pHNEu!#7R~ z20#Zv%bFM>o^oWQ)|OD1wb@hYoV4Ho*emYV6fYh#KMS2qhH?_Dmh7i&V{2=RQOmZY zgOFRV&h2s|G*1424pROhT9a`&dyB6$Kh-Ru%wIeGAk8jYS=T97bSCjNGQ+(rcB-4$ zO{&9o}N%`4R^`XPQ^swr9= z5BGKJQ1`lLLq|IOA$WzeFOgWy|2bc28U2m0F9XDcrjKuL~O||<3MJw4bo2iP>@i#lX=-RKqDI0yI z%n}#!3Hz=uz-Ue>%1dhYz5q!~hdCSm)fDsNobX!uhZkw@bJIQgz=$`gaR-+BNcQ$Y zKdM8dL^+%8yR*QojF9Djfb8!;AdFci(`@TvjpouDoZlrfS>oYaVL4ImL?gPkx+?AO z(2`n`rHV!MuGT-h7Jnn+`H0T4bHfycbL0Dv(#akK#xZa9YXA}txPP6fu)x2x-xncP z4}I8XJsLL<>3-b*3N-)J?B>w}T%(#UCVj1G9m1T0L(!n50;yX3Xihd z))TzNGanMG#2Y8tzwO4W9E^5{lU>pgM1X3r3+_3ZihUBr9i|{}0TIttiRs6VRs+Qx zy{SF{rg%AZQ`%cRje{5v`!~kOe3Zh`*8!OER@b!K&L^zaGm)6OTDQOR3C_&p5w07? 
zxMcxXzwf%ic20)-`DG7ynfQ%ugT5DiN>gK+=u9CA6WV8c-QyaT)bXu2k$kwCt1sEc zZonO(%7-ODI1Rvx^DOSnT_!>V+q`wO_qCOkC40Anv*-mhOb<7*)`R8ARuYKYEtDpS zQ9MPh%`HiG(VxD_d8%&n9{j(%Y2?hOI^FO<@V4%s*!NG>C!YmijZMu_N-89;%GI}3 za2^@F29SnYH#*0uc+DPdy=cYeE9Y+gYmiPtbR>YCWX>}^-}+xb(@-<=5$b`8gMD#sBKbK0b| zwk)O(5O(oB5C!I$pEArf|HcKpShLEzVb>XX(h@ezO6>*%T-Ju^1;nG|A`%d^&nxDG z{-Sl^9hyB~JP_W$*H3k@dr<0~TC|@@=tp*W;e|uJs#i&qCcXeFNdocZAa$hUwosIB zs+sfVy0KQeTZI`rEp5cD?e?vnZ=bv3R*HC!_|6pVrxhrZD5(CU@Re{zrYWTsOYHUq zT9ve_C@vx~Rb7b+niDO!SOM-MfygkG1QL~Qeow2%UhQtsm7~OpX&;))I6|TVh?OVP z4<^fc;%%@Q+yLt%R^T&GF^Mb@dJ~=zD5qBP?#Z&-i$?+5{Crn2EW*$z*f~spq3D6^ zFOm=G9c*+YQL)ihQk=X7sLF&?x=4sbAE*T7NvbW)A?fz@CUe$UIRnzLzZUlJBgomy z3n6O}@h%xQN$pKCGiYt!%e&(sO~6S1;6?LT%oEWTxtTRc8q7D;$sNQBcNj>Lt`Sdg z6Wmg~aNWMMjcaF7Mn+hkQM6cMj|PnPtwB?L6B)&d=dj{0bXi%+rYX3_w;C-Ct zutzKIByKMy?owzI&7PufXbw^4wlRpp{_7pX;;af9%8zNHJ;P50{xoRJJA!^us{=kQmWh#~3gia?@!mJ(eJmTyKbMsum1lj9IyMpzaO*YzbZ*8Y3t zq-BZN99H2=^yQvCLed-pP$rKyTL$?HaB{=Qk>-!ehE+tM(G!Bg|K@6MoLj z$}IM-!gof~rZiDkKoaE}i^miEv!S%VG@F1OLmUfwGBeK!)9iCpnculeA17xDn>snl zOsDNym$Q0J0I4UOFkUKxpn{sq_iK;pv z@yq#ySfU+`?Su=j^79(&6_ZWdBX7}F}-s&21`On&Z&47YU~1uVTKCjCjluzo{KQ6u9V}Iiv3AO-ecQzwM+O zySO=wygUnh?DxQTbkR{-K$*GRv7z5R;Z@nPv~);1Bm{F?OHkpE99?AH`Di5Y$2kxb zZ9L9@;_hoji2B(&8O-;yCZpQD=dks^g?rzVaukOvVUM`@C2@E-2|fo-S87U$jtfC0L{1^^_x#2CfYE9yTnLfbtw2AbA31ZHP27%>TS=s4~$+R#eD>RJ7+ZZH15f6l_g zmlrs9ovd=F$WNf>#IvnIGb$%bLgJ0)b9~oA?0E)Et-a89Y+u z0ql(MNWohF13W`!tHDoX%f3zdhTBB>e^k5u!`I`z@%-45HC-~DIx=`Mdsh)j6bGX; zrT^E1QafkDk~i4>OEu-6C|yS9XLGZhETn76Es)yfkJTO-?@R~f9ILMc8gagzRIAV= zCJG@8i!H1bTjmpQoek{jZA`riyan;|PSCn+=aC1{U!h)ZoTd;GvAg^HmR_@TfG5BD zGIkfY%auZ$7(Fb0e9_2JQS&9qr%B=aiCvq$H<71&GtKjGcww|21ZrqPJ$yy&8X1}SPBo#k(qK#f#ebEC| zM(VR<`g(SC++_nhIIhJ938(*!x^kv)OUVTo{Rook3g6dniDY*(zTb|&xRjiV&ir@0 z>%KdnSTMBQ>5t5D;aI*Me$6F*j9mDOWG{q1LS~FS@2FB@8@jeFuvt`F`>UYm1 z{Bu{v-1@7@rYncTU;Fq}LBD{`4fu@!5uv1Uuj zhtf?5nz`=MP6hnPd`@T5jK@_b_?XHSNHSX@f>Pd2&e=;Jjq#aArGSNBlST;`vxs^z z29+^AJiNS^_4rr&RYk3SY<1Ktd9HZ4Y&5-e2iv#Asx^>lCdHj@dPqf)Sf#GB1LQ5+ 
ztxjVL_uGWje5(9eDy8snWcT6+yoJD;2XNnPTFa{Lev!XE>J?vEi_cmVE}HDO2jS38 zizIHzT#L_W5Nj=Q2pVP9G`!f5GQT$pEb zNYmhw=_ZODvWq2dmV1?P{ypuRcYc#%$Qv$V3utD-7jevLQ>}%Wb)7trQi%QA4wPP@ z(uBakIqZeUX4nIt2i8qyITZ*lo4NYg#ldmf0x9q$9RH-I1Cu@lX*uR9K#G=Yl58gX9)V%)L(3%qh2IZeo0= z%WMk=xs0mt1r+OV|9MYfs+gyji)@=KGYq7#y zk1vm;nwgsK`tFw4)#NY;Nw`I{AEj)qNlZFoA?VW_f!306xg~?hH1Y_>Jd!+bXv_aN zeaM#+rOm!9QDC`mVuw4hr=GYMr<#CJ4g@PFanQsc@O$^-N5iPkLw(~5Pa>0fM=eZE z8!=!X1*0c}OWr~HecX2c{y^@Sb5Mqu-ZtRC_iV&e?_!OZ4)qA%SosjUjM-6G2EZ(> z_U(7S7fPMlC(Jr9$>bnn7$ka~rxPBN*l;KIpre!dGfRR_byf(kw+rSmMak6 zv-Q`><%aJiMRP_bbz8iGi3$%<1@y)|-yuP7{8 zdJ+-H+3xJ^dR%h@6zg@Lw*pbY^Rm*`Q9`1ysW4EzQUAs=KE@Vq@l&N_X@tr*w`AsX zW90B^Xu$AMt94v@VnF;u3mkf)3@7W`Al-rEmldkiX!um#1`JG}&K7L>U%tozJ9|GJ z9ZYIw;X9|Bnq6mvcg3JNi~D{copqXKKD*5FD!Wkj=K(~N!m?OBhtk)Zo8r;jvB=|B za@p~OAd`P4^;sg9=R zq!+P8{HGjsj2&s}y9H{l_Igy$01y=oh zkV}1F4VC$q>;?L^QO|;=V(7!jxCnIvX@+hxly$yh(yofwX0vrT%MM|@8 zl+H7mkH#r8gz(-M9O7~3K#nL0@`fY5$8-=OaJjh5o+G|PP-s_O8CFMIetaz`#-K8T zmc{FL46L#YPvxc5yyI21bH@j~f6d{5!x1!DsS<2~k_RsUDLVmgzpP1To54?}2QLqe zlKa`XP7)lSC@9LtZMq1AD?)!wVWqhOBHhuno6e9j&3lMsx*Fc~w~jz3i1+;+OMM&g z#1T%cfwbc8BY+=Snwm>~mq!;r)j9)wOnWF%`Yn7eOOt^UUd9bf+GvP5i9irnM4TV;y22vlMGSo#Mq=yyG@bea$qk=|u-Llr_DdshbfxM*M&$8r5 zoyUiGX}?&dp79;9gkpW0ln{prr~UWv2t|Q{QfvC8%<;hO7IojxJ-AeMtBY30hpCo6 zZu|~&+}C(C`XyJO1)F9;V~5z3`IVhxm#JmoQu>V$T&;Ud(~ZI}nlCtHum>Ehu`u4; z@X?s8mhWeS?TxJ} zrLUcHYAHCf*=6ohQ~TJoK2}vFp9|8#I}ht5xVyK^=dx;9*dX?HkBK(3>$&)FA-U7a znAC`#u9Lf!8ZS|!pp!{}c`gItvSvHrwTGIF-ES`^Bi@qPHitmhgEvs>;3aJNJwRX~ z#-yt$=;MuxhhvEn!czThZVVS zseV0J*Ce~Po*G{+WF+s2%)uyVgUzsu@wHhi))|30x3@=MauvW-vrHID_kB6PZIYI@ zcSW-hqumgJ{aBN{+tju#6O}YgkrBQww>cu#!X|brSI}L+cM}8&zrgkyN+vX{SfWcB zJKBZ%(S72uAh2C*eBZhR_{eNys`UX6JHOi4)i;$Q6Wp1}I8x%*#%GXwtDcdX1$aU~ zw@9WSZY`oV{)hp4igoVU>jXS}16n}{&~(_#dVvVHF=DM!9cGX@kr|oJV`%(rm(<`1 zNO*zcS*YmHMpfi{nMVU)oPMf{0~-g%U)mqqyJp7feSo$jCCOQIny)l;#;z~f!sm&- zUPO!Oh>xc6)Zb+Qz*8H2F6n|sm zqx+Mfyjpn3a@h?df~LngFohW@0dw){eXQi56!q(>pC}^egp?z;31(~X2w#%KE~eYT 
zk>p22V8t+ci28vd8@@og*Q!?%@pfb4X6Ewptmy5?yrDUj*|2=u~#S;eeHIO^yRhw`aZUT z@nk~abl3N(rSqG1Bm_=+@l;{?NXx5t@Pl*G^TUhh;tcAInGxdcE`xwO5qzUxAzwPF zz%w9X1%4#)dt^khld|^je*ny4if+G%vBcU&$4+>nXl=}T&jD7!ZAppnSd#yMkpgL` z!MhwD*snfk$G`H=@E01Bb*4ey7hPOp8Pnn7WB|GjZ@Av_wjTV%y<<5#pFiBJii;F* zA)_H@Dc^9 zY8R)kNC=t%5tpv3-O2fU7w9pLfdV~~)hv5%j`R_Z`*cv7ySZ0L1^4$Wq#MEvwK0b} z0_zMs8?zJ`6Iy{f--TF4jMvi|t`*zI5sSRAHZb{38}|2ESe|!xdANGN3Q@gm-b(|1 z4ea>sPWmE|A{+T4@XO{yutKB#{NCOumfOiwu)ep}|8?RSmEW4bp&>5yM}J(#2#}S5 zthAcf-p`r*y`}pKlu$^IeAxm$3tbSnF>OPKhjCtpQS@`#fjW-;;n(xH$6raD3`R!y z3#~fqABuoh(CGgFR9|?+bpVd@ozD@fkWV(X&&dZ|tiFtla`vkiK30AIw)oZ4OE;oQ zInf(ytUepD=rnES$Gu%$B|gW>xR0C8rCquHqM8nghyUL$pZQQl92lt$~SB@j8*&ORR@ zpPv1)xp)K=%jvx=$h^Ns31Iq|u`vfXt8JaQ_X9P4lfVr%V(Q#J9ryl08>JTiZCV8@ zPJ>qM*EV-Lz|$B6CL~T6{Q>)RW~Lh^KX20Xj#KCr__ot`<0(NsN$Sz^^@%ie;e09O zW*rV1GW!n1JNBt5L=K!TO|-tK5wfHjFI~DIJ;4tVh*rWG`?&BA@kg5@8BZywBkT6z zyUX3i*vy`vIY*sZg45Gh(Axgnb;HYR1(?CI*Rkaj#P&@F(h;TLNMVVb-qV1d^#!ND ztJf7+MR}xAeJ$@gl{T0PK;GUIVjq8)BZlD5tO5&~@#6Za=Tf8*{Y+HWdWNCZ%ZFy)N1gGgt}*a=V`f%!9BmQmCj{Z*_F#ww?$d2p785)nuxa((R_c?BLs) zXJ_7y8y6x9?wsu!VcsYAs9_X1*vu9~J!E1&-wLIKG;;S(rQBB~1zev5$>Sz|ZZNYK zUjDhi>;HjeLo-ytVOT#uOi{MeL)wR$9$F*&)l)w2^CoHsO&7brW*Rzj z=nq*;-xNv-qHx?ImM0=3U3GMC@R2FaA@NwNqT*Hf+oRa3Hxw8n8#hU(Nf{yrv}akF z!k!?BRJtM+c;ts-gLvpqnp*&x<%@tPM2YI6LwQe=mndV|9j( zc^n0Oz$QiNMDPrX8`JF@5TUreks=|oywX1+o)WYBh$A&jZi9y*oP=!6qa7&8$dZAP z#Vl@6frb4lhx8i1eWKTnDi+3C8f z0;^5hHnaqybeFXTG3OEsn5NeIKsy`9GSfaQD_o!X`GeX4l9!ZXDzrqEg#f?-0ANIn z0K$gCI?0euA=G}C`UyJqkwLLw|9$WXiSk}*+zDP#4oVlZT0n`E#+r-~9VGDMb+MFN z!HB*3nH{OM{WeIh3c-Wvl`Cx`c?fwYPml>>V=&}da{`tIClKO?Kg>=mOA-Yt;%=%rJxP1cV*GBbYt=#fOg9r*jm`S*Z%pB$+g=l3(6}< z5m^CyS);(Rwu*Gy&ICvN}X_ac^NH*a|~Ka!4Odw9X}6n z2*XIdySZN(&#HB=UASme$8x`76lmrRKvO@S+;q+sXq)LqNC;xsz#y zv)j`=iWReR^&cupR9_5qL$O0!eB0--mJ~?4E$1m(yXlh*n%sA3<9;7hSVY^I zA18;9uWGp#|j~;6h&eu|l6EXb-C(JlzAI z>zX%5bU1E7So{KJ1}rINU(AWM?+W0J<7T)Nhf6;S!~Mcaf8@OB@I*=bnW6KS;3%2Y 
zybqgaIr){2xnQV`M1G~&kgu~$MiyO$m}^jV|sAX1f+_Pxg15-&_66qapIy&fbogi&n_z!Yy?CySNClbCl{QleiBk3!r^dYvqh7ys0(}}>}nR8Q7 z8G81DHHF5`xnHvAC-RJ54U7Dp4Ey__2Y$(9^MA)9Op7Iil5s{Myj65EWlS z?;rmmF;vC}x(tj5)k8uRSP!#lc$}G{s~33!id>Byh`Qa?j+`$pu^|+~4g>gtF=ldp zlWX1jov-diq73OG?!9-7^_Y(S$n&1>!sQfsj|*?PYQ|jn+>5xxh_7n~8v#|6#!#hz zS!n~COTbWbKw)6cjr$uLjzlTBK%)&%Enepe^tlFk_Y)^V=H05zbB>N2I+ppa*Z%>m z)@r9<<@Z_7yM8$7@xDD8I*1HWAR^g(^ub%jt@bU)`@)Ds#<@QSwiJUWWO5|dl^fe9 z(=nc4Sh8>cK$Zxs?HCVu46!^BQyWo{UG(6H|M*%)@8*~0AkA$X zFvgonZ&a^d@rmseD@%#2d6A~cLPHJy(l2k~SQv30AF~Ei=ruGq$Ptl-(l5}GRMvNM zPV|3ZN@*_fx2&jk3Ydj@luur&4{>{JEA;-(bk{z+?%*k@aH261YgobRO&(i!zc?I= zosp7qk@YO=LSCRBfrA!tLmWo?h8ZI)i9rfyJqB)8);hGrY}g`akUp|+pl(C+MoU61 zT51t}+~bC6B+4i;Sg~=ceVaj3(F*Hg9I^Ad5fC|1VPnaUc^^!5k6OqclWxmAEBo5s zI$|@zRqxP3*2p>d8g)90;i@E#^L0&<96B!Gn=Oe>6n7BYx$_tL;WEAK!^;))KuD$a zsk~(Q-SbUysml0z;nh4*3hj23k`keZ&|J%}vc-u%+B*H0ZlB)oWZ>GhFK|Pg4!ko` zbERz$6afL&wGz-0|He-CUlfM3|NXtc=zD49@JLxOSor#WZp8RykgEF0TFUhI8{;xV zjj(0KNo^-%WRz7CrvW~}bk|P?;CCt)26lzOZ>OirTa$B%&p+Q2!G)fq_0m+bhWd~x z9zA)XW*D5qYp4t2&1bQvN=78Tvi?ul%eQNS6#Tuet4p|Dv<~%jJWa}4+cL}AQj1e3 zO}?jT@!7bjXr>#lo+b+X&d^n1>&4dT>DU~kgd>XYT}wz}03&o$`<8t^cXh$?_)Hov zX3jHDC;Vx8ILS(nz(0*^k8YOd>s{y5{!7rM66bP`k;EP7q5cUaho{(&w_kmJPkb5Pa};Q1vM=3<&LnF0 zR>1s^)bX_NqR}id4|AU4>O7Gc3C)U2ZFjdC5Qi$qPNI#AYPWPw#$9*u0^1? 
z-PpespKh;b%;6 zcGS?@IO4GWeWCD)0g)cc<7zWBm8=W^hj*s&Bh^M%h z1s~%X=N&-rv$ToUmR-Um-O5+8633S`c80>g*+L2a2n*)J`a8l&X?{;AGWuF-WU2Di zc7oo$9~dA!dIozOh)}zvTs;^-bQMB`1S-O# zLVy1~&B9~#uk%i`*G-z1)YAbe76@BtOgW+Zq+xFtt7eMttdQubB~NC`zD>$1qm2rr zkji~*;iLF>)_HzZ9`Sc2UdMxavX$Y+p#)y$Wl0@gTc8*)OFKd*QJnD0sl+IEt_G*8 zv1&>DV8mYPFblR=yZ|r)?N3?uy184x@bLNRY%y#gefxB%CmJ96e^4IlYs8l`yrl_$ zRq4?uZw0@I5+u%73lgWMOOvQG$z|`+XSmo4ma5kf8?0pKc&$^ z@XZx$C`flz)6ug%&-Cz|f_G5$Ns|!?(H%CU#fQ^^h(Nk+cf~S`AhThz5Nf}T4=r=z z49)x(;~Tt*X%ysEx-?Qmg#f;sHwNY}yEi|60T@*Ty+po1&}{q0TElNc#pYYBy*&K6 z&rStwaPj@^+_HBGWL1r?0C_~1uu&;D{^3`XRn9p<`q|e%FR(Rj^^E6Cd2{d0zQnKG zU&>YB;9)gg0OT_^L#e97?D;fA6@nXL9TEv73gYuBKEC&cCWyPJ*eBnX?9UoCs}aYdPsCV&0O-WsV9jmQZfm3 z`BnKCy<>Za6r!vf*B0KIaW_-daSX|xf-jy8x~l@oqLchkQMINWLb zPpN9(0l1{Czw#A@_C@2chKmq#-x+o`w9Y`eX3n?LurkEB=Kya;nxuYsb@5}uEAbVy zG`9H4Ez0!XFS|Rp`rcskC8scBF`|gzVpqBE2m9B+XTu}EEcviGJ+g0Q@x7+vmTfLV z**XT7t-llp{St9*utY?bcuc$KeSLdFI6}!>HLdVPs5gm-3?t*Ee&RNMr|3f-R@oGa zfiR{qJw_?8AWZwG1R_Ja4wm*>G{{0{+X^oW z)mn(|;0PMFTj3g;c*{*|C9G|ec!_GXXW=T%Vv=}q#BUd7Xz&r_^EkN?S%fIM=|@%}%wTva31VaO@Kb2aIrF|^ zWAhVsQ(*?&ZqyvH{Y>(0w<#G06EkrS=49A{lQAE_H~+>6AtH4{_}&Iv9#Bj=9Hu??!?XE z57}LA5K`pe%3C%#7=CQ96}ukr;Oeb&l@IonMsAHmC$M;T(KtqcM_zmk+Zc6#Fzzq*Jn3kecH*}C zCq*aP%j_3Pv+cO`E%#zT4AWT*orba1!`R2s{1kLri!+mh54%&J1^87LBoF<3_R8WF zsT7@>1#vIlICK>^5!}ev3s1Na?Ag{sUAC0I?!fh2et4(HDL$(35SCB3c+E%l!@Y~# zVHa~#O4Up8Kq_58*jeLbC&(%2w~vAnb=2Ehnz+`o)mZ(dAq?{+a4m)fICRja9srXM zJY<^wKb1Y3zYC$iYolZqRthr=R@_@m`8|ZS(d4QC`~k>`O-D;3kBglYi(7Z|8O}&n z4w^`B8;HYnR~4f2-zUrTi0C`VuGvGU)==-(q~zD`V+p(yW(~r2`W)$!3*2xK|c!m&f>u_`4!eGW2O$kxI zzwB#seq3MrSJTEM=|Sf^_~PN&*`q+Yv)7D`g_qT+wVOleB*1%nfI3-MjUI?zcc5V& z!82R^GvH8J9vri($`hi^$w1WE^B_Q3?p5n@B*4y-0U3lFO{?Ls8wQ~ZJ z#KX1DSj3!bldqo}nDEMXg0KO~kDG!tCMDoxKQrXBp*FAN7(y2ULrcQ)Z7yeBR+pR2 zy2O+5Ib-9kH}k>IMr0t8f6!^~HH~g3)6atpa!#>H?PJZ+A|ojDrFTaDp_NnT5H6N3 zeVWppXb4F9$pqWM7Bsrp>&fG`ApCZ}&&6Y^FLF$L7u{WHi@HL$2Sy0-qwwz&brYUx zUmD%DUnE6;1~`|)lB;!y`5$2ql};J|9Y5PeFnf!07-TCpFfo*!& 
zCYESX-Asp;{?kWVkX))Zh~&Z}n9AcVlwL-YyR>cg^gobDCKfMFaJ`D0x)ojpdvyuf zhB`y1U_Yg!lsNhWkn9m|CcA7r^Jk-_1^BYSfIME9^0t?e$Rw<+(U<| zk~^WhJg@^K8pi&tG?BoJqE9O4^=g`z_=$W-?^4Usj*2!DNK}E2iRq?u%PkgrnnY`x zrDCI7ZLj2mehSn_qa$l6m+8OWRzB-~Z8^E6v68wHY~~!yzCuhGPfaoH#cq7u!5XNB zj_&ECUG#13_K#?k&iFOR%_TupLocqKDIO+0;QoRwVm*8ux(=QC5A@R|94D*_j>WO? zc19~$oN2hZju{&5YsxdE{Q*g5KJ^yl(HZ6u<~C8KC}Kxm5?)3Z5yX#f!->-SSuep$Ir`r#O)E(yd9v6L`8FMuGnb+jUpbY2hS20A2)B;wZW158|AwZty94SsTv3yxiuqjj?dg z5=zx;j0b6H7eIkOkS6KK4heO_@IV(bb5``nu2K&*`X1ct9#~yn-YB;_?!9%{?s)v% z8sh8R25sf1T*bsI7vk!v1^95`1>T65Y8W61R=2^Yy=a z9ouu~=;d%wHe;Z@Of~vTod@d7onlSOOug@3l<06dxPEmQjjBYbhKOJ(JS^G2teM6? zuJbD!sG18R?<)MVp)3A{nB!F4+2P>9^OiyFH^XyY?d-kTntxgVFp_2I!jFuNZ6B(B zY7zZU3Lt^W3Cw}S__bXirT(NM#QSQ^21v|w-%=0=AAJw&7p^|C0u1;?cM)D|5w6R= zQ;4=E|EtKqqms+w{8{JCO@X1`hCA%KWR_|83Cv<-Yiut+-Nh()psT;5rDI>=m{D;9 zY0CcSfz!3Hd9iM0wO<@h&xno1qZ_rg?SMmf$(f)RL)muay}VMNTF)?qH+*G#J68dE zCp2+0RQTrvw)JVfhJbhz{AmL7gfJG$6p4cOBZD@o?*W!oDCE!dVO_Vrt!(p`T=uNC z{+$f`2X|7D=0bj?m9*`p8hc9Sm;MlRlLm}bH=05a@w(~o3U?yye2HrW>$-rgwL^Sh z-4NqrFX+X_~K+!@hGes|cXsm(h3eN?d_d(-tj%H(>or ztnFDsNAPOPzF{|s^h9Dy1D{pjdtF3F$@kCUiy~~e>COjGb__K;`)4-phR$DY?7O0S zlS1SDORUXQ1=Oz65Bt`iD9mkuj~?7wVpT9_<$_dgz){^5?Mas8*t&Md|LoW;P1PeB z&XQjlD9Ng1-O}(o;c=MV|3S3ney&$O#iyD! 
zm_RF8!VnVLE(vMznY}JiYb$20YvwymDVFY4=H|PIRC}!Y{uGch9uh2a75r;%a7BKQ z5-a~36|_4i%qdN!%%mEt_MxIaMy@tA#ptUTbzsX5-7V>@7_O&-79-)~g4BMrEE1z& z_@!heTa2|Nx9?c#&3%m#nfC&0;aj9>u)pG!dzv!OjS@kd+jW0@{{xAQi5`$05iL$O z0W=bkSN7?y-lPw7W)0HzpwU~>M<%zR(pWeg${9!*ORA{lNo|Vpex_#8mMc9WNUadV zLO!H!4d(ud@AT>8TH>Rcl=5BU^x3#-6LtF_D}fhoE>D(t+<){6kbe({)PfrzSXvS= zIRITI#}T#}`0S>dtonYDYtyfD6|;*pdh98mzVNfG%gPO(VvzLC>iuj3X&%J5HT&k~ zSOvIk5o4*N+-yc!iRu7p%M(5usF>(MMq-qSTD%fL2Vz?xkS^>&0{wlNMAJw0aKILy z33G3J@R;EFu|LV4f*2y0%6G!J5p@#jd61y3)0XlvYITLQPTunm0q9DG?pF3z8ra%n zCD(fbHP8)C3Yrxou^f{j;`WzWz0}u(uqR*<&A2d?L-aYz>wX!QeD)*pw$*-9 znH>F}-rtSX`;Y#>1fn;cxQbQX>CCdiZ0&fGTQ=9dC;aa!@5%tYvp$=U2a-LMzR_Q$ z{H$R+rul_~_aB3Y_pG+MMkG!HYg48O;oD8U>3(*mRbM`}za>-KQ>=wEB9ae8 zO$>iEF3YE!wJ1_ai-D3wA9~m*IHV(gPEePQj#i~L%w1oboc)~>M^yC^n(64cWbHYZ zhENtE6>fB&aCWlHotFcQ4qc2bpxlfrDuEG{SD7yiJ5+v76JTgnzfKh`{-V|y@dS^b z0LUOk1TO+vk4JtF##SZoU`JL``6h~M(Qm)`ju3dK{@b)_6Ge{jP`c8{cA-Sg`L>$l z9e>VPjZ)O{)mO_i6!1 zFgIyn*Rsx;dr)3lwtaON8so=(LDF!#98Ekab~MvD4*LN3i5o>iI`HhT9_hvP*I;_D zH0Tp+hPaJ>Ybb;=@;}*e_K|)OzX_$re#9UFR8UeJFIpr4?o)Ox3LxgH$Rq_d>c zo1>m0SNKQ^_qGoY*AteEk04O?-9fak{DWj2uYJjn}9Tk^ad}|sHps{QOVh0!VjowkE!tlsC=^bqF`BRpAi&SKqN>*|L8PQx5E0gv+C9QGAc8(F^(irT zG*5ZSGn@(4RP%_A)2!F%4ZbbsSAAI&M#N*FN`+D)lr23fzfJd72TJn(wYlJ-QSOag zeS`P&TBoHmigTsc>~nS;r%xSDlP)cQ?jXv(gENQ^aWY_+sWtt@#4zxAO~LHeSE>#lKk;@v_fDF=WmN$*3bD7^07TY)TH*bxZe)t~%PL}gc3@WaM_j?&$P9sY ztJ}Q+Kbu(iHMT~YRC?q(*_o-gAJ;I18SFM^Mh|?wQH>F_Y74Ea|Jj$)WD5koF?w^v z9+biPX}EGC+PJa9$`Mh0!i_iH_?0$=`0&G-?g5GQRLN_3b@Pt&ecf`VXB&RsetSg9KK85n zS06%@IAE0@hliKyIX&DLzEgA?Va>rjW>|O)% z*03ENg-_2Rq{ARGPl9sO3CXrHSBmU;wl(8?jfR&Dt}bQ;4R5wJ6Wg;F|K6nx+jMat zWx+_(KXnKg8K&glp;s$UYh*XGspmwnj+wQ)JhrmMop_~*LtnY77J9Q`fw-}-qH|3w zF`ev)-oEOGErgr?-T6HiNEcTvq@j?}2U8N2y_+8(3ElUAa7}#vJA|5GQnzvx|Jh4X z24B5L6%ceNBYUtM9171>5`d0FSuvq5!j(V1oL5^Mrxd-q=5tZOYpJ%LQ&v-7dVdOI z<{^U~gN<)@E?LFyI{P_)e>ob$jt4p+lcHB2qv)$VVKm0-_~G9+2dt6<*rL}aJBIF} zt0U%c{5jr}wTqknN!fK@>!o$UFRdDsJ)8K|I9QU5oh!8Gi`ZuT80?Y7?`ArY)5sr$ 
z;7E@i=v}Pan}>xl=F`{@4!Q=dtVwaoeeQr-bS;dn({CgHeEu))yw$WH+&6)THlbJTRZ!G6Bj@{|Y66t_SCSb-WO5 z(-1oKb$BmL-mf%vy;wQLvXk@%^C1Wc@xmm&TxFc`Xx?5KAV$)m!fF+3jno$7=5(Rc zUv+fT6N@)#bzX}_(&{|VAN;q5&u#zg?k%^YC66_`*24hW+NBt!!Dn#6Q1=gPGe~y) zwkd})qEf>SYeee>+ypN+M6G z3}3}>F~}%7G-GVI^Nc-HyzBb&Iul%L<*z$iU#jnqpTb7k*%80XXO4MX@xufuOnu;d z5>ph9Cm@1v48OUbD<+rHon`h3SS#`5>mM{c$5!rX3G9GlIAF8`_XbCZ*h<<28#+G+ zWL?~)>`bblU)8)L%e@c1;2lYiWEzmrU4<~*O$i6>Hx6Xb8fK0y`Z4tf@pvmyXhq8G z7Kz`zKb+Js&nCzCVL6DFp#YDrt#)i-WpT4 z%6&pQAX}MCv@yh!hs5{)uxnu>OwP+H3h--h_OnlDXj9aZ$x{7Uu4tR)XhX86dTk*0)@id(yUzTxwX35jfgg7xbe0ehCH<`6gXpEKF zcb1Ks`6!2U`rR|Lv2I;Pp#B;@ti-vQ@kJg5wF@5Up2haJ_92-TV@H24g@V7tA zB-u5CPxN!qoRHGVxoVa#!+MwuZW6tzGur9UUs_20MO8LE3Xcu;jZIujr>y$VNiCUhF zkLrN&^d^(U*A2oj8Z!nb3Km%m-nxdt&=ky*y&je*|IyCZINS0-8_XSA1UcKlR*k$x z3(~tt&kI`Uur?IDS?GCkJ09}A)h1DHEgZVfzkyD&MIR(FwD z2s+fWli7Ex_-`PCXSH|GliuJ5SV7dZ5)JBc zCVhIzON}odhgZ0*JX*yB|IBZuWgn)>{NIoML+^tHNCtH8uR$o*`>v>i88@*gz0_vu z&pYi*l|8E7#0eKR^dZGk?c6JJ`5FL3er0t8XQRoVzW)r&Es2hR)#Wy{HGOlQtRODC z;b!0 z**o-{53sp`f?94x0hy-qaea;h1Ropd2j|rMx#XMT##7Ovr`I0OUsqA1p;cVO6FAbB z$<}CzD6x<1u6k1sGpwGw#&*(-_qcW3?|6#XI<83#0G}$g|1VkhBP5!gnI1?UxOr=< zjLsXTZY-r~%Jkv-{@Uc*pDlq%Jok_;atcX|0s1{cBoRuA`4vonIqh0BSHbIl<`-b{ zZQeGJIvKBQcYaHoFdJ8L?&P70CeI_U{;~R^%y7BP*V(Cvx|=^gK2=yR^a4M2Q;8Qi zcW^)Kp{Xxq@R3sFPL znorifEwKFSR`!}8onVN7 z%4-9<9(f+`n+*OYj3{kw9g{?haZy96@c$ z`jT<(i2hxemhg|4T2#>Up#mjhE^sS^#)f5!NJeG|z9@hYcYfUc^mVy_-CRHW>G?hs zqDqrA>}F7xHOS>fd-%Jlv6orA|bVFE8jO98! 
zu7kcAWt&t#f1xRi$j}x#h11sN7Nw{aJ#&A$JM!Y_Zwlm3=HcC~cHqmq{EHUezq|~K zLdYJM^PE&;Cf&z>xU67xLmz&whQucuK|lt88MsS(F#rJ{yaUo}rQ@7xx}E3-3Jb98d{1%viPvL=y(6c z`7u-pHw~+$h{X{HRSqbTpEXPn__ei+Wj5X$=Kk~H9htz?*dgJ>@Oa=O*Urv$yzPr3G20r|K#tD!l!gITJ*vd7p^In?=ieLGB=SMe%RlZKwJAeSy|vIKIL^@5id)mV1)rP|S^P9p_XKaHa+=$`YB7_C`8aK}3uc0Yp$ z69@E}WqUWXFsI5rY0=dMb7j{4_VL!LpW|c|Ol;D+dg)&Q7c|GF!5K6teuJz@;^vQ> z(}&#?--ZQGhtZSiyVj>YHjLQL{qus{fS}5&Q>g8k#&_HZghg^9HVyj(*VSA*?PQ&- z?dL@nS6W0o=_VIe&AjQjsLq1~Md4j)tn(eJqec;`(abn%6PY&k4FHqlV5Wi#>Jg*c zG}r5I|4qQm>B;NvzUd_kZ|Hd<-^Ki8!0nGxLr#rDHaN;Nt~$WHJzQ(5%1Xj?#Z0YM znap7(Z$9#!qIyl>Sz?{SEY%OF+Tpb4b}($Hsx-n?HgLter3hZRn=~8@kg13PO2_EN z+5!$-r-rBQ^?gE2G?%(>v6aBak%ee@DH8BtI&1|m3|?|!lF$_wl1Vh(Zirx#g_WFu zVPT}5OAVRd8Fv^H1GfXH>JWs#ZXKQ!bXN>SL;lF@;EuiOX2%P>18ZeShavk!ev_8> zI_nD2M2EUHWax9fRA;7fHrawNtX5S69l+eu#Tx5{F}`72AlvCf-E zT+c;UvVz}?{OEV!&E9?!{0ZPd{yqr0Y`9Qjm++&wta4!({oe)|+X`#Y{|{6<*Tw7- z^$RPU)T`#1Rg!qDZfp;ozN&p?sgdyw@Ck_Nd|{M-&X?$bUIGUE2@3Xr?=Q%<=bgE4 zcz2UK)q^p&XIE0zTuqst+>mtjwO? z|8>HW^_*~QvEJ8m`fD!(rsYBWz^nT;kNtol$h+2^op$&fZuK0SxVTDGNZ{j0%+e zj*)*^iyDj){5a)VEkzeLJ4+RjD9Lg!H|=Of@7wJ}d@b|RGq*CB~ybH=>w zZQhsJn>zKG^p%vHulw987501a{ILh2r+{1QS3V0`DspVw%@2%Zk~L^l!4;P2V?_;;?h-wCbq7ICj%DFrIaE(ZG+mY}b^*GXC=6(HEB z@ZiHH)Y`ZSbbMEpX=io5^bQ_Wi+ni}zvcQ$t>#R38`-1Ig)(ER?Dr zAJX0NGB(v>Oti~=G^a)C6IAB(ryd75j6OxSG-WAaJzjPH8*mq|y2nIV4DH4N`!v+M zuQR2!D84~^tETpROBbgNk(F-8?kGz0Cm@x6REJ%UJLyrcBvcs_@O)0epnB2Qqt`fS zpu7Syyk!h5Evz%_MYZTuO+o64ApBgACUhD)#*Wgyqv}_HG++y0rewS18YKAdqevJ=ZVk9> zqrq>FoGsb`JjBARoy|M+P=vUg=#UL}7||xxY+DYJy{j`C!ctFFfH`@qQ5q(jI)YX= z$>Ga-F8;DB>6-^he z-WthUX98=~D<$rpsz}p5wmr+cRl)UlQg#sRyPDbQnE6)rHvMYzp7TytXcA5C;#yIC zoToNt*$XG6NRrqni#ySF<6o|{(U0WE!V6FfxB3WOMaB!f zOHHJ~Z5P$9%4@Rl>Y!QfzWBuw0vVan^!?n7lcv(4YZfrx)^x24^~{{wx#~zK6W9tc z&cK)g!M4Twavt^!{+b0w_L(;xe(sAV{UNmxjsv@(-TfesZ0r*Z`c?+}>JBE^ET~|H z@3D8at!!DgSf`FZ%1qS|qqiWuET-kkXpz6x1g;N0f}zgV$((*H7GTo)ZNrOAw5Xg^ z?yYz|F*pU5y8%AU=j@VD3amb60(p{<|D**CNxUEyw*I=Y`cK`&MCJpwwwF=<-Mwj~ 
z;KC=KRENVrvBLZny=uM-2XO51EmrPo?Z;W5n_zjs-{6kZJ8iK)ukAn9 z*<7k*t;s)^OQV>E6doyPB7z79jOk9I0<~@E3yb$f>lyUx68?=c$I~fdy|c!eM%_eCL^;<%K0OIQRhNW*qlSM_qTT~NOu8IV_eXN(t|Pa^4mA+e_$$2 z=H*WN(9smMIkQ*^!tBYgbN<8>HYp1HS$m$~peJPwSdy(*xtr}#3CYM4Gkg zPmq{@0M+szwLD)>>4zHs2K9-`niE~Y|FJJlT<*_w2wdkAV%|$0$01ygn;6hTmsiH$ z*q`7F&&J*-9&>6Q?zU)Ng5+5pBWvWd@9IJ`oPTZIRUG5SaXi2f8~*+j$Cq!8c9!-3 zb%1R;rHn)_-OMbJk{)TDvVt`lvrqRC5w6|%PJ+K=cPnCvc`oml>m7mLbxgWAU1*B? zIXNHks;}oWh>g;Gadru)cEbVy;p74e5Y_Z? zZTs~MJN$UfU1iV2L~s-}D>5`@bg>j;)m~q{tSB?DwXNt zbNAyvkPCa%8rd`WAw0UtJ6{ydyZHiP!WbM*+RSqVDcT}|7DRyM=y6}tp zAFeS8ZL9dWuX)!_DBjJ>rt^s<@wdIaJr?aXkJJSmf_RQr*aJ_V??)tzrxK{j91J40 zJjsbTqyiq{Od8gb{sYkia)=nWK*M(QgAv~-;*Y6J)e~v78VvU(W1u=r_?19vshaFC zkO#F8cX@|e>yvO;pNNAy0na|_wtUTY6oHW3f1spH;lAp&!IA+^7Nq4U(ey;1%-Aoi zcwc{c5nLb$c4qF+km1%r$PAfRcVBb3kOePl-Oxfbr9NwVMEadlQJYOkg9x#>PdP|dMfJQ7o@IB@$fbpb+Q2oQ{us&jT}xG};|2ax(}Rp=OGL-M zt0GRAF|vUDqpgKx8C$G?Bi9RNvGM21-dF*U#GJMI;c- z(eWsZ%x@X}Vx>Cy&4`!hH`Hhe)&dix z$~71ok&4$FSDL)lpB6e#;GYdxIvB>6x@`QXFr~@usrC;~qV^E55X=C#0giaDe0jgo zTmiF3*;pI$!pr9Spn%`$+oVp0UW*Se>Y;l^akk_J%-V;|@t@xJ8VZ|~3E0!2fsp7b z>|zwV_=KCn>^AOfft?RJ-ua6?G1yL{tRb{j4HY5%ly$b8qgN|5BwgqpzQPxoWtA@9 z%`n2_wQhI_5z=VQx9nAQ_gs3d2ntDJeKa?nB>GNL~8B`|KlM|8z( z?XcBKrzFO6~6n<1^Y1PRQwbDq{+~!AxE8L ztU%$bI%OKm7h-O*HfJ7>SS%}M>sBG~^&PD3dA4-?k-mn;&k3t?87O3ciKoR5$`CMN<<@8a5llT$cMsq{5983I>;>@6mO7GM45Z_8RL6lFF zf?L5j;)G$(J9{9k4~BN+uzM&W^2+Ay&+q0 zPOCI%wc$2_WI>f`{fMWOt+EQToqck{{#AqGoArFVNkpc30<;WQg`*ENLV*V;NEUsY zx^ChPGMdCxPL^SKLhI!kig4hIBUW)Eh66Piw{3Sc+hgETXFm3v zsUa)Gv^8knsIB`i!|eQ;EuEvVHfka`LWMm z6moTfmFAIw+UxBieNLBkIc6`o7wDEv)e2?rwTg$&c7_q+$w0~nP) ztbYFwMn$ip@B z<@AzL`?96$jCfYH9d3N~mk*)g;e$HW51uGa4A-&zY9x22<38KfONR;0ZjnW}HCyX> z#%_^hWl75(7BW?)s1)VR)1`U&@kkT!a(oA3S#+rt9<3~V4j8VUHhHx$JPd~H>7-sU z8uc_%;DzcGER8p;@h&5@dbiPplEyprSyqzFAN^dmK^ml!IQuvnE_`rrI;k%EL7nNFhz| z<@@3lxAJLsTml=H9FuZd?e*`+Q~9rnBI=V?lSgg60J>9fHTti9uG*4MC(lKBK?TzE0&=4K`eDU*b6CpYt{OCvHIUs8X)TBRsL6hwGzg1(~vv(Cpu)b;0SBj_f%y zLxXP!t;Bwcjo9nWRkJpPL=g(5}3i`ml9=4 
zc5g!VAp7c`=d61Qv*ZYTgz!0qKYAkcips?PccDD!i05<2`7mMj#f6hDDgKw&B%^J0 zO8)V4Z`qXx;lAVbSC3oTqf`@kt>c@P9}Wb)mUP-^Y-YF3JYnsk$xc*}Ht-&IbClxK zXHBZMTkU0v8&CM4c5iWI;PXy#3YGJCdZB$mo2|E?*ul+Q1y(vlyK>oFp5Q|?QDM9o zQt1NuQ`4Q1`Rjmd%LU!h+CywogDI&;+D~Wf!Pt1}O`W1+%o=r#|0)sPS#aOV8Y+!b zIf_U$=wR>h9z+_es)^$#^XIWhvNER!WX=8ovR9}HrDm$*vFbWS-xJMSrDY2VZn71>3qQM#M=0^#vG5UyUS@hnkgZ1_?96X*2ve@bljT#sXl%xZhQ6G_Dg>C7gdM9ubE zHCLNhvC#i8(=$!gB9Jh;5*>A4wbqCnG;b;Ow6QGK&LD|*CM2An9=>O&tFr7ZJn5fp zm_DZQ`?)td{+^bb_!*U~qNv*k_+Oq$18eb*4-CAy{dx%Y2dC)H8G7Mi*-;(u4bX-; zTD6NM@;2AD&4j6jJ7RFH1;yIgMktY;IjCK`$y~GzP12}YeGGQx=H3YTEKb;CCc7swcpC0UII+hGl}zd@j>;vIPIzLiL0x0 zBq;+wKHNLU1s!O9_?VjCE^xNJT(q=&rzEChTC4w(s-IW3a)V+q_;L~z+@;rs>WF6X zUrXRr%eht3+;efP`{C)gqAF`v#Fd-uAA9aT2B%A^`8luV0jL)Zf>K5k2mkiyc-2yG zsRGti`VMg{7$brxMN21nsU2DhZ7p@@%`v=p66B5&JCmR+I`0~M+({5@FS5h{81LDg zdH){9?Xpou*;3m7RqHV++I@8Td_Jw@C5TQP98m4c&<`*&ejjuPLuA2`P{5{a&Te^n@KMZOt7*L*JPu26BV?Q#Y>Mu^ ztzTOm8NoM&eYK@KpF6y`l?3)D-xXfal|(x)Lc^IUnhr9l=$o2e3{%A?$-R%~e>wm~ zXYE^qAIFWpop3Tu?X;<{8#BvYF!r4YQu)!mqi*-pA@cBZA?ATcU z1qu}|?y|Ag{%?w%{MyCAF-)dVOY44;t~y^iA7A#FOlw~(qn!q6y3*$y4^Q(vU{3Y^ z?~pH>Z~7z5g`JCY75XGJxNfd@B4m}-jJ-#H&8|IH0P;DLzhwVdk0tea4+qcD>vi&? ze20~yRGC718;dU?brLKO0AoUa2+2kVC-x;OsRvGC?mIQe(5pQu`0~2$s~`D}%xjX6 zRHJ4Vt7*V(XM6hwXi20n<4)j+KsGpHf>g~W(JSUZ&}0MAH^i$6EYbTDu6%V_9yiE0 z>o#zf8~xMqrcZZ!od*V+op{G*rYCr5|K4R>P72&iP2WN;3-3CgxkOzV01@o3gd`^@ z2Y3=s-g%4ImdbR!`{CQ131s?&l`&cUW$$OF^MwQ9e={X>ZI%^g4v!K>>xSL@1m0|Y z+dLZX5%%W~sgMDI9(dfxSB`jB_?L_%r%~XBIoj6%FYW!Mq3^1TU;Em%2oF>?&y7Gt z9Cl(>_)jPTkG5qaR$wm`Vu#vToW6N~8GKiv#EqLUn5qfkntkRl#jtqZdTo6=(%;E$ z>&16?s1Z0u)s1(21z)2Gh1ElPVM%h-pOvUPUrm1ulXa=j`bIh`m24etiX9>1_TSQy zn>)QSa|u_l-w2Lg<*7rvgkLb7x9bjv#`-ej*SxTHg&L@IEmslAu(%4G9Sg53JpP>L zD7RU?@;^l!&UViAg#M%J!}n#^<(CJi;3|h!0f$>ZAz`HY_pCSSEU`6-rD05llrPS? 
z>8_n_wE^|6XEl@*{s>DLr6_GMI^p$09#&^AFJ>Fy$C{MJ$4B)as8ZpQo^P(=KhWb~F8_|qvR%1qK>EC-VH0f2nHb>rx2X2#;fQPS zKo!QNUx^E)-|Dd8Lh_m6k@F9GLH9w)wgjffkICtNs%#ByvONq|53XlFY)7pjFJn;$ zvvH*d|3YT7x2ir17ubLvq}BhUl=>RD9yCcr(1{PW11}&4!F4CGFjchRW29PZbE+b@ zzFC<3?mwO3`6^?ncL+mKc2Sw3Zup9fPlBlIA+%hVU-Hqc$cNjd&SLmAENv%OENv}ES^XqO!3Cx^giLph>Eyq&>18cIu33MPQXzrK$$QJ0IXes?^|Jf9bxBG zAtXJXd|pDBJg|j1RkW%_61{AK_*xifu3NtXJkfwyZjsj7^FY}_=l!UUcf7dZZD>!R z_gx;8JunD`np7}wx(vXO9|#%G=(srL+JSfejHa6f^!8c^d zHwxV73q7`?&#D@@-|rPkrpzI>J$gZ&OoUY9_qkRI_1Z(q9M0VQj;Ap<%6H_5F+Pn7h%;$KoKqZ00G-d(tOAvj8xj&W za9}*y2iU3#rFs<;$`Xo2SQ1A*z2Y|@6dAcYb~vMh@O4nUMLArVDv1ClHkxSuT)EOV z$S!tbsPf;K`&*l}R^bH4x&7exYlJ}d9w{;@5{U+fy0o3&)8C%d`L#c(E1PI{mi%H} zbfi(7m~Q_{Kci{()Y!b2wU4@Hfh%3PFiAxppXvg>xzh!sQgB1H_9bH~yZqY+Qp#SA zEFl~RpP0YYU>Z@&g{6nGjrj&yUj))21q?k!=rtM~Eh6*PyttCv>KK1f zWDM1|rhme77F8c&k9UKBHpB-7wHQ_l8qSj7*YK>HObWL|c2-9v?v*m!&K~Y93jRMi z`T^aTe^OG%1C{L?^~L?2-sq37HdHJ-uBmDlst-aHGHGV@@5Vbg6ua<*gKoHA z17E`IZR4z)x|>AfbS$s%;cDn}p6r_dWpo&*x^e=ukgS`wbVDmr^S$HXh66un6If^?G0o=|hR}b~=F7L8PLaaj7IaSt6(WUr<8Xbeu)*T&1X=pH1syFohm_w6>!c#w7tJTg8Ui}1w%TlvU@{C| z?;HTSgBH6K?|G|tIK*WVh5serN_tMnGlyU}dV1^aJAowCMrhv`zo7aIaEFx8a@dZV=0mOKp=eBL4nIkQn1_+b?isu4_oXxib6pP_ioO}SQ<1MOBuF@X2ze`l zbjN>jNBXX>+kX$Kt>^kBM7pGiFVI~w90F!*{JXh8!%H@4%nY-=@$*#q948v?BZi-3 zDVGJMqJx~>B-QQ7xkm~HhfR?RoCrAHu9P_=DnAc6AAiq}0uBydiz#J{H>qx;)anYU znn81PzDcS?`OE@=GjgN8aIF)Rl5#ZWapU7R9kT9IyT3b6RU@Gj!N0D0;R|PPQo-b} z7NeTHE1G_fFDBKG5u!MTDF@T87~p97uE8mmC%j2`shXcY`wyfPsL~D8202;Mo~b&~ zth`OGN*i%w`D_~UXD3o;SW!TYaa;EuSt)-&k{cw_$}JtOXUI$edM`t2#QJXy9V;>T83OSCfXn!!ouhp*VZ0s!|iT=j5E`~n9YpAQ^&h@%%IWSS8FFzY8^AJLaO+=>@EP-|_{+MZ#~lEgCuM>T8*zO? 
z8;h>vE)=MI%K1M*2zT-eCVWr$6o8b#k#JZ0z67{5=;0d4b}O<3YWj+R!98q!J$Zr0Kl5-3xSvjbֽUZ#?zv6#s zxF9y&I$jL~Pv-fV0z)k=x7=i3_)-P>SeuM`fWN78;nv{DWsm*adxXLtLqOJ2X9gW_ zUB>iY`rTzJaYFwtw%NXlyZwJ0orPOdkK4wFq_hIk3<&`#0qGb9EueG=6Da}d8Zb%e z1_?F#E9ZlEw=+xI=c_b=E5*SXGlp69;r&n?;OGE8{Y?DBPG;79Bs3BzUK zvcDbyIl`THuhXOBk^k_2AR-(cTE>~g=##;64K$A_Wxn{KnYOQ~^jaI;YsQ|Rgdn`E z#?dc#%9C{gl!ucCw-m7**}xbOwLH%q#x*q5!w!0=#&s>1QM}zrbD%&1oE)|%XwAr2 zc{GB~oQ2fof_dLsXO-EHE3#zaLqlC9nfHvxptc&TgtI-EV+bE_tYq-hO@_;hHNIl>9VTk zz6%E%r;s}sEP4A~8xqn}Trk!4tY(B@IYv9WT z;aZE#r_BxBE9v{Uv)}nV!0^2I20}9;e0zKw{Qtrud+DuB_Lnky315Y@2RT=`ZGO{F z;l@w0`&@rq+k7CN&r2G<;Q!>z9jw>bZ1?DXhJ?|sh~wowX=bC}yKBK-XN&_%5{%Z3 zsnVxAp9&&V5J?hG502c*%AUQtCrZK|0)3}bC};8;=-oileqN~48ayRFX#^euTn)0h z26a71(!W>F66|0`Xlzx}$RO)KcY(WAbm`U$)sYrVBJRLVL8?`Honx;i$_^6xx8ffu zJyfC0vtwJaOBHTOlQ<5ws7s;zA%n{9)_2$rSPH?BQk28|k4CySScSM|{}x+&M)!#Z z4EBHX6OEP`_&ORCCTR&HmwxODt!r`oms4)(Tzq&DSzfWNww3atcaWXDOe`drL>#l! zw+PI~c~GI#PrZbRfK5s<&Z}&*2Kz1;nIus7!k0Xi6bQw-|RqwyG$FScovh;-{~EK zDXlbKyH3Ukrw>yJ*10g?H0|lHm&;EF3W`@XyuCd+HRLX(eYm`iMhCf)p=v$mO!|H5 zQdu)=zyIwXVf};!u~gs)^mZz%jI`fg?xhEG&)2ZOGd6(QUkXS<&;qji+q>7h@FqbE zB3X@3L0l@dK2o40wU49Px`(?_sKSRV2XNNS^M`1_a@!IR0%!iXoKsu;XUfz> zdAO|qU3R4H(;rXyMWi?Jp-lkz#P)=2izkz5oozazQKL{)%gh@`NRvVljb{ohunu8+Kg}u zdK3EaHO{1go`;<69fm{eyFnM010pE`OHyRrmCG9d46N6mcuEj1y``x@A*NLu`!S5n zgCy@QA>*Kc3z5-I&Rz?_%Qm3zH~f_GP7@8j@|_&bK!~rmd*fxknVx#joFy?f#;X$% z<$tB+hxbPu&1RRgm!%Y?x3;#?N?3E_?}$@5Ve~f)8bVfR4GHet{s%IP_d(G|K8M^t zxR$w&6#g+&Ciw1Gs$V3*r;;DZL-H1BPH&*cPc3I0S3FY_9a3uQr3N;sM$ddk znt8IwDfzqU?g1s>TU3;vKJ2m>SbaMZ%%2L##86GZ;$!zCwc-db25oLTFTky%jGP(& zAYY+`a$iu5eJHeLR|`Cis);OfKu1sj@rV3UEe0euX&u>hucH*39$*2XgIiTMq6XuH za(itk3(yr zCw`mIrQ2c6mp;T17!5D#iK4Y`1|xM_GW)M0kv=MgzxSvujdtc2X zpKIesjtzN5{o`B9~d0f%m~*fi^IlAm%lCa2tOc8sR&- zT<~X|Jtj16eOZDlVk;7)uRkv8b^=-*Rr2ZYt2J9y^K|DNdpwz~U0J;^vd zNlh+!XfJ_H1loZdB>GVEiLc*X|M*F#&>u{={5IUcT9Q%0J#CEvK|eM$%nC&Uq8Q>1>MTesU!*>;)B(WL25*F-!YTfU}&LKAEmcO=!ZyV$Hw z2OuC@N15X6>?miV5|xV2eHQw>wcurJea555@PiiWE+SCua3Onu_~u?+ohH-(xnAi| 
z`AIU_?l$(@PX~ugZ;=t2n3z&ZAA*p@;1`<<-6(DREiogRs`vGsnVpsY+>b>m`TDI- zb=wdBY5#>(7%Uoc=2cE?EZ1JKPx^~7!tFMR>j5rWG)J&5*GuZuma;5b8aj5RC*D-g z`JMhyb^SqU1GVR*kL_R?-a-CyVoE-t!Bh`a*mS(tLg`)EZ;V3XMTqX>^Czido%sQR z7mX=J269ha+MLEzA=bb2b`ooo0w?kg_2pA(1o-B!ZUj-@AsAiYC;R9X=up42B~nV3 zn$~>x@z0yKWoSYVC9!4MGXtKIl z8|q3D@Y;t0owMG0r* zBEZl~g>>w9z%6ES9ct5nv>jBln)9%+j@mHS2isRGy)8*2iIO`ABR+@BQE6h`pmNIGbt%!wP04me%IP(Loc5##FHuKS!KVl4H&>LGy4fXC%>bHiJGmF1H zJ<5M{%{uUpl^L5iQQ@$3Q1G(tU?d~6mRy-F`;&|(-HZZl3vP1Ssk63z#8{x7kYxbN z;3mmWNg{QsZyoq&CH>$2Wo22%vo157+8YkKLdsVi4mS{A&Qu|WN-AeMjmxwX?EOa8rT@iC_jCsG>DJm7r*RYQ7yvo<6ZlU8Alz198-_4ahwT&q{DNdE8e` zmH3S^sHsG!ZC4)=AgI>Oa2)0Ru_{TOqbW{;tFC0x+(or$yZO`j$gc~v@yYuZQFR|0 zlVE3DaG4lTK!J==j0RUlcaR*p2QAajf>{c=#2SUkuFEVdeZSz9~I-~OZ z+S)j%a$4 z@>CcN5Okwv2TE#w&UZWWajNxicZ*83LK*U7e|Zv>O6qEgsSai=W!LbGc`-8%Mkk+)ue(NP$yl90@}7B=TH4o{ege@JLnK1QVqO zG`s;NrwmL@H2b`7#B$zLzs9tfU8_X{f(fobe=#3I&_lIS7X}Lh&5@+Jn?$YX4n+~eUK@F{r0%P=amSN?a*Oq~nlc0VBltT6mdXE$5=tsqik#KchUp(V zZ!{k32)I5(eRv)v1%ayH8cRBWDWulEd#l@8>7T!Sv7gE=LRxq@T{kMySJIczb(y@L zYS!#N6QHY4L;o=BCA|%fL&N|iTEvOgvLh$8G5|2+p#2cEZnSEk5(%DB?AU6&`jV`D zOd+!N<>YEFm$$^UoY>24;B=AVVZB4$n+Vl_%c9p9L=OC=58JD6hz#U}G_tuzUG&z%0J3H19ISG;=#Cet^+R+(dqQ|e1^t0@$xhy9CJ!KoZz z=pSe^Bt4;w5iR>g-Sw9}CirPTFvSN%v;Zd3v;4Zqj-_8tvL~yUsp8`ukuBE+yPGc} zP)nHMfDC=~UsTbj#(PRE8Z;EOFaHCrdB_#tF6{bLC`ERu|D!_y@c&%PYc25h8zBd+ zziWka0cq}~oZ=VpfX1-IyyZO@?Xafuww?(R^(2f2oi6obrg$0H|EGsAyqwgIvG8V@w5DwzvcaV83q@O$V;Bvn7v);Z(i z-cpZzPO)Q)Df2HE)%$#U97^5fF(q9wo5cnSQu0*0(0Onl1b|7Rv$sq)h|mL*P&@A< zg2bItem-*Gn{KF#_bu+LA>cJV)x4vGm2S!RPsA!P_q$fY6qC88lJ_?3mI$i9f#m+E zykp<>eRiOI{+@4}o5d}Tnf1(I8Ms$LcQzb5IZ^9eo5PEIe()!C|AFZK1A#`cnGmO3 zRyb`O6$!fj3Cb|^fo0VCx^a)0-2tP@*%OzQe7!%^%_|A5CL*ogXSsp?Phd3L{s|f9 zf&W_N?WYiN$_~c*Il|AK$OL>gXyRU|=mT|-cmz~CCqX=83v`E_q#eSr{msV6@Mt-F zSVP(vndxW^&v$ZU$Njk1Ttv))7OT`0cKvK@7VZpf%I4qrJ#ogqKqOVPhE23GNIi~! 
zqj0U=urLv;466n9L)3Vr>sBSbUBhz2caK(^y4DyCnyVq*RWJD$JBEmfQ+ePvfNnvi zi)}#4;Zb8WU<@^5f~fWH zqRIEkOVp=3#+*7U1~2G63H!5W8@v5gN<31CH@LrtQMSSv?hlCBS2@+dZvr62NBf=S-ZHzaKq59wuU_AGyLnF4RqHK}z zaes#dgg@ToJ9iSGz^z<5@hOvcY?QzseZDF@U?i~4QU$MnI4>NK=83*m#RMp9l^GZd z*&S=1{0Ew--hZx({;^4(aYxK=58v#aZa3VL?5+f3KB43b!{9W(#~|6FgRbx?>6+U2 zkAz*HmI^q8N=*o;zDrW9EDo^VSak2tW&Ev83xA=}Nr9FF>GJJv*sCLOX4k^Q1BmZqnUUA{pJmmw#e7jbx}dn z>mrHd9{`e(6nR(k*cTTVU2G*f{57&7)_a@kfq8eKT?2L7wWOgCXB01j@cZ|8MZl^$ z;Jlm+REAVIq#2qMqp3WvSXsCH{Kot&Oq;Xg#H?1rQ#0M9_y7)-9+n z(|%slFdHW>bfT?D_ks&Ycl+f|1TbJI)H~A(r2)wibFY+wRv#4Fa~oDEWQoIfyGtpb z&3WVZCRB^rmK_UZq5TK^AD|yN6GzrhW=zDI)3EmMA-J=v1hfPnOp&IVwSEu!9CMEt z+K~B)QNHwYT?qB$HwXH2@x`F6Q00_|x!*0=p%-&sD?|U1&c`F(SvM0|`4uC!`Q_nC zuL9-tsfXi}QOQJGYtmj)=Z_~ZdGQ5CjR9}RlOL$_SPTTY(S2JPC#Ng+qTt}_)ZOJk zQktwwj}cT`P5L82@<9@@`1GK=qnOOAr>axgWwc>WzaU{4dkWn*Gyh8O|7|yzXI_#0 zCNkmd$}~n^WRO}w*>B+6>NMqh{E>Qb`a8p?>PTAj{ zy1}lY?=D3SM3NxzPFu*5?AHFt<%b2C_W{c|j^>w*GIPr+SE0uDd=get-hK?M!LQ}Y zDI0aTxdK`eoq;bNQJWfllt1{$&1f~a15QwDvNrHFk+n~;C{bE!h`<)Oz_vnb!T)>W zn<~{F8Dhn<$)U0YAr`Oq44;l291Zuy7(rkYgOQZC4#%7k7ZrrV# zwZJFJH;s1<&8PhD0EhC`#?k(=)iW?RT=>oiw`?NW#RVlrn=G?B4@ll`>*j-4RmaDu zxz>f*IB&%ovF!u}`k!?HP1~qDNsJ^m&!f~7%(60?<_k+ckqOO=ICC6rTE%-DlJ``! zjO6T@PvxHrbu=k#QCl=5MX1=ned_j!qXxa)^LbK(+}y*Z-0`#0J4Lv~9F7C9GcD)# zU87*%{qpvDyz0kW6IR9vQY_82@B8DqU%)MCar_t%&vSc*-Fu6u6y8^*i zPj(i=qb%pxtrj%H7xVFg_H_;ZfaoDF-&=4FvVuk~KRUVClk9GFVPR%s^p2Nh+C__( zn8k1)w=V586Ud7_*FAXjS^S*7MLqE_wX95FSHvkyW7^2awU%zvdda=c6`HUqicyxR z>D~IUD>)Tl0xm#^)Va)N+h4qkI{QM0;@l1h27rNX$NdU2bL=w$f**bG62rgqL2qoy zErkVQR7oR#B`Nm%Q{twf4HI$xa)Xj#b+6NU*{D8D4Hfn)+?;n=hP5W%3;qRyrwsFr z8>7W9qNAi zP?PG?o?y`u&mU0ej;uM?+r>!KxI_!+1y%z{KiFX?kUW8oa_$Zlrs%+nDXJRN-GN#}WE2u9J

;)nHSEMCOR2qY<5vCck_q;_FvVeJ^-1MZ4|kL)BtQ z5B!h#% z$ohR+xBKl8C56x#Kihd)QnZt#3Nk#Q|Nok;)5fZ#!GZrkZ1Cu-BpNP%MzZwn#Omsb z--i58=Egtjue1`hQl)mX{aS0}?2S;m|?vvBpavj9~{VjHO zc8cYy8SZ}fKv_csz?VRip>+!wT7}`E&Mg`=z8w8b(y?cy8bj)$vfk%AhE>T9H}9Gh z1_p{8`S)oY{d+0cFiQF_P^G=4?hKp7&mHmged=pzh6nNUpDG-G`*21nd4)Ep((YS+ zU}5o)V&5)tkWon97JPdCYC-UE=gfJYpI@oz*N3m3ivnc~60d-35+@y|7sH4B<^R7* zfs}=4Imz-d&nJb?XO0Y7vXa(}H#A-2MSZp}={f!9^tEb-eQl=(R|Qp@5S?q9_x;kp*ztABDaX;Da@ z__K{x$tfU72YCf3D9e>s_%mBoZRj8*Q&<0U{yj6a6KK39=3<_tL^WZ5FCzOZRkGb5 zXI@|n+04apqDBg%{Kb|0w#Mj~ubEVJor_mno(+9d4gd2sPz8U~=|>SLcQvdxD%x6~ zQhvi-OtwwAjcym@LA=I8ct1ESMaQK{K-9OiR!KtYFF7Uq1#}d$S5*~)?f36FF4j%<(b9Sn`5!#(> z<9nvM@UlK7Mn}N8-DIl^DjoeHEAU59P=A1u`7@--SIn2ZiLFk?LxYy_OcxSAho)3L z@)vIp7+8~dE$bJ^f9*VgMuO{k0JUE`Y-$a*vc>e?A%`(YZX}xi$rtshcaj>9?=uM` z#QqA-cZ(xI@SMSA`mH=lFA({6d)OtlyAA_0aHaw@_SsUlx?xY}+CPneTn~TO$J-g7 z@MgEd{>Rtn_*HrEtrZ+C&A&wdJX`zSl(?*!tx8MA!0oVJ&N9%2qWem~N>oNBLH}D! zFr|`zhyN&0kT>7iG6Tm$ZEXhS!bM=e)eIOSZ{XkB;y>y0q)6fgW7js})8N4ro$joA z`u>-eK{Le%GEt;q$A2rcCSZPeS-Ku5kycYRXYg_DR9sj(j!bUYmR`UhObcN$c z`GC87S+C)=jPX9cTh(?a`TrL974SeO$Z2b`-dPrkbdWrMe*I~KQ?q^%Rqv>!yH zH>-cSHtVtrq2XMnXS=<7U5)H6Dw4C0wK@o>SV9?hsxg$nv zHQnXEtrJ#~_60z^T@8j_i~%qlHQXKTjw&2>t^J(Nk+W@Gq|`PmVDzqf`sbn5Vv7P|dec;luX(A>ip6{jjX&63@&Og$d6? 
zoXWrOqQxb9FTmlOPhe=!=qe}HD;gfBy2EVDs!a~?H`#u6J_VdnXdd(E(!o-o7L!Z& z&8(MvUk(dNv9)CCkCU3k)}M)0i?Pw1VxSI_xG&5=QADep7|Kjgjt=$W^e+`Xc&+yt zb}j6ed+38lJR|9G)z!UjZxa(B%3k-O-@U{QgDC<&35xul z+r$l{SVUACGUhTkCYp*!8}Y%N5p=qB3B!0K!(_Hd9t5-t648k#r`*AY>W{eCC!aS7 ztrlHc`-wD>WT#eE)(^D!Uz^Q&Pn&1R*pH&wYksdUU%*h!2_SSE>dNTgY1ppa>8v}z zo1c^ZW5;mq{f$PH%mN@F6TxZ+mrpyxF!aCH9Ar{P2;_zT|TsO`tUWniIB271;y`4b{tj<(0 z(*FF*MKy0fw;t1xVkzc=WU?-NJUv3z@b@^seslH@7Y)e2f->!uKY?!?U{9#tI-~B} zlPG`ZHgmo)c>}rf0uGe^Fsd#IBszS%PhwqgVU<5xEuZSQ@q^%d%1f68#AWe=nnk}J zZsgxE?EUb0$R!FH&gV|g0uj6U8SZ+Y0!&Lnyzm~w1qD1bt2^OdrGr4~a!&bkHplvE zve<^c?3&>+pO1{RF&1%(7=;I8uy|IR)qOPgE$2TKY<9vHBoz{Ey9lWRpK2h_uf9y_ z?3;Mad1iiRVs_*hP#qiir+K%(+SN-k9K2{VQbBte-#rl*P?y`c5ij%WnvJhBX+zZ~ z36NKol}*W<1z%QBC%0dw3hgu)A@E7%=ym7n&HVJ5=EX-=VwG{HqqS>h!!z3ajRrmd z_4muzr%y~o_1m!@Cwg%#)ww#emAgic<$?)A0yUDLr}6vKKygD3X}8s1T(xnr-Z4+j z-_zsf8wjkjK^oK?7pC}_HkBPDs&*Iz_3|Its8)$mkc3pQ zOh?01&xI#>Ew4Tt(2q#tj}I{t5K2Ai=*Ff6e4ya((WVCqTue>vZ>y}6MwF#8hSyk0 zEWDYEjdyOEbT9Z=?{|ZXsb41aB=p_;3Ho0@E(-|M1)>Zz9CLM!v?n!sQ}`w8KhO=` zEp}}pZ|jWk+jO6eX@Ky}l@wLj2ALcgGPBnOZP;a^)RAKOfO4CzcY^orZ@b*Gq?#ha z^dwjFC?Y<)ibEzwt`8;o+>u=%?o?~AQ%ZE2Qk?V4Y_UiC=D0MR-jBYy#O2M0n%3cR|t z0D#&Eakh?#$_N|}3cJ~samqv(u%<{`_TaVeSNu|sn|Np;{tSlGmZgKKn$%^N(!W{! 
zGNrWjC1bJ!B7uJc!|5p?BcA|D;Uu7JclaxVCYAauQ}Du=^J2(GA0-uTURU|VQ_OS( zv}#FGq=1swM!fCiSS@7<{1LmT!+_W*c;9a6^@jZIlNVZU+Vj|nH{sQ_$s`yO6i}HO`Aqz_3H7&M?Zu5+Gi1wD*`eZ+|`bUZ?qm4mygp>N?03AgqGtbJ}QJb)-+%pyNoaHN=WXIHlof4;-7Imw~Xr-Gp^ zNyLU{cA-4g+RaV>2$Q&PDKe$jO_j6fx1`zMd+v2)M;*hI&!i3}?9kRqd3K}5wJ^uK zWXwh^K0+xa{#HQn@@dTN_ODB|aT$Hjmmx>)hLgcw9$+5d(PsUW`TN<1aus-EMzUX< zk9gC&T{=vu8NRF!J`em{@|ZO78T3u!#J>KTt(3s3_upc8(~{w-M%tU899JD`QiR-F z`-tHmM5?F+QLj0EzXs9ftat(J08gUfJPVQDzDvJBlU~Eyh$wv532BHr!QAlWGr@K$ zw)*cb+>WCNX^_y7&j)du zsSJro;8)Jv%^?g;Jjb$PDOMe6qg>vhKlq1dU)mA*zW#+$zn(gJG*w%*{kF+d zp_xHEOmkb`$Zcj6G1~dv`Did3=B{4Wd@x@GlzgOD(?rPsx zHM}3pe+;Ivt@MUbDAio1Ud(`9~|V?-jMBPUg-Qs<}J3A?|tjHoRnX}vxriz?3lY|M{bvUO&hra_g*|He?^{pg&KRB!o#_D#Q&phX8V&e z=jf&d$%zB)p?4YW>Km=mq^9uzrga{f0!O38t?w`Un9LvB+uLg5;4jImm|7AJN zr964{ywjFjNQ^xpxczGTyXZ&JO}4yf+rQ==!?k|7+Y@KYg zz*#~_heB-Jc;T+KJPAkf2|oktp?=jfO`y;EX?M@h7Mmuxl06-ry{NrLi9!|H8b?tj{ScaK3+L1kXMIE3=IGT5u0l>kP?x1LivmNo z_dkWco!N&gFI>VH5yqUZVJdhj;d#U%dVgarH5UXv&U$2~pPd#k8gqOY{?c@QYVrkL z8T#$LS$!~vW+)O|fRj2|twib{pNJkY|Q2aJ_0zH;mZG6n|pW;v1lav=F zD~F}I)x?4NNhkrjbZaoeNsb$lE#b&%SWhy&yy__MNRiQq)&|HBxs@ryj!3}s{jl7D zi9jf&f*jhe4T6ZKxKL&-t0!~cT{j*bsZ*m3sgU{h;m_jDw2nBh5XZr3Pg@?Lo*KjS_V--Ga%fV}*ryK+8v z39zrS`Z@UK&|-fGrE1W}a@H+agiw(cGPJ1Pcv^Ele0oNxwdrjIx42WmjVQ=sbHsZZ zr$$QAHk*4R@53He0TaWBY@le>8%T>OCDHB~{hqiB44m!X8#m&X!_hTR@GTi_)d*eC zn;1jFPI|ffjn@>a{UFE9SXWp;pNw#{hpg}ewA+O&R_tAnkfW0YaGXf6HuB))EaGHG zD#{tsptLvld(UHV%+6%+7t=Fu>(;p<=VElbIsSescq0RTOG$D>5ugxIdwpsP1jruc zlQVr8Z5a#;N|?xdKdRJ9PrMfy)f^N8Us9Cx_|O}L-9gjcy}~GA<1381D2PPkmU!W=>MPw)q4SVl)JcCeGBo(ibS4 z7p^P}7{%D5XYTMY=1BS|L=D>05)%DaQ{DTK_X8h0!7A>kehNyNR9 zphdhuLI8ZLz09*U*yTv6@gIoZ9?-q)hMc+PW%L5%=a#fU`e0WLYRLxr3egw%I|0k? 
zI8wZ=4UqUoFpf&?QR6nPkK?&GySMS-_>T{LCX!LR7pm+JBlNqpXIs~5_%$S!}$aMXAuh_1IUGle|{@~62%x2$IlAWT?+tmd)#{yk`Ybw zaddJD_b>I%r+{FpH^Tc=4dskjn}j~HhgeP_5`Q&L3#gjKGp=(0v#Fqs*@HCsDQy} z4abkSO2Sm(JMRR`t6T$}RAp_F3HC&p#5Z>uwHGem9P@3Ba((*s^YpBrEAtZAVJ60r z(oPoHAEke;whsMOkimWjayYqc7>Bl*HtlUGtI0MhQNivGx1ZZMv1@}l>Bpw6RrsuU z<0@y`J@Fx(R5)_97ZnAJUL$UpVfLnxz`41}J$OQ(P@pd{%l6X=q3@U2^752Ag9evh z)ICv&gG`~XUo(&b$Uh=br4YIoRoVr>=Nc)zl{wu5K1cKIl|lYg1a87(}Db$|NjnY04y&`M_6_rvSk*sv5!g@R&j*7EW? z?dLP<-8xc$E}WhN0$DBWK>nAYB&5hLY(jbiv{Xzob3Y`K^z0)P|54(fbe%RFDrktk zh+zmwizHwx{2${cf#u91*DHJr2L93!P0;btC-A|ZCa<1dE&+nmKE8!W4f6CgXw!w* z9!vAJO(K;4&kNl{5E90QF{tU1RFaylH zkG|4}rJQTZY=Z0R`bCg|F~aNd1;d5EMOHAC_?W?Yiq9sUp~^+7Wn0|+ z&7_KAL}w5mt((~W++Nb0_ZBX41GR38ZJgU{A2@U{L(kf zaQNk!&B#Q|mF`*D~e_{Wr&U(W>@G^}N#3{p!xi|4?853)LU#w%w(B%_?(A3XKlBaL! zLA?74{x7)5*`wW$3)!^D>d*ma_Ll`AXP z-os=!#Yc=kozfXFUN*e8}edw30pPVNj$dzjWwZZ|q?qIxv!uwducb6E+EjqK2(h=vjb(tC)e%Km< zi%$6NM3i6X*%IPfPJxPds|{S|6>K_$(Fw@(58s)qHqP;j*lkTdX@KUTPFyDBXx3hl z$5WJq=e$<7iZt%C_{B~gWfmN~NEpF5bT5A;=FadRCvxS=AFYdQ?OsqIMve!4YH_@E z>@=QSZ9lZQWZX$_{d1p(bo6B$2xPbmILF-Yh#ebIyMEw@eN*`#cYo7|OXHN_d zzp)8)!=tUg^{ksj|8DlMP^ZiMA+Ly0>jKBw@1^~QI+$eV#QuJ5+t%3>HNVcTC@e7g zfcBvx<40mHQtTQw1M*>l7$aaROpj5y177nwQOW(x${QbNcCBSo#VsSh!|tglz91;6 zaM*3ikE&`Gr#(KFhVua)oFTS(&4g`tnYM2pQEOKaBAzfm(@005Z5U6Oa+FblPLc`- zi)c+1p%L4Um>I+J099}uDGxQHh_3^sE=Yx&!k zxFJiMx=isw_TAIpk z9f9B6?>~bZ-lzhHV;19}5Y>%13j1z(RhaGKpPM?WCqV&X0+S?lB zh*_5SkNoG5rOc~+R9bhC_2xNCSq>-Cx6YM)6KYrOB%49pb@`2GBIPlT`ayEm{vZPj zf+omyxB$mhmuwEamVHfeM3~3XT~qyqvF5QS@f*jcrVjj{PCsv{K8|oyMZAs3pXBYr zKQy;P86qTmCqkGW0{Rbi{inZ+JhpMm0*pdJzmx=Yco+6Q5{a%pO_F?&n48cc70~?x z#{dKOOll%~&Q|(c&hkO2BnxAvrg{mil`dYT&IImMBkVZAAHe~%xW{lk)CR!mRojjS zAJy8YjY|}}H}EFkB`PZzP5Sa;#{4yU+1L>7_x;}6dmP@JVP$=5{|a3G4`b+NAt>$h zxpVXy&=j?=hJbl%(C;|*UVp9<5>D^TUGPmn$1mcFVRlU>HSJ?Kta-7ks z$J5~#M4iQuh+F~%28D^AN&1r4@-R^7Gj#DYkLi$VM)0rgi$7K|h1nl}3&i)6gX~fV{{QIH^6T1qUbu70^ z{1C1+kIAETQ&%%Et!dH|Oz91zmlg~f``@~Ue8&pzy(df#C(PL}n*F{*zg&hAsSAEq 
zpzqjgIhN2ozAXW2a~fAE_8);%gF^w3n7I&Ycm)>HultIIHx-1mOWI5`l6*wI8fevZ zI7pmRcomP0QecC-Bx%ze+UKyMi~{?dPq!&3CSO}Ht-iWeBiHjOk?RU^QUy_smu|zL zcdnW*TKIvGB@oA=sC3R99$cF2MG;@UicY%fyb*7AcE}AefG~9M8KFl3V@^L)E{2Q3 zO=zc0Qn*FU+qt*Gi*^F(xDc8t_6!EeGRMb;0Q+eEHYb|1&*XInb9K#wp&|!KUhW#zEL$w&)YY1*FM$fP z_69HiRqrO`1cBVR@}CS#+dSqJ#2ozm4`k5V3;3dvl=r$w0+Z{h3Grula^2%a?0~x+Ra+muyS20kDAXlA?kkK*8wqL z5lTJo-XO<~Z3{mfm|j_36^OlZl+e3TlZwsHQTkl0h_Ji|{ylSJVEApjmg$({Yxwir zt)pA;Pee5u&dOu>KtUh-*&pzleljlDwwqpk*{GAR!9qBzK9z^}T~{_LSdojIe6B$$ zpnWXhsbMZthu|uD1NZ3lI{%z$U&*-c+dGZ2q6#&U-_!T{$K|T-;a!{#(%t^Md*Z+D z3fGyd+Xw}0tO8?&5xe#G9ZHohUlhN8!7SJDSv5*<$NJ@QOg>BHuvgd}`E_s1pkS|n z9=I;bFr{PSrV`S1lT{k~%xHOnN_=FgJ@-N$%NQ zUnq;*&4O-pWVYaatcn6u(r9AfKhKR~-iE|Dm18{UdvR=2Lgp*)G8lifG-YfHJZku= z#91Om@W}3&9<@Xg>|elg?$v*wg?jAE<1QewF3i;XN?*k6II&d4+r!wa&Ba;Y&r?Tr z9h8um!tROo+dLnfhy?bNx9i+Urvi5R&yc#$`_7_dJ8R8fOO|dXq!KI^02FC%%T`%u zWbw`%UrCgxos_!+SG)7?%qQ7x@!!U)eeg%~b(4;S*Ikz|ElXF8qqyicK_uB0_$&NT z4&Dr0%t9#X@w4H^w_KN6RNMtAv#kxj58b2&p7CD{Uc~?#8q6VwDb<-4Lt$M1<$2Bd zq?OA;!ab-*q9$j_5L0Z3B^;}@Cuay)*~lTzxxpR6NHTQzKuZ}L!@~1pXW4ht1w!M1 zWcMiC`Bn>akB$sz>_#L*(*pqBcrl6$Q9d>v`cnfE*2c(j6R_k`vx=`(UVhz_&))<-@w3y$ z8O>96i>*PoB6jQhL)*e6YbFtCBN;{$qb7&>(lKn$qPnXS77wA-GF0Jp5VFIB0~}jb z<4oD6lImtdtfe9?Aqy{~6v*llI`JRqWb4iieYaVci;PFtnZas%1$X)>+7_gms+2y< znzjX9NaS#QEKYc4$hB|??T=qM-<$}B-?!a{5UmexuR#sMqy_%A8ozdXSwmiH&StVP zf=?tWH@ySOb32|GkcLoiyA!InA3edYn_ghomn;QYj`-Q?V ziYFQ)wngIT1lKNJIjn}7bz3?uP|}YzH@mXDt8_C?r&5%b0%aV)ToYFfFS=3g2!Ki> z0jojuvx&~yCTjL-T7KlVbo`J-ph^>}Swdt4GOpR*`>4GHsa3(y68D?qWl)h|%>61K ze$t(weP^$u*>bKKPN=~;Y_k2#Mb&n-gmQJ0FQ&H#GZ04jk#9)dt2JK#;C5wqM~~PQZN>kh*LSOTof7M2A{C^==P;3X>3^{euMuXBahQHYQvCY+y5)CyLND?yJ zk2RP?vRB{Qr@37H&vuJi1t`;>nU0F8GF z=w$+uCRyRnkGDL(c-~JrT{VWUO`ANmhS|bJ@y<9QZQ5ZuVGAXDm-%+7+q{*Qo9K4t z!#s>V1;hsD?)P2ig(;R0R{dGaA~EZ2clf+m@vNHSdo%+z=0*YeGy_rw7Bc`LG)L$T zwb@D*{|t;`jn|Io><1}**wXtKV%N;+A5K*2&MHyXpj(W|u!eS#2U!nT@o!X)XokHgzDR)TjlzkxJ%T6(!} z=*bXZa(4q+_MZbY9i=6IGEW-M>aP}`&)zP7_HuD0(ybVHrddZZuuB 
zf?7N5t{(EuD{^R4|Gju*@XdH-_>QGH$vVm7upGdY;emGfvLooWm|~a)^JV9EW*hcA z@T9n<)Py)~<3+U8q+G=52_=}l?-OppPmRG$Fh*RE)K;(CHa~T%mNTjD?h?=^ZSS8E z_K@h{w_yo!hA{k4HlzXiyEr6Erm}qzYf_n$arOIs!Wa5CgN(kc9?V8F1o>!2ewqsF z0+PR{rE*z~B=(NeFO2M3^XkpCe?3vw;NSjbeJYIvC<@~`3*EwZLw-Q~y9E;-5~8SK z**`KG8at(8aUh`QwN~Vn+Qu&sBK-=Q0MRX?kj!%#f%Ez4)zjK7gf# z%bE}9LCfImAGuLIv>WlRRiGev+pM{eO0u%6i?E6yTbIIW*sLzgc4A(M6`wLJcWaKi21foWsUn+z?B8i`Vc2Og-XD1}e%i)+C zA}9HbFtnM#{57;6MK^cW&?z`3K+#$BM~y(>)iqu`;8`1h8)m_}s%7tZ z>R>ayBG@T0^UBGe;vS5%5KQAH^vajXHE@Lpo+ncqy|;B|<#1O%M>7l$8)l0uQzlt= z*LKI<0)}9iv>XQ|n0gChoC8dx*NUT7m zua$X!wIedLLT~~vuM`|z?Oh3{9E9pW6BE2I(5%w^{Oa?iWa55GgQ}J|!6JQKvXG9c znaK!*@>H2fqqxhJw|;*1L-f;MgN-rj9UP`A#cwMkRje(&(!C7zF3Y2yvm zoP5$gy@(_jOa@?9eU1%uRb~&;fAy?@NH-ar7r>Ml$}&mgBBuC_Y$VO<OoU)XQl%j0Gk|FF_hYUVu3e7`(v_^wICx%p{m|un#kV+(ic>=hay7I-u zZ^odYpmJImt{r_46#b#e^mQzQ0@ON4uJ|p@phZe|I4zKVkotdsfQ0A;=)%Ei7nABW zLvP!Rshs#bgf znHg`7b$KQ9!8ZztLd(GabtP4DjR#qgwKeTpV|!hkAfU%I=y-g*)@{I7iL-ix4+!Vs zz5r{chH<+Quu>2s11wz~tTgAtc>&DO*(D~`seDXSgiWh{Kh0NDdWCr z%({;^n<@Pw|ANLwcGK%xZ56zM0;;tmfx(>$fhBz>;=o?})VQ}zyhwN@W<5_{8dO&O#yg=}9*+U=p}3BJ()?PR;^G1{jmy!3q&Md~fglfxY~uvGuz;$7 zZDo6GiXNbN8N2uA+ZwF_oA`e!#M?S5792Ec_Uuw#L(=8tBX_(tq|bsE?<#aYD&aj- z#%TKjo-O+mUl&gP<(p8V0o-1O#pHE3VSr;Z3EnTXEhpLG4;D>T=5P{J+t#Lmq7-BO z584r~V+v7CeFjmrlc-8nu~n2t+zu*NKp74{(zF!d>dCYU)39Bb=ix+k7;ZB?w@jEO z(tONH4;lO(b4Lb`%s(EmA?Sb!r4EKfZ8F8>u&3}Yxzsz9z60cs>3{}UeWe*fUW`Cu z#E3Bp?DLn8dEbp0Hn)ZEqnoU@@~8dzLC{HidVYEOJfp`(skgQ4W>cKeqo zZ~&dWhl_iJa#2!S1-C89Q^1L361WiuNw$C0pA>|yosy^o71Pj~f{6|Um)?`tOSjik zW@F&o+24heBrURr{!1iNYijq@U>UOW$uJXcKyxbo(R{jpO(jrxdR(FQC-V^X-tCm{ zy8R?cOe^Z_yvp`-akK&_t0zmZ0ou9AIMf)-y~9L-z_e_FYeqDxB{3N^OsD%eub5#U zR}$*a+$mydx;s6PJQa4^$VP+dt|yg@=1#Z%g6stwzBy5z)Tnzz;omlx;eP*$r*a?8 z8kvJ+Mzq5$w{d!jNV}tuAtfU1xj#)8d!3{fhvB_^0Ov5$T){hPX7C{>CgJk*7F?6- z!dDX#w9v$TEXmQN}#0QVmxc)#*Srl7&d4v>|?p$#h6>JG6@{EBWue}!4K02ZADuTmh% z)rT#=fj2ELk_D+Rrrc$S-wLVz6;9()OahMd>T9dj3!n#vDy-L-IsURlb-Azkj7AiX zj?BVHSkzEy8)NBgkrr^3_ZSR 
zBED=>C=rbPs|?fO(Jy~)rMkb~*DM_g96A(!cvL}Wfc6Y6d{GU+epgU@$NR!a&%V7$ z)!%3hd zO8zoG2fo*-Xw)E_F=fwr&>RQkj6+49i26lkmo1p*dA-Nqp3vl9+D7o6yH|9*tH_kb z_H@!`u+=*3%?M}tGQxZfT!e1hnujyV)Y z22x^=NBUG~Rvy>AtD(!jjbKsyWb{vqtDIYkxE%02eZ9LxGrs$IC~ffPS?p9ekz$Mv zNg-S3^h0}v`<`(}b{65!ro_!lnc{AMtV7C@(*T;nRov_fHEepwin%d9P!~K@P4Hb{*Y5FBggq~gJla!cg#{&&sJ*YJBp+(WqY-s0koXf zKK{FjXWr;XGN7-(96>k46@tS^VAVL%zmHXxLs;9ZQ`#;MLnu|i)|ai-hWqt4di?G=jkQj*7O;s?cLp>w;S0|Z05sSIn{T?M4~Jn(+oBp!C$GZUYGg9SLwGF zPhA~0Tk1MQSV*guPPk%r;O%XbSR7Mc>B=N)fs7~iV-y#XpZ_wv^>w%VSxNHcGIwdS zb2g>IY7w0i03Fb_69+|LFr3^!3XNUyF%EHe8N1Q&qA$XQZh5d}G4nsJ>~;7$oOf3z zF-;XmJ>nn(=DRqX`}0hOq$L+boZ>|09?xV?+n2QaS@xBBUIAlh?!))HokB5DQ=xz- zFj9;$Yb!|qr+)s6!ucCNg8B^=>RxO7mN=DzaruUj07!vo@~WZ4Ha~^GQ{OjvuWc8T zah2$OD>w3HD9K9Uhn{cYBV+i)xBxG68Ys=DfDT|x>9aW-eDc%b6Sj-MUMkQC8n6LU$jxxh zRCq4q&d_bwB@k=|>+aRWGDBPpZa6s6`6|Ent!bDA)#a6T1efCOcztVSwr9me-i6M9 z8<_^eQu0|kB=b(&{GtgGZh7(66-{lEc}@e~%69%Z;F%oq*8)&K@Dm=|`*@^~#gsW3 z-sM)$h3L19PrzChs*TkMN~gG&dDH3~!cy-pPOJWhTd18QWuiw0G(~FX_}gUm%aKw= zyhMcDDY&z`1MsP>BiY?u0Z-=jHFa|?EH!gg+vu>0LOivV9KFYH0(4st<1IZv9MUtr z1in3j$MoJ8pxtT`ZXlH{654)JS5k|kEdc&56|=CFBIx(qqX-E|n+#gc+AA+Z?XTOP zTGE#=05@nYT^X_Bq)+X&-qRaq>&X=1%^QBvx~J^ycd(rp6ANF;DTz%0i}gW!vi6vy zPL92ICA!q3#yhK+Jb332KcB*?QffkZfLP05kP zHw?uDczRfV*=TS(3)q=X2^?JeN;>`!{6Xm&qtKH8yedqnuN+3)@dni+=Yiw{OMv z#bQ5&DKP*{5){DeWPr$!ZyH4Cx!?lNRW@!^_nQhXcn*w~=4@}b;wV!1DG*F2Sn-EE#_G{=HW z(QJZX%ZN!o&o5_HDiY7usm3Zeb|!!xr%Gu2c86~NBC<-d3kmMaOw;Vn%DX$Anqn9g zN)(kC0=E=)qNq^z-$NI=n>~@zEgc9qSU#ueKxqMAqiJlD>0%sN;17fMuL2|(I%=;7 zGN~bh)5K0Wka&^t3o=Cfxw*XlKX#{|m^Ms)=m&}L#fAT>!HHt+xh#fiCJmyhaKJz4svX2@%LVZSndCAxiDAy#GVHJ(po z+W?&mEgAA{x)YoI!LyYX@C+0Me0NIv#`q^DJ07;mAy+{LeYlk2^)yWW)&gM+iVL_6x|&I(Ze8G8@3`47MY#h(_rVm}jl1R!*W-OUl>TUec4U$zyD zS<}^ZnCIM#@&?W@iB;}mxt`U!{rE@_h|nazeE6|ZF-0i6GOKxGPKNfwf*z1q$t5wj zGmbi@gJED@*kX8p{A|OFNX6vqJM;S5mY*7P@1ClUfS;mY?qXfhzbwmzr*WV2FKOO# zFLVhW*CA4db(BgFY9CrNXZP`gN_RWjTfa8Akv0&bR0|;!6Y^qMGP>!Hky}hrOoSa2 zTk@L_LjNGmhQH<>(!ZJ&lQ(niiV28vhB4VLom|=AOgNbaM$W<^f7+G~+biyxSMAf> 
zU4aZlPWqQhCE3VAsSC@j(pO#cPd~a{H0h8yPL_C1eVWAnJmN{t=6&NOz*c^?dS{;1 z+#lI!*{|R8+$Z%%i_pG&`o9f%n%m$$rKZ1I4T-CkhiLdlXXd|A`=+#alwz*r?yB+%ge_|J#B^lzx5UiEI zv`b0SHK@k%CuCbZQ$&Hzz0g-objB!+d%}j!EBVQ3PKFBI4Za&bJyy0Ao-K3pqjl>Dx^*qdj`=@3z8wv4Nd#oTds(zEA2xj4TuNfvJBQOiZ{d z&ZqwpEz#XxFP!?&@kfJcKeaCm75cc@7dgTQ>dfBFqE3}Osff7FSK^-4N2quz0dK#3 z3|Er3B>o=_!4l29e3yf2rI%rWJC|ZwnkhnF=HrUj8_=YH?53wY<_WpaT83sQyK%OW z7KU-dW7UR)MxBphYkaX!K{0O@AE*@wf+75a!J}`mtPm*!v`SIJXhJfn(_B?`Uh&GP zkB`u^_a5)Xgg)BlOX01R--+Jd8C!u4`M((K>zC3W5YqEl)$v{)`CQg%P2cf+_ghl=@{l9 zf>8ZOwMWCyo(Ii%mR3Y0Oaq^nOfOlYWL~&1mgoGdM+7aQfjBFh#_1MylZqK>7!}_c zzwr1;p6I6Pv=VEwj}l4zwhcQ z*`ha&nsC@Y`=ORCt!oc*9fiOUNKD_T+=a_Kj@82|y-M&zo4Ya{`FL5Ld}c&aP;xW!N&LvN>VNrCa%o^QBgInzU#t)73L*;lwA`1P zMtUqh-|A`jOu=B)$QAQBG+|rDICqC7m~Oww#ft7qR?G#OwHXMaNE$!XPa8lVc=IZ| zs}DL+bN>NqHokTV#B~^Na-0QA@6fOXRI{<9&1pNx+~t?j*|vAN2^jLIY^3?|Z&Af{ zNR=a`1|(y?-XLZf)Kv4%Tz8TcGHh~WXygmH7NvV%bu0Y`aI{=G7zpK=uMTE_y+efn z`{6v_%#PuZjDR5Kk77L=q15gr4qLgR$_zOBJSnVo9dlCugPZPp8m}$xW&hZ%CmUtm z%~uZ$W!m%#eocZttE#Pq5m*j%S}M3??Rt8+0(l`O2#Sl4;-)a>Wv(DKhYP-fy85{N zmKL9Q>O!4ar|xgn^m{*Ov5MeDS5siCiv;1?k0+O;>IV)Cqe*Ce(iuzsI9HPem)CYk?w1 z9){Fgf}bzdz4X%B)UMoqi`Nm-5A5p%x9OU8U&`kGY1E zU+(_3()4icIeHL}e#4pKWif6NoKo-{`DNnyzTV}qjjq}IR)z08CnNiYVfcaKBQAkp z)e{y9Js4&JaX-rW^B|i0DWOeZ`yVUjash<>_FSK6Wl3 z_u6pJPG869;)W^AKXPE50al#v<3{F~8K{s}BkvI+X*$?)O}EP>7$RhnDV7#5!y=B&i+wG>Q+2hBacA8MbEh{}^427oRzeY`>vCN|J;bdhGC=T8s`+H#4xfxomxWNU=&Y< z8gj@-z0*PIMpUsq8>KPr;MaUFJAm|(+5#Dvi|M3vzPwn0BB#;7hfhoh4)yAC=MH@M zx?W@V;x`jSIDvv6=g0D9<#I|YE~N{c^Th~_i7yr&k2>h%!@FvvA|D6`_R9EF`10K< zB9VzN=qqqvP0x;(megZE4>CLgV5zvQ8p-hZ!1*P=uQ?@VNpso5%6$NvV#ydQhE|3J_iPIad=BV*DQ(nKeyArFvxx+HXl4tPdBKA%aon zFDI9{<1W82+SbC|;7N)~5+B-UChLI|p~tI>!^5)l)n_23>Z;+G4(d8g-VgMYnixFYE?*v+jK zXWgL@85{(KM2`Q0waA8|9aKFwCu{fo9j7A*C9;$~h*n7S#( z5^mH47fbL{;<6BS<~@xMS6wM&`f>66KCvLS2P0s5Rr!Z?;C`{icMr?(edB6&CS&NJ zW{&2=U*|5RQzL?IqR8K+TRV17u3|;{-Sa%&6)OBUG&@brQEDnYl@xkJI!;qEZ0L?i zSl<&br12z0@TetXh?I8QKw@|toFTrK!lXMG`Vpy)_`8jUPnaYoRjfW`sgRyG&b{*5 
zVFS9_pSq5lqac^+#gaXhX))Wa+NNOpv{lB*m(w|R@W z_$aeBJ!)&k9_*jv-rS1h`MsPtza#mRuUaxvODq_ao->>?d*vG9hlgkgvjCA3*Zn14vw0^f65hkzC_O~D??$4Lsgm4PXG6QM2 z_96N&t=Z=3w=QC3*CJlhe*JexSNrFyj}^47{TkyDk?e5pD(ZQGCnq%2wZF7 zkT+hFrMr&6@7*rcJUD*;cKcankcm$y?Z6q|Q=91dcJx6<++~xgO-VT1!|a=b`-?%$ zJ@dj}rD`c}Fw&=`KR>HeO&vLENc8U6xhhWEx4#BhS4mQV1Tbg3i(N39qNaj&wO%W{ z@HDE$xfC8{BKRu`cnI?S>^C}HeqP4)>$1=I^fv~yijJU1& z(k33MQ=B~R+_>$1IN+L`?DYe2$3eh=^90cc>tf)P)`}>}JZNVT#{e4yzkZ3U|E!V^ogtMLz zmKR7K<`Ejv@o@IN;q#hU$rVJQhkE!Iq`yP4yGiLZl&M17k2-M9-BER5USvh;nT_x2 z4niT+{)Gfn>E8mKiUyKOWxzReh`JYTN*qfw8&8=3u?+n1rRQDSC#G0ct0GTPf@KneGg~?$sd-li{S8br1l9vB9i5E9RpfzL)`JQGi{@ zLP1Saw4nX-Gz~vSGz|O3K)0AOqdQYJ6-k!Iy{~_PoJB5~)tDAN>e=Mum)>_3SDPPs zj4~d46SgHSJ66zY@6`5Xep5#CI4n^e%`L9VHW2e1q9~3jvLP`jttp}Fk~{Incp$iC z1wSBYq3yOcye0Ne1$f?Q(Fc9IDCuU<%EfpKlIu|jV*&v zq^w-5m^|RJlGO(H*Aq#LJJ9%GgUVgeZGmYDYk#%=22oE>&AHD_9;+P9`W*qdCVXc(@_;=7M*i%kU4{eX%6djCIl7Ap0uE_T1To zgn4VF*4SzJLEbR{nV*(|3b6_s7n36-*jBfiwb6F5()QaM#&Enf)v008*Sv!b#@2W~ z$6RGN@djgllOG~IcMWwGVe_s7&z*+@%M*2TYIOJ67YaVBLfD0S2T~O`C!_^b-;&VZ zlxKfqXyJZx_$FnhXbZ$Jf=GLE5KpKblz;gyv6h~w-z*o7^HxBAZ(~XMuHb!gIYzJU zRLDzGe`_LaG(o^)1JmfO@UxYjphXrjVJDBTTr#cJ<(W@3;H5N zuq6>|@5`iun{+R$U?P!R{N{w#mOZVPn3IHT%#nX$NSD#qKzH9cu@H5mss(mjyBkxFqh z+~1uTXc@Q8KxcM&z4qxtcvQESr;`7VJ8560A3@N42`Ii{FTWEGnlaz0xh8I(oeOc{ON@JM@1@o4O_nqu>lmL3)0t)5x*BvD2#^)W5Z@^#rd75k= zPdBdL!@~Eqij>~-d91RtT?Q_`r4MRyqoyPRMJk%TLqzu-+F|GNxR-6*)o%|Zq~!Sd zOrFVs!7H@|Ym=yZEK+6VSy#8f&cqJQ*JRQnHP*`IzoP;F0ZeXW!Z7<~<%LTX5;yTd2Vm-{D@oTqC+?%oKZO$^ErVm12tv?imqSR0K8G z^drMdFXH%{#Zy(dK0Sl72o*v4A=2S<<}pU_g~ENd9FVWCX#ZG!lakA19oxNMEv8p) zJ(iXVp1`%=X)ih(TECfVuucr`@M8qRf*r>RRP{EDGTG|!N5&YX*Q|Z<1Xb%U>E^HhShW$eGYNRYT^2&T{-G|^)scO@;k=k&yK0O-dq!Ou|XJcnw-|7e;q?7;H(mSx?&# zQbc2sv(6o6DLb<-`>A{7Vh~JFTo20{^lO14*<9=yGSH%ClCByO08lDU-h)`Xp+a;I z$1^=C=C;2lj2OuP=kNFJ;zQ?WDa(p1qA%?e1xfWXSke>>6`zMJXa((HccRAf)WE9k^k8tWj7uiNB<)c2zY;U*#*AMVq~m z9vP7}9n7cSf>)dv+ZBqnJqli*A|Fuf6Sg3`*xu5=LtURy+{o!;extwM5aJ_46VYae 
zmp4(@{BB#-W#%Yej1rb4EA!!w$K(7!Txv?#EZN3o+Q3(G%VbK|q^R*A8Gk47Es zFYSqf(R3LqgCX=|)Pc}liBE3X?kGX+=(L2p-?yO^HXIF4^5A7eade1g=B?kpiT-!lS6N_}&3-f9NcmpCj(SB1I!Ir5#e zuvi2-TX!D z?NtMFS$mz!5@ctm6cdz~IazAXbh%p~%r0!@N(J8A2;(0LaJdE(OPMY&9@}uclqug^ zu5-|R{Qj-;R!EG_-{eV>AX%I_9u+uQmuO%d>+kF=VTX_vegN0mE#)&YDH!yt9zC7R zBYU69!`xPw0ORFoAcGpgoPYk<*6F#AX@w52b8*DGbRLhX!fAKy@ALt|(FB_xQ4&nC z`J(AV<8Ydrgw$;oU;eK3syg{ME~`;}XVdyav*JVIkRzs@lGAZm_icD%0*Wk%U0q}9 zb^Ne?qZh-!x>^Fm$21nJJ6nSu-i9)3r~6C91&nF1yB;0cQHnz9^EQsH3D+5Ex(Pea zEf-qZrLkGh)IXCkxS07SWLf8rc087AWfGE~8AAdeTKSqn+94Cjxg=P2hGYVXPVju2 zsfHzYXs71vbT}BM!;AMHK+T10r%wi4YP`}hTDAPnHsZdY{@p?i%V)M_LjruoU4hp! z822l;N$51c7hG;1cF=dw27^SgH@z`)GlJ@N*gv9dV{I?BFrtidN%39T!@KitIDF_y zv7L@T!wA*oDd96QISC7#_x#6eM0tZAp>~FNHOa-xqRnAKE1@q@GaD2#&n(O}VJEt3 zPEL2xYUA8Lbb_4HaYxoG1^-0owNZqWTzC(k$$mxW)xnX07yS(K+mt2P6;`j){+g~W zh9vDe@D&@_D!=~F(|aqkb_oRhsiC*uWLiqNC4^fJ{9KWx3N~y4zlSCCWW6zsAz44B zyVoL*uuj3`nT(L53)~I;hw$3U3h!XwJVY@aN>U^>#}t04TG6~TP-aVhD<7xdA5e7Z zmo;jQ=0_cLF@G|XHEFQHGrLK@E22zm)?3TlP;a=HdDM8>YflcFGDFq`alS@5n48|n z=3Oic-L@c79^bB+If$h$ zl(l*8jB8e&fMcuhQ{v8KI)+~b*XS7Z!mSVTH6IHhv85c$viVX(_~gaf-nZRR(3MKk z&+7Z5Huu=Q3uN(<-meiO`q=-z_F@pf2knroN?jT%%)=}I%`qcVKk>XfUKD~zO%Fsq$lUfTylDNIN=I5*=WxAfMN5dByq3nnKT0jWIkZ|KEenBY$X*B>G>)%?chf^r)5u?5`p)6wUGW{My>J zF8I7%<+BZ*XQ0Fswgm8Y$yzX!w%1sMy87ps=8gTH@X;rlbFRsz^$T?25qM=f`d!if za${TvQ)aG#@+f_d{X`pX`LD6NFC``ug(*w$YQ{OMFM4%3k9&LqTXgq;wbuEnTdF^J zrP<3eg{hI~6h8ol`X5mEYnHZH)GPz^uag*Dp6&Zsz$;@>?tx>)zvIsc{d;9)zahY1 zhpnZf{DVYMR3S_d`@8&gx1Jxl=&|lQi@&@NB1fypIT|UgXKM5W1eZ=h<}knbU;&5X zV@8%)H{lZ(YfK+2-U)SDvp&MvznPfKjGaS@fr%(6YOK2k5{+V5IkF^1u@63#Y$G<# z1dIa`c#|S4(#bgAjSK&L*R}0iCXVM$+jWA7fz9=XQ3P}PpeMIiZ z+}TFKf9%SS)jZHg$Zbx7bo#d7j+u;v?wacv{{gNupqQO~6Nz$M>D}tXbuvF@HIiK50DVb=xw<$kyMq9UFW+3FXXSjSq1=ev|krwLieU zzcr2dSt)PYZsZlfcOD)_Y)f6)MwWy2jo<0asvX#mKiq!m;7zwD7nm!;_2dXP{3n}n z^F)-`?I?@jFsQOCHA7c5bEzbq7t+P{-RhMz*bI+yJ-vF8Eg?-K$lAGnbhIukwaA7pyR>Be+A!Y)2JCjEQ;cW%MVu}{{2qlbhS=X 
zs%=hwN0jwU-26X)HyN<_^t$H+d0!Dje+QZ6U(&sW!P&!I8_*j~q)E$9eOMiHV||BF z{*HE|7pe^RdBY4^-D6&?Fw)EQrYj}ZIMgG)Pkis{X|40HiuH-fQghm7auaD`Do=)5 zV+ul3(6f$1kCV!~Rqs=7Bc3X{s8Q1kmY<-X(skeju_xTEX6 zKQL>J5)DVX8)MFHyteQ&5q_}1^`HpM)HrCg=mL%$)Xy!!$Sz#HBWiMdC`Cg+yt0k z`tMMjg3+7ndbB&TaVmAo+#|VrGvVz=7l}*(9~ZK;!Lwo(qq^FA8()t8T{>KXZhn8@ z)?I=OtA8{RzQ2(APrdo-0ObO+O~RtZE$xuSFU#HNcA8!HqI@5i0QeUf2A5oQTo?Ib zurHjVV`JTQg$n-1jICtNeg4lJ{#@PW%#l>8ho6<7&i6Jo#s&vMCV3&lMe zYH4mI{aNoxFAenQEkW7M`Fz{ozPUvqjT4xvT31)Y9IYdVQ(NJ|H>)9eQ&w@`+ejn! zQpz!OweZ|?+Tvh`|BM%XG&ydc-D!uH^yoXBery(8_4irYYa@vAbdGyI{(W^_?afC+ zk}RIu5GHRSMGr*KB?v@(Og~bRL zrRYU5Inl{BYH;2vQr*5-;ohVwzJQ3TJ zVFBvwh0wC8>`HswKNf+F|61C)iXcW~C5lF)It6a97c@iiL56u`UD`9FE%Pjyh$FaLvS^}MUeX`1W2eo2!-ZApsuFkIc@)JnFn6&j zL%@YrrSBa!hi$g3uUEsR#=53z4ZE!KxtzD1kl!kXbzYn z1&l2n24PxQc)3*^t)BNT@ApYAn_-9A^wN1v1<`h~C2!h74m2V@^D*IdV(djr{MIH2 zJE3z8Bo?xiqRf$YRlX5u#4Yp*^2Ooqxqj7AF(5V9+CRBmTw)Gc{3rnY8hBC-H?YmgFa!H|03`!W%YX} zwO1SOIvhtLY2~i6U&(dOjB0!gcCmpO`n`(&{ey&9D9-R6>Jd;XXHV^5n(KXG=31HP zBBz^FO{i2_?HiQM_|80mNyp1F5%KMNt==rka{lR}FSkhqmcga@r~AG0#tGDmjGnc8 z_xi->lW5EMh{n!*{tXYE1fQS+P?J2RNqWF<$L$I-b+4m3eD3ee!|6aSTv=QA3~5rFiY#x>Ar-Fpzx*& zf%3c6#7?xjuI7#s1t{m2!G=1Fpu zR`R``%$HY+zV8JxX~!RZ4Jh(;S1@)|Au}CHdY)(<`z2ynLUxVf2yLipHrG&gF_Hw? 
z?s`A>g@|^ykoHj;k?n#q7}NvB`c9P70@(3J28~LAnCWIqg=#NZ{p@5fIBu6Oi5TA7 z&J*YVm<&X{3`mJTd)@gD7_mfah~C!~rO5Pk+2D!~8K%pm2%24oWHf_j0|+=9{n-;2 zx?@1HRVPq&!aT<-EPxPGh8|eQX$d3-`*I*);WTF@k4^k4gH1`?E8pwP;;O_!(l^z) zDaWP57tR-IG|w?w{h>e3N+k#+Q+0F7Qxb`eD>e)sQPiCWut$ZyJQ5{UDmy9XtG>OW z=}l5Oo68M?7COn0!cdKNxA-d@=78lNT6jA%w^h8Brn3!nVjcfnw;SW+q>9{s?bJ-` zTE0f0ZBvQg19JQ#W{BlSM_-vWV~9}o7?tfg+ai=0rYE>#pt)zWRG_A@Sd>=Bc#%)xjAb&Z?#!y zyg6uOB@QTi5Pw*mT7V}+V*}lj7>0^Ggb+vvQWnK!31{iTD~F2klJ(#G-Ah;^SJu7U zDGAhfNi18O6%XE1MkKS?dxkYFs~s>x&i3-9yyCwL$7W~IU4gY9bX#sX zlyH^|#{{V)rZg=aIYHS6%B)Fon(M5ENr{rgOIW5e@%ieL>_2wNW66*%BFJ&C+_%Tx@mk;@b1x^OcQfW)m2IW#_A~JwokojV2Yekx9(L@7M z;^ZsfXky-HwlX1@rszr3T&?8({{UV=p}u*q%?ohSLPMTwqwuGTyhGr9OYPntocT`5 zZY-o@O3pr9a?YLW?I8!*6Y;$9?oeG zO7kxOd{pq?k6>u*WH#E2ITKjUK$&sDWgo{MqYe+&y)^x6^Gf)7l&0y_R%rdF4T`0R zrmE4BY4!|p#eDhuNql_K{{Ux?hm!ciQ?;J-;qMw~w;mkvR-0?9X?I`=?Jpu(p_6G0xkdzJFa#WMMr%wI=)$}u zh>tWA)opYvT{_dFDA1QI?W20?e=*+}{yqFi_~WnLYhE(dCHpMpJe?{DWEy?4$DM*W zOBE$@eqsv}NhZ8W%y6N38C6tdvk(9tq@FAE{{Z%f{{Vt@_@~1?Pq}g*KI)xq0c6c&!_y6NlJrO(CV!dbQ}8!C98oSVA0 zm-%!*4m?-jKOJ}$JvMI{XcyWvTErh|v3tmjYdlAIEnm3?oj z>30&^URp}CL2(0^<%&c(Rz_2mV0i>|uj<>!zYV+-@jt`zcxT62MYgf0xn@}I5HX5E zr38^T-ik#GcDfMDo(QkT&xk+pO^+XGUl26SBjNU^4gUaz^;um8p?$mU8h@7?$lq#M zqu9#9Hpc6;G>j01$R`|!#GD2;5~rBjoWA(A{(fD51E)6Nd}bC(rVHO*S~Pm!?0fri zYyKeDbl-&E71HZeyS*YEH&=owE^efei!*((RFknr0FhVa{w_Hmu1ZN9@n1B20{xi& z9ee`O;EO`FivIv!F_pa6Cxu1a<+)&|lN@>bs0qO8O?@l7Z&Uf#<2*@bZbg8UG$_Ptd1CFmw{h}-?GQ;=i@&HU)X#; z_@5pB0E+Z`tB)$$_C5Dny|YV&NYz=haS(os{;@+HrGQkgZT+#oZcTr|_qwmbFNU^S zhKa01!Yi$A@*{03#70T8ibW1X@~dz_1ZAS!Hb{Kh{0*7Y%vN2?cJ_TO-@nS&I_dF- z0$3-ElWN^9t4{@hQa~oa6qyv_OF>o+JAog^9BJD2p>5(R9LV~lS28W*xxgi+1Ubk69Fvj3&3@s2#~-$4 zrSZCT@DGi&Yl!?qX>QVgYFJymntbu6LpuWBW|e?Ex9;7P;gy+VK9>`Cd5p?vtVATI z1@Uxi<=gQ&GEOqX<~3a!k>qb~xBmcwbl`CB%jQ00_Bk-z2;olb#G~(jkHApoJ zhYOFk0Hl$X`|F0}lboFVh5J5#!8!a#KZs7V@#$l;(rjic2A69vl1ulA$lWZ8K1+50 zGKC@7ADAy9ziMfODLFl>#ldloYlgwR4LYxt?S0py=z3N7Q!TBESgF(Zmrjq>9RC1@ 
zd?VqH1?ceT9ud;5G<`bXK3%n(D-1xMrwRs1?oTJZL*ow__+P*t9=h>QjCCu0CrYvk zXS%s(b%F@Mau*a$11tcyORoe_wU@&s7Sw83)3_o{; zETe`n7nPwOKZ;NDZ2X#aGc5BL-cRmG`G4UDz)y+)030<@ruY|0d0xX*tu>;036rvm+blbD*QS41E!fYhz6;zOjymV-a{YSg18xqG6(MB2RH+g z42}&anF%bm){(4eU9q{yQOh<4N#xevwXW#i9Mfmkd_j9} zr&wC9`R=ZzSYe4rcXuIFeNBEN#awfY&R?*kxha1t-d?}uvHGS#!gzeTKH971so%G! z`JXC&(Lb@b!+(!9jj6{Jejo8neq+b0S;BzaGbzGdpaafL_+r@-4tB7~ufh+D-vquR zd=AxN(f%Q6O=j0mB3j)>M7DQEAf>#SMhjpt;gwHbI5qu7Xxi41;w>`UTF|c`zp%WG zWrF6~MV2XJag}9sC0G!02sO`m-{IGR{v>IC*q%Gk?6unL5y z_z$LA>Ux}Zz9I34nA1M9CAeR+7gC;GqOoHV1Y)JLx$;Ti?H{3WKN#@tFD|u6dr3)X zwrTmj<8Q~%_^gYDGn_U%_fq6bchRp-r_EosH|?$CF9qM+{4ekZmw%w$-Pp#{>eu$~ zZF3i%izaM{W=8@x+RxP_1a(^cZ`Ji}TVK>)Ro3tBbt~C$9^&q2o>s>LF66Pt*1xFV zjK2c@9{h9g-2NQ##+@9@(8&$1qa11W`x7BjUfNey*x6Mi5LK~;1aV)Ici-?#--$8& zMsE-4n$>~utCpTEUS@M~9k_4=Zvc!dTwwjxV1y0Zmpez9z+6v|;c(8CX!|OOYTmrx zJ^n|j#d#KIUlHvf+~D?0r_1#}KKvQre-->f@hz`}d>y9GscN^J#{?)_Y61gD=NL%W zBXgbxKqej#(cAS=;WmP9F$WetI0OGyHaz7gR&Mfg8 z6OXz`1AWrd^7!@^m#6P zS$wnW656%SpQAu?b$Q8E!NFMLEC?>#01g8YgYysVm-}ODUk9%4ei!&FNx6$%zOfR2 zZQUzJZ*G(2b7}?*vZ%@5xGZgs80Q~{-YD@Oh`eE}&*BdiYIl0RrE!lo>hflqSK2T# z%t_!3R-7Z@-Au|c#p7kmN$Qf`U*x|Z%&sKkIAEsmxK(mBrFRSY>;t`U{``RzzZ<|4oN5Zn!DlO4{M$&)vdG- z4cJ_2dX3}kg4WVDmRT6|3aSbGdJgy%`%U{W{{X>1G;ao!@ps1rwb%7mi4DGpRcF)e zi>_PF+zQ-Gr{&1~+TiWqkk$IeX`SVKJA+V)O5Y3?@A~T5`Q8UF$LDybH>-Qe?2n7T zX8!=#YsWts=3RTnt9Ri~3V|YtWQ@hAT*|DWL{Q9}L`LFdVc2a19FN@}hu;gl3-G%@ zgTsCW(e1RWNKsMD5rvXsx%uUAH%P>iNn&{>yZB*>-Xw+;SmTXCs;I)Mfq($)0IpwJ z@#lj4H>&Bn7Qe1(`gWtNOXO+xmp3uRHPoNoi5-JEpkDnSJ2mxi&}93DOuvkF}BT{}MZ>*#goSX^a%E2V*-EWfV359RjvQ`EJ4 z`;Ajqx|;U#>HNv(xRIrnM?7Vn$!0hJa(WC`Ti`#3-ZA)};yZr|co$2TRn%uIxgz0V zLcx5=U!q2&^2e~?6UBY?WB&jI%J`=zkIspx>pEm!7mg&C`)V-lB!J`?lGA?iDzM2d zAdhJb2|0{@w)`RZ8Sv-ej)=N1fb^Jc8KXqHyeb;*?UO$|(w~{mNyj)H2NnBfb>dC~ z2SF^l)00{yqF%q`epQxmMmrT<%NV(0o~`cv51Rh~W&Z%!f5CqTwCFYe03Tb~_?N}* z(a-105SH6ckra)Vp~pP2CPo<~0x^M)y*#x8Xy8+jQ;!^1;8~_+h|DX>zA9If`jtDjQ)B9t7!}H!<)p}c}sqx%vmgDNs=fg|) 
zYk2JaeAkiUzq2px$>V>7GTQi4$CA(DF9>UHlO0HX7(hxu!aT~T3w`WH%91jw&GGScm2iw4C8tGW8#m&KZhR}^aJ7l z037JHw}Loc8%wV(e$`{VV|}!Q6B{oCby7gw2;!qZ%=mVuZyM2{Uy>F{;Yp7J~8pPj68ed7V%e%b&JhkR$u_Tx|NjbEYL13HP@N#Z!T^Hwd8iz;a)>6lM?RuED%D@9H}kkjN|oJ z_ILf2J{|l7(Cr1at+uh^bXGSy^w!9?EY2{?5j%`g0l-ynQgTVJtm3X9;rZo|ry6Q^ zUhGxVj_UX6s{a6heC|P$=Mcxbu5LQ}&AX@NYwCSP9D+p$A9!SaE0*zx!|w!qLDN66 z{BNPyYnEen*sq{Tf^Np`rGY9=21=2fS6Xnm#c_H+#IJ|`A@LpWgfvePX_wmOoEY8d z*7tEqbpTQoi^Qy1c9H?e;G7!#I-j&s=c1mP-E{f%KUk^k`BgfW*G+n#jNczW;GLc| z@W+WX`Thx`>)Ix*bZyPG%z-D<8b)STl0rUvbxbLKTo5uFc1PWRvrp_v;2(n;Y&zeI zZLNGu;_1>kCb_wZWwx@BdD&==4lr?<2;4}>$3gckQq*-D9VWu#RMh8=*3!}^o_VEU zk;fchvokT^1p_44(_;9`HpO8U3Z+MGw%%T^`oB}>@VqCM;js@D2y5rE^gJWur|jAA z)8p2PYw)&i5MXs@F8ZG_0hEmG1Fguli+Eua!ayhTbtsDLcxABha;zXVv)~_t> zd?^&7R@UL0Gf8IhI-rUjoLuZcT^r<0!{%;9e&hIK#hw}Qu8{|bd?Tva>pEr5OgDBj zCBzbKI6GYl1cER}V_g9q>N0*U&2p&9jS1O3EcLVQliB|ORyE9ccQ4EwYSoR}>bm-# zFYssJKf}*~`T}U)4AY{D;p0TS)Lu0gccwhyrgZ~2$-o!`(!HE0pwg~u=W!T{xQed5 zc{xe$-siPL4^D+OPLi@iv5F|9Cl!gXw9?8t)E75)_Ez#;UC6OT5|?*!6;)JZl1~5u z-o6z0)B9J&YWIE__+aZam@!GL{pgUNz4md@$o}~|bDRU#y&ShI#AB+eIC$NjZfBce za1>p9Osu~X-u^HAd+@Kq+jg-MCZn&&fuPf(Z<#}Md5HUo`kuqM{K)u=@iWCA5+rmosF%IGx^FV#=2tHxx$0Q9N&M>; z{wd^&NRaGP&reGIr-%4E50|AW7r$_TAvYs25M3H1JF@iDD zu16XVl;8@&(e5RZ*A7{BF-@nZ2e0GSwb@aelPLonr)v8}1r?$35`50=#MG^=Zf0b| zz*PM6#sMs&wtH5tlMS2OW+UbWBq@Ga52>zXP3B)W*~`SChep~6?~r+}y^2HtjsYMY zNaGY(hOR+p4Xjgn5E9S0kYADs^d`H#BK}*gL+pwH@`elKmgDrUCu>|4F`$Tp_kqc; zPqBi|7zMYR95T&=@{BtibYq@#?OYh$Qt~|f98)`szm%0q<TE=8 z#4cS69DW@IUK-)IvALCyP2>UriAmhUpuo*^(tQqTJ0b;Cf<|la-YmFOTxaI)bJo7G z@GF(@wx1FyAyo9|iunV~yq+6rk~cqWU!6h7PLyJ)VXF-(Eb_9HcPbf_dQIvy)n$)|KgprowNC-D!2Ueq<_oC`?YT-%| z!Q<;+N2xi(epSZn{uH*pSGlnsT;n9D`>p;J>fy2W(xW&k;rkvs*#?{St9O={7IJx4 zF_dpYDw!D->cVPD=;3P4%!%7AfO=Bzk)%*C5CflF)in-61VXS*`aKMg{SLJBp`p?j#vv z7p+evviYkrg$xD{9Fb5=A>PahE6hqHn%CI3r zb}Rd*^sj@_{{V!lkG!AcO)A2Q ztbX%%^);5n#z{EOZgat@uC6}NK14eQAor>zmQjQ!Bd%-UIv=vZX{jtCX=CzsuOQ%O zo+(N`?Tx`B`Bg@Fp_E9h6aj_={{UKOmp2LXU=U7o+Ltk>DXhh1D{Ub-#&8ZY1v6)m 
zBX!A)w<-^%Q+2tLOu*>8V?FCdW{%nfOO^yLQax(ouG2G=ENBa{IV%dNk#WNu3dV02 zY0>J@XfjHTs5+Mjhbk0x$sOy$z99TkyRp8TMeyqza-$N=cyp1^dVOn#_+>5C*NtuE z4$V8qCLHoX$6EfW%ToHCEB^p2e!6US(++ zwm>9p6tel!clI^vzYebEp89)#D{7=;2R}Dn!o2dpSkJotIT}cibCto*^{$WM-;Cq% z-nDgmCZFcVa6l@<=1{ribJIEIxN}uGVWsSq+B&3jW5HreSC{Qq z>V&Kje8YEO&Dy^!sSI{1zpzn!-H+H|TQ0)kmL<({chma0>NCp?x-^n7MM08DzyiK^ z_<{RD=$;=*bpHSkFWc|AUEqNnXW!I{`HRON75rP{Mrf|Cn%>VmTflZM0m$4(E%?`! zcW{}WMUFVbe8q{zF=RrK^HHNaSDvRvxcx7wVq zad2min6b|}u6nt3TD4t8v5ic9D8<4|`m-&Z8kKq)l1--tMg>LExLxg`#E{dwJOPSA zsxVmKlYle*D0vw|h3dx@9I@_nIQf?*oJT7qVbT^D+A=FRTSpwJ%~_ip94xUB77Lu5vBz3iOE2!`V$Iz4s|=n{1QOrQ ztVUKv=*#TG6cR-7C*1%5d8o?uUezS%_$4{O;Aa&TxOZRP4Xutwb3hbsoW~O}A9RdT zpjJRndWuVVo_2XdDxNXBEA_`}S*KfUAq9(l-M`khi<3tr^m>j`QLw!#CWcvZB9O+% z_^5%I#^wZPps8S#I}zzktL{bJ^egNdRm2yz_bqLA0|$y|+iRnag-`PbgV2mu)SnLi zJwZ9Q(R^~t6!O294xob&Nt|QOAM(kG<3Duq(>Sk|-sLVXji;1ZqsCd+0|z|`>(Z;* zFWTBzfKmY}LBSQ{=GhJt9ZmaHY4mGz>iBnw@p)B6>|NmZi`e~3j##FRWr@|%fFYFR z5Pb;iPsp!?ejRxC#9t3v?Dx_e%@x>Y+21V)>6nT8ynhcvj@9;8hWu5b__I%4OGCL? zqAjt3`CT9Ttf!EFL)$g^7HN~#z|`fQtGd|!g2?mw8J!hUmXhlpsOa5G3O-#7^l%f5n5+hVR?45gi^&&ZexL*068S~1Fe1_{@0(j-^7he#agGs9|A?Fcsob5 z2HNUth-9+W?qx?IofwsSc$feb038#64op-@;Z-N?h1GHQu0s#?@HZ0S4sBVoQ}g4E^Ey^YH88r-A+z_;TCB z9tF~%(`>Dy5sivG*)U1nESTLRWS&@)U#;;ci8x#>K3uX&bst-475cv~y!_iE;B00h zT=?b51paQ{q4V$T+4~^;Jopo)TWYi0L#%kk?bR;zX@{Dq#fvi9EQ9w++jcjw9G=zb z9~%DvXHSKH9W=ML@xG4~nu^3@w$-2lEBkT-gxfF$nGu|@jrT{m?%SSg*>!ziOVBjS zooiaUips{$C!XTsW(gcoMx~wEj{quyNvz)sc*nxtInZr6f`$hvI!96 z1A~rL#&F2MGM*R%kzb0{&9e&FmzdP4CsO|a7iaoOqb9Er=B=Vn`*phGw+M|U4abBZ=oC{BQlWzhdtT=$4mX z6@DCQI$wru;fh%`4J%N%7S{7j(t@pKTlaAQ9D^9zfCUN%O8cAkko~Ou3HvkjJz6Myo#)gl>w)IOhJQXeo|N|>A|nS&y0T@Jah5S#Ye?HH@e>>ahNS_Vai)u zLFPNEB4-4R`%9mlg0A9lEA<}^@*L(^g_vQb?CE!U^LbmZuS4_xGR^YkN?2-eCG zyzlxSuHUjp>`mY=25DMf#D5!0r}(p9nPI)Mdue?83ldN=>?(Fn<8V8e6;c6QoHc%w zVR3*livDE20DjZo5x-}>aL=I5bu9|%9MQ@p*A{{Zmk;4g-x@ehr!u_^g&bnARcd37lt zD#spp2fMZvM)-tTDoU-4}D9P^5? 
zIh>vzish%ah>>y&~v@ytg0%Nlf9>o*fBNK^frJ^EIt$8mEUf zd(8u0wU)zC)8~@f=Gt+#NT!L3y0G>m85sw$rO|vtuXtBgxA6ytH4B|DQM8aeS2nV= zGOjv}$U(sz5zvkg6~7bkHWN69*C{m#^lNE(yKm}ak?}?|E1%WsUQz0v>(jCQb20ev@BT1q``;w;$TxCv1LV!(v ztR#sdtDy|60R$X^4{&Sw?>EV@nRRbri;cTJkI|p9WqGa}A4}Td-TSVq)RP#dmCI(7 zkOogm_}liI{jod^@FPo;Nz)~RT=8|nvfJs=NUoOb0Qs*96478VV22FDrySQd2QROV zuN*cSJkos*jBYlD5{s{hm7# zh^~st^2<`Sj`qT6?rr5=(aRHuM;@xn!5>QgRK6(uUh((FKNU~nza45)UEE69js<30 zTW}OEA%`U+`BVZx1+kNpUfKIQ{@0%oz5w1^>5$JRvEc@tUhhVo^RY-F&8&@+103$a z`51wb*1t@|cvXy!586|xrB|$adMDr1_-sFjSZU(DoZ-t)q`y{=-`|R#2tFVDWbo_w zYsdOrSN7mJf@}r3wUNdQkWGTow@l-PzzhiDzYhK+e#9Ol@W;m;8{uDvyh)?aZ{mNn zTxpWuU8FY=K?97(JZB8k`D__PYJ9*PpgR~3r@l1*0KrQ%r|{Hv9|P=X8pYf}rN7l7 zUoOeG$|N~qD`P(~Ax1hL-;#Qti@Z~>Ytne9S=DcEHHjw|Ci+{4Yk?}9t1Ow^r7$*a zBWME{#d`b;!@2ev6PG`;gp;yr`t)|Xw!eAj@n&0ii2NB#xY-tAF|Kw_pJD8 z>es-Z8!n{OG*Yo!S!)G%+*v9qRTJQEomlM!*zQruBxG0Z7LlvzTJE0?t)kxBYBpC3 zz8jlKQfOsdk(6{StTG75Jw<+P#W`MMg2lI)UCHasx_>{@q4wN8nP4$kb@dx0w_R8D z-1x8dw*9as{hwoh3+TE9-R8f4VfKqHLgEs-R4nXJZ+wF*E(zGZ@Cg~O!M_)H>&5>7 z5cS9LH;uK6&0|z&F1HsmGZ=R^{jjb=!hyL}j4F=8zp9@Tcvr)oG12ZcFBEB(x~`ve zjFQ{k#Ir_2sZ+xPgM-v^I~x2${i1(i{VU; zyAkjV@hofx=Gs93Gm`2n?mRxua*RA(3ee@M{?pd$ucniFD5pTKqWpwQ+yq8+~4DTP-J0NW}M6(y?U4!C&7_j9EtWy_b^5Xe55s zp)4{*e5Lz1{>nZA_ywgc-Kr+D;%k&owd(W6x!$?r-A+s?^AJLup5)in#jrDq_>L^b z<+!YTs^yU7hhEM1OQH4LIfTaH@R6sEpESC8FJ7QPt{cZc68r)1&c+Q-$J+IlgKKjr zf+?@2Sdru)oC1A^C!nvAKWZP_gTg-pWxMcahM--0#!Ct}k{o@G%*-ESPU=e*U8=?) 
z6qR6dEAYGGN5x+r{9N%Az9{&Cs>`b1Ny2VBVS#V~NMau(N^z1D44zGUd@bSrFEmQN zB3{mu+1pp>yLp~wdB#|56x|9Q{ZGMqv--BOzO>YBA-=GgC%3pok||?g$mqi=%0~nd z$Re#|0nL9npR;f6jpN^dHz!KD^E^Z0dCGa3ZPe~w4WU#utc*yD2r;t-KiSR*`qTD= z{{VuICH9uS9{33q(8+fJ`(CrCL*>hESdp^YEyQoY<=Z&j@_`Qgpzjs#w2gB^Hl+cON9HtQS%Gyb%e?>@Ach(1KV;9?x4|C_wB0|){wkWs#a<`6hC8ckxG_GT zXp*R9Ho%ri9C8Hz0L9gIgNpkz;t#_g1N>3&?A{mg#*=TU*vgF>i4b{^?#4_qV|-1( z`H+LfepAF5?n75Nje2-~XvMx~w>QhJyzO)D@@$_srT_2ZeMU>%a8FZ{t2JsyMG+VAHwZM&VLHuTmsL3 zD=p5O3qA^!!TU#+pD;U=x7|Ga$MkdH2kgJ_=imm76goYWI)=R9HRiW-61CKlZNU+- z1j87~!XX`b$t2g(aaR)X<|{6ZIK$dWuXRV=aDKMZ-L<*k@b*KVVR6n^agF`!>YwAU z;C^;~*Z%;wSBf<~QvU$qZ^EIdcy89l;(JX`Q;{Ch`ryRDO52>Qupo6{Ux#dbqz+H! zr-!^%;vWlmj@!ll9n`LDHLFR@%s0_W#&Iz@W-P?+-I5(!D(XtGDo8)kr|lK{DEvD8 zp>&09N+r@fLuirBZpzNxMFUroEzF11}Q)6TcOirsw0 zLXq(e%K}g*++qO-FSLGke$zj-&%_Up8r9a74A*)OgcP8IL<}QlzyzDJhcdA^I~bH) z0tRwx`V{adfP5qH-@|sE6!2b~2AQVXBBDnM0M1ke-y>smjzHy@mOagUt@}^@#Xbl4 z@ofGf(0rc__`(R7M|}hdRq7{sYuPK|b%a>DOjSbot8# z8{P$x7tH||Y=DOV0iC%A`X>FKzB%as0JH~yA@R16r(Wq~NY?jO_mMKHPP~y6xY;rl zD&dfkh5+HR2f#nGxBL@+4J!Bk5>JnB73`MR0j)GkIgFOBu|7lHNwrLY@IQ6{U8IK2 zc>QIt)9iH1NNhCgc&#mM(PN4j5o3-tQGlw70;mIk0OG5U^PHm_QAY&})K6Kh6}wxn zujWSvWqFnkoBGSN9?^QgTl6jfDb!LEftvi$A9yWsUO4ghhJGIClIy-Exe!2{Bx*NH zEB08>a>w)?O?jWi{{W5Je}HW#(zH)6Q1J;*-P@9Us0)49l>2qqIX>0-m*X!Hc(cY@ z)VkM^lDdxDMZ3kPUbXv{s;Ri_8Oz3VU5c3?%^| zuTzYSQ{lMD+(5wqsrAi%*_CBfGWuS^gl*^5pMhri#w#$Z?W@VZE{o6+nb&j&%kQ+F zYFI8IodA3@at?UrtIIS3UHot1bM>sdyHrNp&9IY^&sz0QF*}ytc(!y<#GmzNAY;~$ zM=F6SWs!)_CmeoNg$p&Lb2*GaX8YXl9C!R{R>DMUb22}gM>!)H{3`FD6))`#)*b+e z;DSn=jy-DH$1mB+VTp|HTphVT;ZOQsJSV3aCd{5*`rhlK!k&6^;`kawoPP4!+V6~5QUR1dt9tih8(!JUN z6qbT}ISi;HJJnk%SPp~UxN+Mf(98bU$huNei7xU4e>OWX#c}q!-K3M;SkE&}cN=A$ zRaG`IJ4YUaf@@<%y|+y&IV6fy^CT!^=0;LZIUR*mmr2nhyO#RO1YmMqp}{4GVg-57 zTArM?3FWyMF%T5?>}xhV75f~LY0|8C4APKtH~LnfSanNRMON~NDsrW9T<`XZ-Wjd! 
z-dlBryp$3TEq5sO{vq#OYSz%?Y}(PDMAM0yK`qV+01N^2_4?P=J^|d`6w~IE;IbV4 zb@67Rn$DcMT+oNPW_L0;JAlqO$6;S*_zGDi@Lrm3kVhj%OD1whC-ScoJfH3~?s_;C z?`U;58Ip2$7F-N|6fj6)EXV1)6y>&bB4pgk2Oyv2Rpo%(5G723)0+9n$4j2fmqt_6 z8YQy_a`}uh6miGUS0~%0bXvA!0Z8Zh3d6^>chJ!sHFvNMl#B;@QAoooMQeL%V=LIK z>-)gK^cd?-f+q8%X8WWJRAp5dZex?$q%w%myD(K9Njy|0?{Z@FGPSJ^%T|mf!T`(n zTy?J**0dY#B6bdep&W-&Pagi2?~KvN7v}(uah|o0cr9)rc%&j#ZmKyr`d6ig#!3%E znxzEwK71<{3xG0xI@D~e>-YYE^sc{L@T{?1eXmOep=IS5V5zPaQp(#i&Pc#Moom{q zUNcuXCrgxUQJZOGmPhj%PtXc*n7jFEhXiLMu&CpTM_}09f(=-e?*xO&QL%^4bAwxn zS*sO)d-kw{{a!ga=xJG_-I#V9edE;U@u;t0X#+TRQ^3NUQ{s{B;y}@69CG0D57WH^ zCtSkcK0bb*g;SC>`%4BmB=c4-OU`51rAKD=sn=kCz#IGq*c@9jpRop z3~l4|uRh1QuIF5~wp5soHm*n3xNYg{Tva|JWPktB{Ry^+pxv}CdSe{bhIZZ*DaWlg zIN2fEI}8ev*fPczPFuZx&ik|Qgl6i1i*tdR;Q>3&?BgcA4^v#)xL>`FKDFmJQall| zD-j{?EAej-Zn5!S-CwEwKj6eIlhpqJ8~l-AN)fj9runfmm6dV`#~d2X^T|a8cNXps zJXBFG*55M~FUqO(uY$WjVNukzC990IELn5FZgEkn%Oo4-E09X<3yz-RoYP4Xoz6C% zz+l#7mn`F8JZChm)Th}N?XJbCWefBalif*ndSH507{Jas`kIlxQI)MJy=-GsW8~i* z5BRF@f8G_<{286#@#VaVrT+kNGT?U%Fl&_Y!Ox1XbLuOt_%UqPemu9gR@hqJW4qI6 z2EVED`7f$cU)_)7e+Fl8<;T}g`Q`bu?C-NkB=aOjjDj|Y?VkN{Rp!v_q7qJIb!mQP zZZccg5sK5eg5DUjJhDj2xZ|FK>sS`{@#MON3vE*4AdVI0NY~eTy zfyl*6bWkuckaO1+R!tTeBfz>UpmMSgrc)TDWCPq?iv z?$r@FqZD(FE75E2Gv`ZCJaJA39cwh-&prC++8vylzM~+>wY3H09;AWL4|@7@;cx8Y zuU_3IweccxW{fi)l@SH{f~?LM_Q)d@$C~CCe7=jTikgpQ)%>?QvfQ^7kF2R-VC8+^ zMe5I!^`_0p3&<50I)k)INwI2pjrdhd$78EJLn?PAUYkXof6vK^&}8za`Y? 
z9@PMyd$OFAncwr8+T(Jze z$3Q z9G_ab3jY9S!t&fN?m+a;N$HBQ_S;q2v9|-y1EqP@Yc`#aZk7kx`_c)mZtzuPGK1Nb zaZ>neR`DK>sxGOg$Sfqczzm;uP!HUZGlo(){zM*W?Wn}5D-~?Qp!ECOvw%HM70p9& z1g|B?l2&;>@;h+9LE5~`RT_2ax^y!<)%;d$Zc;?lfX zG`g0W&xF5Yx_f0k1b}lQXZXJG?~&8sb8Z}+8`i_?9>?qX4;RA`YB;?m-=X$>6`!YS zmKwFJciL>pac?NfOBvj(J&3FM74pwVW_`MPTv{lifrw59Yrwube$ihJ{s3r6s`%34 zSGl_mwmN;Hi7sxP04x*|6#&S~NXov1o-4%v0BmpC(^LJPY^`(;4(Jy*TIRWI?_;Um zNB;m4t9DbhgB*sIR3A5%xZ@dM4SogqtHk~!@xHrt;(r%vGwQmv>u#Rv;fp&Ak`xY# zi=UNL1zy6xN#M=^shZmtikB@PE#LVc4aB@(3>%gg3$0GRU*>yv?Fsu%d`11BHJKY% zx|2`v;z%0LLWV+Zp#T*RCr>s~80FhI+(Ll31DW_M@$=$OzKi5Dkp44nBskECjP?x$^`=$4Y->Nk?CHrBT2WR_W6;Z;;*l1+48 zI{1C#Z-$!Z{3D(_(XO<6@}6SFa~WSTGmY`L-F46LDI&je{>jTJ&{<^T9$RS+{&wHy zeo*mQhqe0bXDhF@`hUPbqF>qj_O$fgd#xV%L{lB02_&~|%(KE6 zkO(`#!F61W{YXi#Vy|__b;GOGj_K{XW#SO+Q7`?lo;%<|{ip zNZMKFox(*FYBJ2sZ~z06PfGlj{ilE6q0mOM_&4@`xml!Tc<($y@#SMAjiSUc!yInR zuDBT+27Y1jkL@x0Tl`1(^{LCGNM-Q%h3zMqZS;s4E-n~NsOnPz_M7R(aPCI|P#v+L;TP45Gkq5n@#YgJg2UkwonFgj>u$bV zo*qTQxV+V>mHpU%H&%ZzSGKoWb>!N9qXd?>cG5IcK{^=aibObMbYK}-PXrN=YAgfD zujsS(mi>*q5Boq_uBm8Y@b8M?W|4JEdt@f>DYQp-w+$ljxP0zVIxynD3O+G>6!G`K zzZ1>j&l=cC6k_4wv(yn7S>G&R67V|#xNTraATZA(t#)v~9APtbsr)s1#r}`4=3&Gf zEs4upe-%IG{{Soc6{onA4#X;jjdDUL90SfzHT=dB9A!=qt$qIh z`z`+6o+S7?9lwHpBQ4K}d?v1m1i{){?Kut?3B8Y*0xmHSIZ!Z}&f4=lLdf%6!h-sY zuLZmojrrfNr|Na^hHHSq*S0D;y`|gw?tZ)cbN!mW8vK3Gj+x@CF>9#ARjoCPL`A-~ zAd*x$2Xt;3gCk{0JFpp5m+(vD=j_YzBldmPB)9Q3t6pexPGZyb%XrV(E|&$AHVYVL z+74oKoU3dui@g5;NZtzZw}k#7Xcqn`@NTJXt7(@x^6o8U5-vwoIQy(SgUIx)t!GBk zb^R{uMb~X&y|BKC;JCJ(cf~An0<5g3f(Sh;@=hP)9DYqdtI2ayPguU5ouA--iJb7x z6EvgkD85V49apr`(t2n;Q* zJD6CTsNA~kvm6N+6y%Z>M||KMaJU?!hOmxJsK>0Eyr1g*68RBd7nx^xl^J`_ve7T= z%=x3jo(u6`jvzi(lN);rces=!YFUX?1oR(?;AG$&38HA2$0#VuNGijS zN3DNS-v@pOd_VXl;g#^$f;7mWEQq(4?uu`3oim5HlmO0h%s~6Ale7x(AKHiZF7S89 zzdOY`G^?Qa(mQsJJHS8DFG_*st>OS%E3_z;K4nva90d#V7(W)Uv&Xs>Y4Xr+%3Xb0 z{{V-v^%$QIaWKOwv}C1Ita^Nz{P1l9kwyh~zY+cz_~YR>h#SK`JkqDq?#YnbnUsXN ziZ?0d!r#P&5>75cR2iN8^oJ>sr+2Ictl#42%Xz3V`s4x%p8*RU7~Su2_z= 
z+VS!Z63eor-V#3Z-MwF@lJrNdn(-DlGKyGgdDfmZoY2hD;^IYq` z4rUR??8aNoG#miY6Z1(5_lTYcDTDI2W&2P5-<~1y(_VNd;MCISz8qqUF0zJ5bsOaX zmsHNvZwoO40C|q9$yIhr{L8>2nwaBm4mhrQoDG#!$NvCm%9`h0KFjmBa_Mm-L(|5L}5ruq<69rNcQj||ZP(*U+|2bZ4bxO}G#^{iFx z(JfnNUR9P>$KnHstKvST;7i|$b{-zSpTjzI$qmk=u^%NGPK0EJbW?!t ztF;MG#1&Vhnd9E*2SN8&{RsPt3$*n*8f2 z@m7u_5h_^tdpJLZuAWl=05$&r1p7?S!|Jr@drUOBq_Oz-XDXX9)28ra?nn>;9W^3XtDOI3CgAQ zp*;lff=cI{Ff5Iam5vTSm@RWa*E}b!U3hcEwo~bvrIQ7@v${hZ(K9G;8BZfB!H2O1 zw>%N!zZHBo@jZu(d>eZ$mZzu49yUfHBus+QN~7+vrEVKaDrYYNmV z%T(9HFJE5k@jRS^i!oVb{;!BmRMWFhc=~^l{bW}y;{N~{_)oxECC7<8XR6w0+Fh>S zHtOEcg_Lhr)97eO1wuL)b-KuDp-kQ{AT!k%O~=YXw)kJ=-L$+&j_;Zi9> zXMJ3~S5xxmh|IH$Z#)}v>20I_UZ?1n>~;HE-28ab*TViM(r=T*`dlOJnw7n=Z8GTq z`DJM%8*S!t4(P}TSY;2*v3~If0=Qp?{{RoXA@Iw?mfjNZK9>fQrdt3+P_7J;Z3@yz z-+DO*!{~bp@^f0jXYHOq`HU)Iim#A zfgLN%{v-TZ@Q1@4HLo@MwKq(p+t|jy+)mwssXPx>I0C%*!cumMyE`X1!Y)#FM{}#{ z`gVn6M20eJMo<1e~Tk%)LtHUm{y3$)Sfh~|2x49;=HKS>9WPV_d zP7vccC$|}_3r$XUf;i=sq2v;C^JBW2=^3peNbRj~BW!%Cx%rQCU!viyLfneUADCj4 z>-*Im+2>lGn=Py>D8eZrUw9=>YNW;!h@I2{@~Grz>5BC^jlIU7D1EmGxebBbcKX+z z$!1nVG#0l25DK9Ls5$G5SGPkO34BL~iNQ@P#TnmZj#*+~o6Jw29hfhzP-~q+Nq>JF zaplUQxXEu!R@Lm+5vVaRS5i0prH?g?Ce+_kM7WJ7w+cyMHvWg4iu9G5Jk+hDA2Xzu zj+ZeV(jA~DVmtS!bjVCkOg4)(>$;x0#CP6uy#^WnMbJ&Lo*bE3eO+FiQ5Y;%WpILY-Ur@quyD4>r~wFVc# zmOwaPrrv74rM_KVZ5fZ2v2IiY?=>-Q-rhyDpHJQ#a#-Z!-nyZ&$oDj))c)CJB$|7} z_GrOeo!K}YF^c+^;i{y54%0R%8>7o|TPHrIzE0Gx5#koQe#=KWGA1$z`d8K84jx#1 zDW|-JWJZhSz`(B$IBVFuo`wabBhX#Pm*xfCgN$OcB)Rfs`#C2rODkg_dm6BlGr$*zD@e1k1<%U4R{?gBf?&C41=()aC8;aYLF$oM{8~* zWE0l8dhMk2IB`1w7^O6v_US?CS{O<#D;Y9Ly@}il0zgsJRN#4b>}EqJA9Y4^Q3;>S zlHNFWkU!QD$3BO>7N^N?Do5N;ZuM#xS0hw5NsuVUcOHrdHDca1WduT39mpL1R8P6k zsxVRRGgbb_D<~pWUt}2k>XyN&BF1Kwhv{B>;%^A9sLgq&ffdYo1~33wKE9uYcMWNH zv5_2~Ic^z2;Cu7USB}##i-pSP?v4rkYdRUl)zdPDQ5*fnISN4N zC?-UnL^jDinnJ-y!Ek+h^U|u=tQO4D$k+hjG31Y>OK_7|YUvr-?XGbi)e6{S)SBk9{=cPg)qe5NkN?vB z6^X==i4f&mk&s98=ANvi@Im@iBh7UolrPZcrh*Wwl7SmM;O8R0Ws>TC6&PKVwL;5h 
zBdOXz>*<>FbEiy&r+IwfNhdYvx~rA2n;@KE;8&Wgv%r%_ytHlebNn^<$B1_R(O=`2rYoj-qzk_l8C*6hd6{-gLh)cRj`bsqCmm{}LmleF6O4-9oVGBiv}eiQ zH~#>UruY5fU6;XEjeK zpL(tf4s+I*Gu!;O#jrRS9@NyDR*ZeCiFwU$&TwPZsMw1C0RB9r{{VKpYEhPAJxzNp z{{Z}X$?hw~?p(%LjP*6^Woqz}UC$M_eReGP4@7t~Di&;_y3pD;#c_B|#jJPfQHg z&F6XMCZ1)NZ0qXd)=S0O>iwRu$jdf039sDz5A29|f) z03A(IdoeVtGdqH~!iFJu^&KkWV9N{$Gkr$PAS-oVG9p>=w{HE5&$q9=X64bH4)QeRxMA{+m73Gr zM8S^T(wqXLIX?L5_|y^FEC5Jdm<~=`bxU%vM;iH1z9r7$owM#b8q1Ylk)=1uNL`%T~JT2k9t$f0Ke zy$^Bt*2+Fc6Ri3jCxiT9uXrlrDYPqswyh{@n@2f21L(wBLUOgejr!E%wdjb?Z)rkA!Pw_pbt~n)2$@9vAJ8@X&&BtX#>S9 zWUH<@1QE@A_HDzIaVgZItuyR+e~c^Oq^Q?!RNvv~ewYKATKSLR-^ZJ4*%L$YlX+J= zl_S3^vMEawfgl42M;!2{+rQGV&XXv3*fO4kSLZmq4Qw>sI?Bx-vtaSmaa7e>61rzI z;~xZgN5}d*=pHoDthK#8+i|zPg=Rq5BXaHsYhaPMVBnF^n*50PoBsd=*6}m^MThK&`_wm=d(f#MlU$f`zwea)cCV^!hoo8>Uc$p(2^In=2`&806A^fs(Vvh_| zDxbT-CcQJ_f5GpE{{R>KKK>o??voCoZc)}Lh}&~+WX9%#Ve`bt10_#58O|%zX}tPZ z1#Hh3R~7Zzl^dzv?*9NT-y^D?U514Zt40fxw?py6O8)?YUg~jtL{`~21Wu^SiT(=~_;3X+xVQc>YR`fn0{iOc@Vy^@I za-&0=u%L4XDPlUfz;^?^V<1(8{#wTiMlXT`c;aj6byR>>1+kg_jo|E%;v) z-XZX;v$K7YHEDGXHa6u|$0m4`85?MeFrX+AoZ=f@ugVz<(KK{L&Bd!`@V+3FHF2QiYtEu}08BreA-^KChL{fC5n zN0e=Z%_Zcg+x%bW_xC?A;ywuEZxxp>SpNW*`5%)X34S>INBAkN+v^?%)vhhz7g5~Z z>83^&SHeCDPZ&5Mm*p%lz+(r2kJ5h^f5A_*`@aRr7leE#b!p;Dl$te?>U5IY&9)CJ zP(p(-+)vAe#?W$ef%!S&j|=#N!J5_AhkR9GK9{LkOz=%@aJyVQWUFBGUYo0#-}>l$b{jp+viQo4DY&PlyLxsypBH%7#r{6j{{XhU zYpvgFm*q-}i}~7T3QlmjI6>^74{FBJG)-T`8jKn~gJW^2U)++g+SRP)yZB!5Ofvv=&h@U!5a zkp{WpxM0=1L3wDA+-uAUl4#flA|+>#LXr>*Di9lL=QtSU;*KiA@b(rEr%zY#q^z|3 zE%=*cIW8wTuKA^+_@woIhshtaC;Su2&q$Wf#{U2w65mpuNETQ;I5N>$%vo7Y%!=D4 zWk6M<89>DH*ap8=thDc%Rd4_Uu&K6e85Q#X0PR`v z^IZ4~;3U@kJ>jeUZue3%TETY$gKMYFv*eQG?U403RN09zM&+;PtY&4HXSmp@zG=zm zwoB8ZKW4&UaCtrz%9`bh-SvLGPigVj#V?273+y7)z94JXdL6yPh~aCe!%D@`Fo*$m z+Ff@Q1SmV3025tak*#R{BGWDPjSlwTP_VmCGT!FeMV1**@wr_IRvF`i)MCGs{{V>} z5`1a#Ys8ayyT@9LnvSD53zqpKi3dcAG4jTv(Lnd+y|eaf{ir@Bdu}M5&&u_e7v@+v-P2VaEcUQ4n@)`e^*2koiA}lv>W!mm-HeR?06-oS*JklQhV<_c 
zTxr)@OqzwXcMWBF5L`j#4Z&mx%%OnooQ{Mk$Q=vBlEzX+46FztxdeI|_}qhvvDr0u z>|Nv5FK_q{%^ypf@a_jRqNdfQ(o27zbNK^C$j(9ZBau(9*uUF*_D1+`rRX}h#$OET zcAgm25?L=b3p=?;x74Ja*KmMkjC1ZR%8|z2Yq10ZcMr!*spWba{*d9!`i5CQ0HnZc1Pi#40w;mdd;_n{3WK`>e{8*A{$#oGDz6! za5z;R*pQ>S75g#!F@M27G*1aM9y9pe4W_xPJWW4PBXuxiJ!dc7{n7BqfR3gpYGyOF2l8$j1t!B>M{f zed6yC@mRS_3zoi0TVH8E@?VeSe#ygp8A}Zq;&FDA`*dID`571f7x3n*qPK?iEiTUM zNhqvk+vP2klM*=yW78#ptib}@p zM>zur9>NG=kzW~<=XsV*T~?td7WHZEB>Shm`=3dV%P={e7fz+6Z7#Q8KhXX`@2s!% zoBNF~R=1YU^3}{!+s=&~u}Hm9(ST)P+!31ePlewXejxlD*MH$6_oDq{>zQ45R?2+LQ+5=9TRfMjW;~Q&1ZKrB%WH73%zHgT_D#2Y? zmR;mCF74QDp-;;H0JEq36P6zh>Tlvdj}2w3&21?A4wqvruWSkwVKlPlKQJ-erV}19 zhTM7&*rDPMO^nN>EPQ?bu(j{$<@q0&z6{JT*x0HsOX)7&zGvEhvj@lNzi4j)w~H=3 zJ9VH*1c?h7etTQCDtAk3F9p+bkU<$v0VLPbn5wq6&{@Q=TEvjVg9?$TRZ-{w;8A~d zC6$H6f2}eh-bQPOWK9ggIi~iubpb+4}Yt z6)DkCtv+cj-*e_4+C%ni_*?Ol!n(cZh^@5!7h6kXEM6utB28)JfJ4nDQ+z>J4jHkq zV;})voIkS%?2quT#{U2kwIA(|h#EhOth_IH-|;8YmPpO|CN@nWmm96gBr?UgWDLF- z1LEJcSM7E2JL3+$sCY(OdHgfsIA)Dw(PfWi%#8t7^Df zt@u}0v+);(?`G5W3ki~VCxN!QJZB^7GC2o>(2Q5^yf?ym#B!-nmMPPem%AP9r+wA- z(doZ)@|^EI%PM9nOAe%>^>=Fj04~2X`o_{cHK6DkJYF8rH0w(pG8Aai&fXM?0F9l) z%TO1bobk?iuYmsmX&=}R!e0|EA@L`|!DHbqWm;CewMaJDUwMSS?=CW;DT5R9uM7?t zaJcI~VvpKG;xETf4&GY$>%+GTE{upD#Au5Jo%FI6A|V#l3o<6s!ZRQ_+`=HpAMVWV z=D#C&jK?C(r4>6>i`LHR`t*16{Lj&Sh6^>otH$)?wCLBTWAlIZW&MFXG4SL5DvyiN zSa_peaU0uOG-gXH9n15fNeM~cIK#3GVBm9KqsbaZijjb-qY9&d0O$v`I1JLByw{zY zXE@BtyRVF!ll&K==yc_|ZW{qh*Q;xePdg? zj?&r%EYBgw&;iwn=m6x`!5<56;-nP674l zUjl2B-*}HypIg_blJ@%GhLTyBxi}<%Gr$8U0B|YqY($MHC0u|r)YD;0lqbyj$M>^? z`B&>XFAQPuDO7&+&&Tu5Da7OIa?^aY*H7~~`><`}X#*7t)QZlE;botClbxfIdskU? zXl^1gg$x^mlZ;n0eWFVO?@~qvPTp~j)$U54RCp?{HsZWUV=3x$`P6NCU>iiw76uCr z0LQ5!gniL(-ndcCM|j9>%xpJq#}$>Cvg~f?nv^n@g=ENVoMVDVf30emjl`?uOz24< z4nR2eHRo%oT4_$PPQoW6A)A3#Zgsep>N}}qx_68fi^)>R3wnTXMId_}B$`{!WL9;C z;4#U^1a_`M=ECyU>RXAIV-QtX6UhEm*IG$;1P!d}O}x#83I+fJryi!Zq_$m3*evU? 
z9&p?ceQVIeV<#4ro`;o)!YL-wJfUUOV~xwZ6~=cS-oR8*{kBlxNbe+Im2QWc-qh@6 zvW?}FGDeTJwsIHKHASuFxa#kY#+`Lo>Q%e&=1V9q_p zaaZjA&v>CO6tgj4Q#t#-ovJ^x!tb<^mgIZXvcn~&>0!$48CFra`;2$3E@asm8&RQz z$pe5&p=_KA^<5&(?3UM2xGZEY2dOQJSGUMSdeBERMyw9bKqImD z`d2j^%cxyXtIc^Oy~VKskdx4ijywJpvlzS5?8Dxa**_>lZQR|4F~K!<@(U|FL34Ww zd5odKOdMy_*6vLslc`GjJKa(UE@1u9g*VRXK@tPdH*sHOd^kaK;9V|qy9-2EuHeCN zdlAKag&mcoX5Rk*O_MQPsau5&m10M6Ut;_|5nA{!O|*(Vh_Mi%PXJ`si=0LK3%=*4 zmn+oN>86f03v$F^a5`kwi(pSkKaml7^wvH87a>*wEj>q1tF=$CL$h$v;XPWRX zne5AB4_4eG0G>TDT-GbC)!>p?mTiSY)LaN#4Amj?TWP4;DW@IdJpG?)8iDR{ge4B_JLiOjRQ8Fxrcb$>2 zC4)9e^b`QMA1U3tP6yogeiE8_vL9O^V>~}L=*jzl(ag`&5`g>OgaXfGmts6c%yk5qX4g>K{Cr_0;M81<^Q(8In+ z=ah!PA(xC+rlD&3bn;o=MdYfUp|=z7SW_%wQqeJGZ(8+ZCAv9Yj>YBsIDLhbz6n-- z_^ioZS!OZB*>2@JVxD4UfhTEH%FIG1P6aU~M{hDHp50%DW;h2P^cZT z)B0t9oq1b_k``ExC!rp-_{WG%Jjq-6SLV;@?*`*+j#vJ7`6D7&*=1$y38w8)(xzo~ zC1v4-RXN?pabE={&)1W^ixK0BiHP4$Qb5i#ie@*Qfl^hK^(;m+`Bk{%`EX8ZqJ{&C zu!Zqk!Wyfb)tX74CwS-nM$6Iu(5}PamA$ps#0zMymva4@;EZrX1>^OuKgS>PHQ#6Z z!}(Wf@HC?QLbrJaRV^-D{{X;KHT_YRul6cm;a`#b8=R}=75@OoFU_B4+{GQevzV2g zP-7iiKGfB*n&Q?7!!lzG*y~o(;yb=#Q-R+VE6H&f9$Z_q*ql@Ln%MKKh~u@E-ZhOU zB1Op=$Itcu0P9v%vs_OmXI;3!z!)4=$yVMeR(2VD=LeDxwKn9C<2fOKBR=)CS?A^| z)ES+9pRq}wO7Ux;#5OwQ*R)^rWhc<~uN#?_ceC)SNhi|1+_FwEZFfF)Hr_|0_zV6O zr-pnzW3Bv6x|_s$!`zk9^xU5<9OD2W!sM|d=ExQHzl^kx5O|-#QQ3G}f9#9d?SuIl zP3-}A*8NMNWMgIVVpHou027pYv*nE;opzQ-bhm&5RUN?O7#?G+uDnfKWSVNVN` zxnn8v!hY;|bd-|bZSMTKb~{hm@50x%-weD#;13+iV!pf7F5$UGmvp8l067Gc$rxZl?pr)ZuI@fV16Ti+7e+p}5PWu%$^0F39Iq@GSsHMMM-KHgZ2!l%1Yr^ua` zH>XwD=7xJEhK?TrPv54k&eh){zg~>%JQb)3rTH??a_c0Fd$}Z&UCsQ4TX_Ur ze5ac8--j`I&3uc<0yER=UW2GcEv@7&!tFJ_F3S?*9hHwMRJ{&nak1^-Rg4O$4jDt| ztxP4s+luU4A31xwfsIQRBA5v$K9w^z2TbOoW)a3rY60oet|l$$K)c+LxTyA}iru2i zJgq6tM&LLel?o*8zb_}fNovv%!azX>1RA3xhsafi-~c_nGw)QOx>av30)1dD=!XP)lj%&jgAAD%|nfq0TFFXws8qiCC<8k8bbwx>OO_>UUAGVIB`pO0)f& zVX&l*%t^?{v8_)mGlOtdwXk0mKgr>j(FqWlT?kx>7V5C zS`6bVa{2eANa8YLE&l*UInUuxUffw}dvhA5@_7Rs)(cHp()nZXnvFDQo8(+)zB#UP 
zOY20sc%s}HhECvlA6k}6^|(+}@OlreWnEiIE5<{V2-zbad)l*7>}$zwM%}|D(z+=C zHa79o6bL-!5v#K7;kM(NyBsdSEPR@l#@b74nUs=PmhLgynvXNF9MOlo%FP-=(Lk!l z%{zJIel@uSf-qoQF;UJdEr3g-BgpEHz(L3~`0Zt~l1X5F{$b7};{(#Ra7xE4>Nlz- zihnueRouIH&OeoAZ+I@^vvx9hnSU{{kOyx}RsHuS>0AXU%ZE|e`ijfdG}X9~B$n&T zWj`+%{5YC+ofg zXf0({ztI5!?XW!9Gsftck@Ao39z6)@UU_8#La#J(F&q*5mfuaZD6?Hk2g@3?gevYmSo9p=;}vj&oRnj`CUa1WoVG@` zud8X=C4}0Zt#b{XqzYPD=aFNL6nAD~0IxRqTk+4~f5tmo`@e_U5^Iw>s zvR~}6@OR<|kM%h<{{S25zAcZ!_wspKC7g0w{{U!3Zsk_yQ7V(;5b)oI0yFb z8RG1oj!`Ik9!SA^(Mi5${Px$+@;@ohczZIdnMaD1Xdn{plH4s(%{j&Jz;OA z*js`kwzP?1g*Ot%YA66@i6_*PTLj=|(!UkWaa1uFx!1$fRN&r`T|V>mEDaj8C`OfO zb4e{(;GZ6UXFrD@A9N#Q;|oz1rD+7Irs}a|Ph!#)Vs?gAl`^9!`54a9q?2FHhsF?Uzza|R{(v{`d9CM9b}n~4+yK*ti6=g z)Hk*I->0Jd&&RmiF~Q<-9JFcLYk6&dm+HV|8v391b^WfsB!15?Hj$>m7QNxSxjfl4 zo4Fb>9J@;Ix0nYpWNqBLSq|P55=rv!0(=qpx$!$nzSn#M;cM+@PSb(&?qE={mtIOn zT(6qoa#b=%exp84+m+cQLX3=2gK$GB!3VD(j2in~b{j6tV=NXS#&+H})jyW0_Ue4j z8n!b6<6f2G-p`}|05kfW_(Acr;qS)J4#(mT0_u@nA1zwy&6q9Ko1UpOS;~-m^dkfu z5ni%YB~p$9DHtGtLHsNEFz}C!{A2L{09#)W_#auk(KQuNpJ=m{qh^nAZH%RWw`YX@DkYbLej~J62a6dai7q5ymO7Ng)BIz3b>h#1cQENXrS0gN z=SiF_dU;uh5;P?g7LlY28 za7DR-1A)1TEZ)G7e#5{&GWV=ad&E8;@TQwKx8iM1(@E3q_W9RWvn9MKyzNlVPy0 zm387ZXNuxo$Ca}jG--i@BYe%C0)dnHNq2L5s9jBceKeOhHx6Z*X_(0*j@?!nlIrauXL5YsCJr{^uUhWDCVVdNC&gb3-FRciI$f>( zw9rc&a?2W-ty)o!HU}dy$QT9;2<1j=`PUc4Syc1y?GLMSMSPomC2gu8PP=ALW zKhtjY$t>V9MdCPP7*MB(?{=!7Y<<^WTOTmbC*-{s_FMP|b@BS^z&9Tkw9B6i#cq?u zwotb-Nibq9mhgX~7mws(>e*o4SY6L?zR?2eWB5Bquk)BgY$yd$eW!v6pZX?m}N^yhUi;vjaNQalD!w3!AXT<#JL z-eCX&6;D5wMJQW9CqHT6Z=)K3spM+N4Kab&z{wmXB z@f@S&vT49#<0s4A_k7F(b`zWqmFwz5FSUOl(p=ojboa7M?Qruo!^*^&8fR_oA#CKi zZ9afUu;-YdId zQ}>)l;jLT-4dUoj=22eqdVJUUGvjhjI)*P0@b&5Px3#tV`+Dqs=le%~$$t+&XnP}h zsUgyQO$j|>J>~fN_cFBIA5GD$E%dD>+AB*-NY*GIg#t$!D8N+%u&p2nsVX_8ju@Uo$=48UB5ae=y+U?uczBHQVd;6EXy?37CQ@Q1omywC? 
zS-Ca!`6mij#mCxK_hb2=A)ax492{Lb9`w$S$A1w#OYs9!I+nd~-dH$>);1ytLF}vC z`je7tl333Hg+|Hd$jZtI`68zzNoK`Z0 zjp0>nbJIa_KA&?1=}brt8yVUv9R}Le+8CNP44*DvKQQcS^~4vll0Yj71$Op-L!P3j zJne53VZU%8``nYe9lo{AV`S8ph4^QV#_6qQh%Kap0OgO(?VRGf2(9hqvBjJ3by5_& z9B%ELR|^%@uDGaBGJ($0Fl$)|u|X6BfZf5xAmv0h`i7#nO>2D&r4Pzjs49Oh-&%dm zm)k96dwVx88)I$&+&xBXXGPa+t`XBaWsSD5Zn*W$Y+Tw%>}9u*lQ17EISMi9k6P)W zRnvDz9wL&dqdcC|Mv~g$uWr=FUxkmH56-q;1a&@R9li0^yNm5I-|XIH>oD3lJ#cG+ zju}jBh{i<>aNeT5tUe)3@o^YAQ`F6ZSn^@CLJ`k06V69!^v^2!luZWQ2!J{6JxzI2 ziL6+8Yq?#9c<-O*Ubq)fZ7of#ZM>)fTwu4Ttnu#ZEgEJ20Bht?f+2Bm(D_PajRetT zkV|`Wim!8F{h;UjLECwiSfgWpN2wfR>0KNamP>gD+S?O^#8LFet!L^Mwz@^v+fqH| zXTaXXfKRtTE6;}Kr!9&%b~jIV6n8J1QaHB@l{o0x<$)gDR$iqG>azWU3ysegRg7`~ zKI0W~au2T4j@0|D(Ojcip#^0-%5Yeo%tdU~&PG(zwEN2&sXo;^ zO>8!mLxF?b3|HDd1%bce71Ohvd42uOe1ob^(MGXFZ1T**e%Qb+N47}M=U-L)D|^K7 z9+HI8j`TyQ|g*c^;VesW#PkjFLH75C!Nv*5?~S79w>E$Zn_7i@0Es8D+)@{AxLs zo$&z(st!ot15rs33bHXQ_`yA@l&nfB$>dwj=F1^M%Y{*ZtTEBEO&)VPLkwyVsb}B} zfBNd3{Fk?~iyK(cjAdDxL68UIQrs-Iq)f=tI2%DupnW=Gtvv+dw9 zJW#hKoL`9ay*F5Nw$yF0xZqqV!vXZ}E6X+C4UG~+GnaT-w#LXt)?UQdxn5tH3~*f8 z4ZFBJ^!3e4sOg%1p%ROTm&lLijYk}T(>3T(#JFg4%93ZzQd`}mVb(a(EyQ;$KVJ2f zBC(L*VN_!sje8Eg;Ym~eCR$i|VUc-+uif>P7H5>-VIj?4wMJV3qG~;KX z%jy7&86*G0b6?cicl(tu@UO`q!kNeX zj4S^Dj$fJgY&OjkIy>(NAUAqg?Z80~n8Yta+?-WwaukTAk)B*ZFbVBb=JWyhlluUk3}5k9ytvI=nthACzi+;4G|vcYdQXD<2c?}oF7J}od89zBKLMm8Nsp)FUXOCQvm#Po*LO9*ct2FrEq}FdBt^5c7=6zCAOzrYa!x%fjqw-8 zXz%q4eFwod(%n3hqDwhkfP~sk20@+Nbf~FSMAnua$%rkabDV+i+L;>59ne`Fe6TVZbAo^R)u58b@eP}~>$qU& z=}{YVBY@li&(^qVS8_dCcxc645ty3LMFp5eh;z`B_!^dJqYH@{e9qgB4L#gOSangr z=CLH3&AB7bRv6(5u1DjF%~a`QSD{8{lTXdvvy&lij6*IDrYb9<_ffo;U`Q7QK;r}4 zR2L|Bc`=Yvspp}nJkL7k$b{}HaKj{HiqYzhd1%WJtYoN-NX89Wca7gv$BXCrx146Zhlh83Uwlp%raWeo@M zDf!#;Jk=$4a#CwT0IQz|YdH`O-0EKfF)wo|Kx$#wW5A-o5@l5eJQpY zQ$lVd7lByef)2d(tfvJf%O#;uoS`mgyEEDT7W{kEY?d7(#)~z@u-c1xwT#Kz_ki;1 zJx4wHud=l5Lc>+Fi%`>UBf7SkcSkHt9T<9oIIqpw z>y?{HEgiG4Yl}I*Sc%3tPw zh|_kC{MQHIpNLu~jcpoBGS=5o9kW`@K#>oq!6BF2ab4&+uY;T=2g@a9eyL8RD#cC? 
z>7N~b(Vwx$!+(zpKD*_%hvLB*^7UIV7wrtFRy4ap2%0rJdE!L_3g8kC&7ZSx{1d-V z@b&yYGWg=q>UY+&$@Xmq77|#mmgQ1u+;cHt1EPGY0r}Lo%D<~5W|{yT*SkZCvsySe zfTvaaH}|F4K8W%vaP|gx#f_XVCHH#UxeOMz_K-(yYZ^lgN)kBG1yxlT00F=NcCRJ) zyYbWEx4|6@>mEJT;IooQJe4g(d)HT>Fm{{YASG5DY2yT2It>rk6f)vnIh zdAJeAx!V+JfFp@AcA|g*B%HBkKKBOjT(gOLdh_L^p0;VpzSimL&m)PrqB&ajG@*Kr zzKhq)`WyC+{jfeIe$ZDbVCVi3uMI#XMba#1lNT!{!~U500(L^bE)`I9+n#IYJp;sA z?}fFCZxDED`r}K~tny&Hw2`5jJ;Q5lbs&s_LG=}oze@Yd_8<5s@N43?jU=1mPl#>q zF0CSVw(z0zE#$g(Vyh!gb_r2epDq?Bb1w1#+TXh3p_gP?T%`(syNcN>x9jWGv+{aR z9h_sM&r|o0X5FpmevtmgzqDt?4~^OsQ25`%b1l7;4Ilg^7QjVgc{~9WqeR4h`SMxJ zM4)a0Ag{W>+&W|0vh?0sr@+-#?VqspZ8io5>+JzE*oBb96b z;g=x(cla;<35}&$d`i8#@t(HYkAiIGj%_;D7ct)6sU(3YA@MhXHH&Rt!caeX1A@`FIY|@mM&N=9umllZ!1b>D7ZGN;Z7JbsRFo6Z zuHEg~oLIb<0gtI1O<1Wlt^4}-M>pY52zY0~8U?q7yeFpH>3U6)?1IwS0z`oFNCT-S zl20V^YvKO@+TZp^_%q{uB7cb&nx&t^{Yu2|ywwUKSlyZARR-=cu(XBSYC1BxFNWMU zf%hki^?g@B@Xn>JXu3@L?e&$r%?6un=^WQiqarkE3aDe-C!T{OfPb3*0NOL-_r*Vr zUNX5pJ@d4y8w-c~4~PC_hQl8&8CU}IWA8}ACnsoCIInYn^1OaOG4|MM+>ScRTfe)l z{PaA|Eyv)nxJ5$|Pn(-at649fPnM_8H&I6g)b{bplcZ9-Vpz#2;TW#v+!W!lk&q5L ziej?^!NxFuDr=HmKvrgRfD{r*{43e-zh{4qdRKu{;!XbmgmoCS1~0nc$MeS$1wM3< zee{L54YComocbF6jjxKs(s5XtiY@4_(!26M6Qzf%SDV>Iw6)Ui$oix9eg6Q0fq3)b zW{`X_FSotfgSL8pr{nDgYYQz( z1e!V1_pwCn)+wEGBb7!sD;_h7@qDXrd-WCS!nHhZ7Ec9NbkdY;?ECgOXG(M`rCu97 zoz?#UQ~H+pH}E^*zrgpMp7HCJ!ARVgw6_x@DBBSp&XHr2>i+YgnwwCjUU_5 zd?bTMyz#Du1-LOZmn$5)L=dYG7HP1J8y%`6l2J^ZWmlX{xU465aCg_>?he6&!{89y z-QC^YAq01K5AFnaXK;e+zZ%q1)mh8y|C9ks__^Lj#UQV2LMFP6Qjw<+@lT`m}wS;L^xInua+z? 
zqwol{-@F%ygK(5d_&7zA2ImRGN{PJI$T z7p|2*mAusNC*q$KA{MOt8+pU67UH?x%t*1VWrv?MYoB0f_$62p7l%WjhA`yb-{yGb zc^hGbc_E6x^}%sCC}3ax%Gr$Bm49jG^rNx+J9#obn|AcLQ0J+rasYI|>?iC;VGrj= z;=Z0MPpK;*in3-V<-zNGBWwf+q1bsNc7OzOF@7FJ3o(Rw+27X@;y$e95ey}qv}1)j z0F8R%S!LkCpo;vWa9yYIHx zIz$ID&mC9lv|Yo)xF86l)%n{#DLlb{6nzSMfnNQf5A0bkhIa@T4=@tZ`7|>Xzlq46 zWXD7gC14XzvyUF*;|E|t_M!gP#>-6kZDJfn=S^?V;P04GIE9Of<-{q9!0kUe&MAEs zU-=&EA&V5rOJ*nlasJAWcVAdwSALOcUIwqL>X9w@^kv#>S!BzrdS82e^lk-&iyHhHqh3O94hQ?Oz`~*t zevYM^=NTN;;X3o2-><`k#K?YXWMxWemUv$o%K~_I zAxo5*1#5I={`8ZLYVcBF`Wb`q{4WgEv#Cu9sy7Rd{^NTYk2=xKiC`JA?FwM z0YVL3`8Ho z8DsN8eN?$L=|XjrOtDx0Xdmf~QaWw*0b^@R7!;OJPhtWNwpj)kqCTAT57#G%P`^aG z(xf-~b4mgjbg9tq3a&Vj9iE_ruf6L8hj(ai@#0vHmcViz&sglQ;(YOV4N}Aak@T^s zkOBd%Ge6NkT)&q6fVFO-Oa~&YwQ5QcSlfvKHOU&^Mra36r9?spX8hle4Ud441CvTh zS4pyS{U*Oe9_9A3J+R1G@F7@6t@-8grA!OaVH?4fY~~FG_igf1G^L);w97iN2Uf5d z)|EQ5S`3H)%gw$UAAU6_!#sYJ+Jy5|_EQ6?5^qCweE^?54@=V_e0$iJ!m14`8m3B{ z;| z)%_Mx8?IPrPO7=|M%qeU$jboX-=o7re?p$HR!duY^wx4Gi6B{5RHr}&iAMX{Ln6~d zD*)mF%fle|_B7T+FF`P3jLd~zPRw}I5MxOqrR9{l5I|YqO*E<`W><|;CdUfgDK02R zv_^Uo9Qsh{EJfdO`O|vkc1WnAp+%YvPHvq1nHbUhGTIUAtIP535u(%BMqO2HequH) zkbFc^R@tatcX+ABG(-ESBf&pyw-m;Q|g@kJ~D z?8>x%s04LGCpmRIoN#4;2uK_$1GEynV3v5x)tXnx8(L#5+PcLI#^K17=}-zpbuY9d zK2}_MC}UlsZ5|ASJW}PtJAvyb;`adt&OcfUK+j4UvzOi=f&^04d(%2b!#^`ca`p>~ zx%wEh2l(!zws8NHI|X)hN@XV1OEePS`#qFlo-V5wuW)&5>1&gI+fVp4VpM_ zLYJ@9%HK}>><9g6;<=1qeImU9b96TEU^$ouGVk_*;2%D%tQ(?U3ygb=5`f6kxAbEj0 zwA;Vy)dh6W_I17(c?yq(aCNd!VaS^bE-Pse7ew^@_-KtZEU!-&)OsaJO#0;z&@ot+ zrO`Ek!JxHMo+I^5mi#_}p~D1yb-KG?(;ox1qWIEn2 z_k0L_IY);Hcq$&bnG}itay6Zf^8_9cYlOZO3O#Q<)M%Ca^=bCeHnOvRcDY?Rg64}^`d;CCKM*g6oCb!ddQ&U zd02;{A3M49@aXFmpgO=m6ezIdAr(cFp)eAkm4;zikXt?D=eRe>$1A{cdcAhdAYzr} zIV(VToh4_A9uJLrbmC0#meWi;uvw%#FR*o7?NjPD~o5WSDA{rJ*(5C)J|fz z{z6B`z<*^pcu?Rb*H^vK6C)cS-W?*KbpVFkrm1i~abBC>Vh4{3v0fy zN%^Xh8w{m%m_g2Yh&x2ZDRtAYX#Xo22lj}jdlW*OdW%qOyxO;p^c=f5ao~3JLq2zW z)&KM6Q**@+7s6IUw;Ld!B*?vk>aTP;`5aLIGv0}Yb0)sxYeBl&`#Jx8Ti*8AM!%ZR 
ziSV@o$G06-swq*~7bSLr>bN@(B&kf_JILk;Dq)OssrP_tsgCLi?7*eiaWuQe!n~iB zbTE5wE3oTE+(C#PhlqoXkG1d7pHujLxSHf7#e6V_LR!K;Q7lJZpBF(xiMksRC;52> z#tR0MyoI-iydTb~{{h6g4>|$JtF4BCV{yBWqskvJt5MVVyt}YgUOj34MOxvA2=hzo zE#996WvcNZ^?MkkyW!+;>W7EtiTeDp{addsjcRcL_HqIi%Wj~joc-zg69JIwYse6T z`)->%4c=s1Y*(|(&d0)~Kf!JJ0gNH?Dl(yPn5lMWOQJw%RX3C0m1^PjQ+?8-y^-qb zb*{?twOMDUL*0pJsjtixi97Zue*)J}T}Hn2piA$17}YVHwmz1_Bt!2O;C z7V2pRWsZ8J_nz6^)pHxpc>~FhF7ePUgJb}{KAna8=25h4lAzsr(blX}#IsZqi z6G>Mqdc9QH2rNp&<{@!g25!$!|Jz&6DjJ_SQx~_Y3!8L;=N6nmM(jLRR^uN4 zGZMC5{T4Vg-qcE=(uTVh@VD91B)M00RLA$~gx@HR6jTD8P_Gu!%j-7Jm&2qdb4mv} z&{pXi*c-CvBwT-(7-PCPPf*27Ww_=&j@MlO{SBO#@pO_TMKiSJ(bLXu7u(SWV6)Ij_u-qmfwD-;RzbM;DsR|jD#{$0OYP|05L6mWh8;!$;Hl>K zbY|`~wVBa3lr=Ihtj$upA?L0&Sv>Cv0J2%4?ust)`7tAC33dQ?lIKf-^3LO%NnPtO zkS&^a-0&*{XNWcLgeyLPi@CrDlYx;Qb5FU=nwqHXKnf8%VVi-PE5cCz?pFe?CZ~(C zfA6^g&o_Ij*W+0)@;vf4L_b|K=jm0{b&E@14dv3!DUcNHbM&lpUI4MCJWvYPp$?PI z92UC+ntCr)V`EabC`F2~)hj&aZ&|Ucodg<#QueHDDVFSAVyomm#b)rmM#_Y7`T?I^jd+*k^FH_Bt1e!(T=_F>wb7@Mx*kiQdMU z`LbaznYr;^zzpYMuJ-5FIj@?bMDpt&3WiM7Tro1;D`tYYX)SWoI)>p*YC#RA_*3ga z?P%JO1aEuV7|^U`TSwwaf_AQHoo|R}ru-Z4e*YMw?@6rKZEkJvCT}LnDSpfkw|ieI zGNUfvgFdzJNBE`18Fp`bB4p@$WYX;_+YX8Zsc=uOc2QcvFFQSUn>3{f;W;f`oaap} zbi?L#f1it&IUmmXP17ZZ&(ED)%BhkNK2WVcyz0Y+aBfX?Zm`5l49Kofm?k2^Coshe z%6MnlcWqHRPsOQM*FFhufsqGOXjjaLS1iU8zyXTnNX?&4;$*b}23g12c_cE3jep%w z1OBF4cKq40?mlAzfXKoyUpjl1mL)r z+#!MIkjD?nArYFE6iBjI5u(Ujl}alY&@#yX(`1TY*28BI|EKdL5dM_v?O1KCg{ix< zK12NUjNT|mnL&%35XX;PVumrl`mlL>@nrF0|1M#j0Sw?sIN127lzi0M-(VgC66N|r zi8+3HzDCTEhmoA%`Xcx)_Bkbr*>8EJWbIxp(|Ihd@`{&mP$DPWLm}&cCa{|N@ zl!y(b#l%xm!oxgLd;I+i7TsE%^5%zs0h@*KCq0ZYQ@Pb>J~`*@hSiTEg)hpGqR?-* z?csh%6;?**Ekg&2HVf-`GE41O*2nk;?LF}W$UF~2>@cL-7&j0iLhPsT%kqKIi##~6 zviD=j>aU(u3pLMQPTX}&RBWGua%k@3LwG(ev){))3ASe(pZHS^+R-KrX|LwY;Qpa? 
zZ>rJZleJ4l8i`hgkwD$&*Mh>E_Ji_a(a=x2#Nk4e)~}b3U!AcVq>&qT=+B57T$=8m z#bH7h`bz_Ycld6Nh8-gW=;Hhwe~Cu4#!J(AKb&kK8ylqSGlEa4Rqz@821sr91J`8s z94}vK!;No^#FxfmdUTNCkV<>js!O(TDZt&F(+-2Oa)qv3-rpp;EgxReq_&f=10?#< zZ?QxMO`VZYB+;h$ge{hH@7f)C??g5B@~W{ShJ3A;ddypZmEODkNr&Tu|BTQ+*T}wz zz`dLdc9kaxxx3r$8(2(k$iy0oMGMWrcqh=KCpiS{A>j+&{z+2wBx~`!GQPQg@?Fqn zmR7BPN!i!2=kk@2&epSLWFFr^RyiT#-ErK9he;eTfwfky%c2?7CQ<%dka&+}Sl%a1jtYfQ*-3bI#ln>6qDd*h}tg(Ed&=O${sq!Ve zN;@?lw%emKP4zA% z(D1mWy)*c#?zi0UbRFshDacA?(@cv)7axTA8S|;+hjwU$<0p74Qg|o|XCRzWT{_*o zIi6Q{J4zC8Q4HOh9t?xYZq$Ygslf`ovUGkI()W`WMf&x*VtS&tHAKPAlB1E)CEzQ& zYBw$|cB+0dpnC6<=Q)J`93St!svfspJiX-Ym*TMU@SZ3d)_h6$XELN@F`;fM=hJiN z6H>pt2?_DT4+cVwm3y81@hL3~qwVjBnLUxgXA>CL?ww|1{iihv>o-vRQw{Zt(8tg_ z*XPWC0Js3mQx6g9o9}HqRLj@oUdOkTQzV1At{9X9e=`!eka>*x|LjK#R=-}{&L#M1 z-ja>*?y%qbgEQYUO6hZS@biO`TbOMBVrU&}62k)A>J4A8-j%>gFR%ebLs}8f(#g%v z#k*W7+6^w<+mu>HEGFn*DDT&f%(NCP{K)cw6UJc)^pmRFhLds+pzX~$X)kS3%Bf0= zrV#<$(2He}r~5n6(tJzFzhJrIZ7YF~G{T5{UyPW81rT=zfvH2)=|G6+hx=>%q2mi} zVrVOCG$F8jZ(GTp^$+Ltf-Rw4B1jRF96w=J0_h9;xXJLV>Gc!zm1wYNOW=@E;s$kB zV(9e-(gl~Kf9Vdk!j!>qkd9^+2PW#On#MbE@INW+20Mt8pZt%7PMw^gTZPqUII(ROV=}Y>BYOOcj-Vm>i6T) zPP|yb10ybh#3Bd~C;!NPX8TK~B~)D@_JZDdcm}~GbP*cm^x7AApp;QZDxf#{qQB|+a^^3Ja$OyK)F9N*t%E+~+p*z)SYYw)-AWAGa=N!zzKO51E_3%hAaY1S zP_SX2R_e;=>|bD_>*&n=BtD|w;)+2c@Fn|Q(%?eQ`PU2bge`!Erk~2Lel4n#&nTc* zKxC-LVRL@Yz4*0!}V8RM{=|!NC!U zOh%?FDEe0!>NxlEF(};(C%G-(dGG{fOJhr8B0|xZQGdS5!Sr4Bt}Y+g&sIa>aP{ z7af>N@DI+xF)FfJ!{(e({3*TA_98y&&sG2r{kEB4q41xFpJ=s0y5o=0&LICv!a%hn3EWC3} z2sTH5P9e`==$!aN>5|C&Q#dn0NotY#W_P0gttQhIowQy^zqgoOYTgHD44{$w;f$g+ zh7D})!pove!cZe%QG`>|c#elcm!rf(3FAmcJvJT^I|J6$#xaiw72zxt6-5q$F@707wTwY8mKPO-g;Ic5uX-L2q$>>uLRx*<*Yd-t0bnlv}wTj#pN z>n(p>^Rpo{d4;*fJYg=aK#?h<2yI-6J)zY?f{H=dzmia8n#xd`Gv_sub2m-HgYEjR z?MMCdCSRRts|R5gnI%hLXtI6*tcQwY(O^^*YPy0ARq>&Qrsv z50(UHHqL^*W9#v-{n(@zSjJ|KiKVWm!d$I1z-h=C|Mbh1Zofr{1uL!GC=j z85J*_mLUCM#bIWAm9N{u;CPHJ+41%e?e-aRCb1({elP=TW9tD~-lc;!FO23o%#PdS zWWVJAdm~+*&*4H7OL5QkNli2YGUA~#3iv{Tt`i7Cu&06A*^t4A3;A+XC~r_>0Yg2k 
zKTX@W%=o1r^;d!~AdW&39U@008)|<4#vA38K8^drg zFLvmiNu$5{rgD=Kd6bO(o1)M{3z@`w-smcF(PE}q+;qLfF|4rRV(PM$&(mFO< zzNloncnd8zV^YGCTKrnCAW)YUFei$=dNQ(KmC~d1Jwbf2EzRUbUCpH!Lgs| zCT%DqyL8LbS_t+zLyX=-2Fl)yb4&qRf>7_Qem;b!MV$whm}i))*@_$r<4*9=z)J9qrAn7RtjYk~em#g*smGj0qq zLj*HY@+&~0EfG$> zRfL>O)x-#@x#Y+W4kc*1D#ujR0tzcrE#CLuLxnAf>qv~c)TB9R9G zq&Tdvbx*;+zT<_0rTYIAXFeCBEwslmnfjh{UMje9Oi=)%&>rnNPcGDx9Wv$6(q}pF zS&@rL59iKZLXuqGo0Jo&qfiSo`(N_)T_fyAVyjix*(dA31_ z%crSB$Br_qsV8gbiUdX6i6t#a2MTv^uQ2oGAJAja7E#K!mEgVwW9bo8Sv~CRVa9u) zkd=bJH99T6-RwkG@teGXUd{}w=}w6s`<*B=lidK&MC&3xPFdWQ!PB9@Cl8;Se-R=r zETgM($uF4q0ctOwq3}|ZL+RYkv;Pulm$MY}heXW3Yd?$Y9RCZLwpanL>7MX}BQ9e* zxSi~WW-{y{P65BhcP)l2zNe}HX`ijWXl(S-#xI#g%(JfFWto!0Tg1XAw8sF2^0nMO z?F}_d?|LCRUPl@SCzn|X`r|_(>ouKvx;fI@(p$s&ucdyKjbksq%LJD6uE;-5PGH!c zuQX>?H|tX53bbMD!wrK@%DFwB7Le|Eoo?NX2dg7iu;{(DC~r;T1YlpY--^h#$NmEt z)(SD25=HgIQjQXl5wx^FT|GN=&7sp3$61K~2T1&KVU%Yn#IBCwljZIe zAaem099?qufJBWgcZ)@aYy4d${`Sa~ZdnD8H?}Xeytk|i*KR3rVb+NcjqX(=2YC~t zqv^JmodX40({v;91@!fP6cwj%c50K|FIH5WtxiZgpk7EP_NK(87fgdyK1u`k^1J#Y zJ{J#JgTJy=m-p1%y?96ab{jLsA!&(TP>3l{p7>NHLd0QkQ#wtAw|#m1VG02uJ;qWs zW6{Rsz#Re=3~5Av{offs{&%U*VKUS{jzQ;|7egc?#Fsjqu(^uCuXij8#1l_Z=)b%B zpI?6Z-q}9xVBWHnG8%JgdD)dO`#c-^z7^&`y;-ANI@h-gETvO?~1+C)Y(O(|jjI zQD>A}3LddQk~Bo1jTo%w2lZ==9C<6*?I+}EEUZ7jw~Gpw=9)GTzu34s^UNUdbSRa|;me$U2 zfz5KHMczcrtuvEo+k;rDH&`Gr6G<)l{sa2Ah0_1yW+_t`N@8Z{|jjkK0N96y2~#cFfG zgr9|Y!~T-MrrXTuD`OFlWG>0eH^Cn|;wd|t)6K@RItX{8a$-uWWObFq&a}fs8g9_P zNOwZdj3!va{45W4Bj1)psBVDup2g>Edta|;tfp8b?VJ!$!-C}7#K+`6@Rs}c%0xt$ z0`S`->|3;J{fwAMzvn!-d_mYkur_H&x|0jx0S`mZ3TPb>8w{+E@{&Eqa6Tocw$*LL zf_w4CKd>VB+F6H;1L}dqDGI;4GQni(^MQN6$Y4djcsVNPj^*%_TJHA~>QgJ?hT2fc zwyIcYq^Lq&-IZv#T#@hpap{gWg+LT~*>w#e_||A4gBs-nG#ndG-M&({k+QCy^s@Bm zlYi`d#5Zt&OGdH_qw>WP+H4w z{w2Bz&Y-F2C*v#_n?Vz>%a)8W#0BXP1ku5q#9Ry>+`VxJHs377qyzn57=rC$a+WUsUG)vIcn$GW+Wy+Xt}NzhRENByJ=r`$ zTe~EAJkq6!W3QSKAdXLd$M{V3`Ss)0^l&$^N7%Xa<#Y5zn>BD0Wluq;&J> zEQ?bI!5jK$&5&XBTQTzvsZA7nsf<;e3suXb8VBvYIZdNEq#qPwi3r&Sa9DE%B75gp 
zq#!Xhg+@CF1m6P?^gYO;>tPeNsUV}+ST$++g^_KFQ%QBeGxil;)43n5_sGN9P6igR zEAt<#yP~H`uFh&*X>WIaEN*(N{n5>rJv1>kSQ_#(1)~yUr?iHLpcwNqfbfe2?YL^jqmZsFa0w@0qPsUT%>SUu)i&m@U7)yAUJW>Q9GM_=THdO+c z+m$Y&h-ee5`4bDgP;CcHyBhnKTKmYYqsmU~T$R@-w!>0{b1dix;eeL0k6~jGOU83= z(RhhWm19ih?FqW?CLDnbf2WGFs+}l0G&<4)egCj*5hwb%%2EBg9{?gd!JoaU2{(GL zNVqm9mAHwtG_!!D`1e4i-POo$KTn)-P)3F$mdhKB_$mh;_VzxdW6ORPC}+%dJi_CP zJaT5mk%}wf{|p|xD}ixQKz8zOW;3WxEqD-Tn#~PC=tV*qSdlJfKIG8b7}onaAY=;P zIH%i6oH~a%LQZP;1L={NOlj#0dt*x%%ZYK)$G^;DQB`fwp9KRT{!ryzX4Zmm<>SRf zt>9*?0SA@Wr7B)Z?1YVit8wM`Ox)WmDyGt=@}(n@@Dd+63!l*2A`4S-pzv{_oBSHXu>HwGtB0g7F}to# z^rEqJY8;L8ESiNqL7T;z{?@cv!n6V*3qfh>36M0#3)HO$$v0_xNV-t6rXF`pr3mbM zjaaUqJqgIkl@4bjX5Vp0Sgu*d%tv^e_7bMgw0M|UJ=NWnwD$@8Rn%S?y<90DmujB{ zJIO+d30HKjIY%hBW=PlB<4^u!T>CCpy2p#iQQaSUdO|!)BOaf`z{d6z5Vfyo9JoU@ zwZq@`_uJN_(~qOaibpJL>G29p>Pc-g`}*nt!U_Bv!wmADlcv<+r>M#}_rjNyD=WYb zNzf&Angek{YzwF_;YE{c_a^l0$_GkhDcV{N=jyHCjH+hZH zQ)wq|;Zs&e&`e412S0!che^Q(PN1Dddn>pbsd5@NFL%4EyE*;mwGGTIl zmU5Oo-LPlHW)5~cXYxRUVK_b#mEjb9pdPOfgta6;aCl$k>4}@9YtMucMfFYEzY|oU z+dHM}H^IO?hMux1-!Wn5S#|$<;Wj~`%wUuqnZAUkHTO_~t>8_$ImOV0j*s(Mz7l&S z&NE?C73k4(zXVa_5j`_fB#B1b#YVw3-}esJE6}}HtYK{SRp7_%K5?W8e)lAhS2UU; zPAkctT#V@V$J~wLX5emQZE!eg10x)?*0TNs6zj;I#wQOTBQQVI*d;p{T0IF32|<;7 zyY#nSX>a#ruZcQ>a@y>yUi~{uu9HD|i>i}G>@RFR=1d8*m^Dc9FAGk2GU(^Yg>Glu z=B2M2FWG2*@Rg?hwb29|hcuW?O=Rx@I~6yDfcAX>>z;V;L#~#v?-#>B}v*CYEvbAGFY?HR)|CLcv^MK|5!;$_(JksY2=XoQp0$ZXK$%|a(I?^ z4Dshp=_LoxfeCTJINA+1f@#Me#+-qg}AOt?L1V)gM+^3XJ7K4)8KakTw0M&o1 zA8fxKenXys^+oaZ^2*~hL?Rh@CC&uBLSRatDg(NPfdvLEbnrR0TvE`Bu+9F@o8P27%LKea;aT^?!bGAIgDKqvtMHR5E^hN(=Y$|dr*!E2NGvy3Z!~5;$CH||Kty_d;1ti zOMX$!)@ElAaB`iszN^zP&53o25#j`lT}KEjx08Vv80y#d`GD1!-}rd&*~0> zN&E=%;ex)X0>Kvsd6+%HDQZZl_+J(}dw+cEW+(=r0_ti6mu~Ot_sT&Q|5r?6t9R^y z%wPol7jcDeU+d9@t55HCLk!Dj1s@0&V$@v4U=!e}pGXuv-{NV9t8rJ_dDReV54`ao z5IGGp!uR}@Rn}d73ckbVnK>9=of&4Ay_hU28Vih$m=XXjhztMpY$=%|I;PvqZr~#} zd-_oE4(h^=#$spU&7yAsJPP-rx)>|$)V-sijt(A$jfw2 z;xKM?f`aMHZfuC12ee3#Bs{f)8IT&>`EN{Qxnx{1dEIrRU&M+gjU4l0B$71M<1|t!hR9wI% 
zssfLmsFEviO?G;L6D4Z^`gMtCWdcO7ymn+tHuM=t*d3<^P{Y1YNJqBKQmuoZ4%>UP zs26V0xmVsLx!~07##Je)sX3p|~Nh$E*6xpWRD z{<^ej%{@n#o^Y$WL>1SaFW5hfv)@nPqpF3LvM%ns=au0o2?&?8d^pG_ZlsFZPV4X2 z&A@QFm$jgp-00>Toby{W3s3X@8^05}Qg=SY`G_zWT^#0Z&pOrVCA6S>CRZlA>MS7m z^BX4C*!+DC>~!jErLzNt=BPqm(pb6Vl`hkGjLS%7(TEH?qQKVg%o%g!1)Br{q&zc^ z%9PD32B8UJ7MEZ>hcB1T>K_S{dk)_WWCu}Fl~VW?ut3m_GUBq$LOp*C((co1e;aid zh(C+!N}`O+g14z6&RRW^U!H3gzrKD>cAK!26Q_t@q}ZCP38s71ZW!bROd=6m~84l^vqOx4Nr)NGKct@Oiwf{y=Wq2_!g?)mQ=F~vN&{xy~3 z{>49)vubSIXqP%%g0BpE=ptDKr&aHvlmEVHFC>!D>-{m<29&jd1 zlYCv*G!+!(7bfSqS3Zs&CS2C}h?LcJc_&aF``615j`q?`6Qoo;V(3jgGoMwB{eFkV z1t*l%0GEU=*4#x|J+GaJejRexJ=++lBz+ZS@wa8vSlnMajyaY-2AKPhl=o_8N;fxk zgdFRE09SGCJEHNZ^w)gtcy2ztVLi?VWRcHjJUP2o?LDcyxV5Q7$%`a^d&drQ-Z=-! zFt|jCHwCSyjk`n1tXeXoP3ijI<$c)>PhA-Ibky{s_VIAbtzSDGyJY5!M8W8 zU@W4cF6Cjy5B1TvLt8_vmK=JwcoFtv{jIvI>6}p5%~rr>lr!kW!*q1$txZ5hiE&}- zC6aAWH658-x|pT#c=O1`w229oqr}#PzMSU6b{AxBtqaWmefIRGlx~lbpI;(}jC(_# z@U1uwRzpe!s;UA2;KRA2yNbX0_vE$;Mq-Wo6om3e7z7w6uyZhxqyE5yKEltNBDgvQJ~^O1 zEAIpp^zKB)j`Ghi;bN@`Ktf>5!prC0@k<_m4HQZ861ls^{}50cG4HS`t(e1xG~!dk zk&|-xEHSPmtFDEKDPRJiQ{?uLwaz>*tKVU5MMMf%3H_{FIv4zes!}$r!?IZFCr&ur zto9hsBJ6IYIgRFZM!iF)*k=0}JnqhSi(XNo8AeRc-x-h$KQQh@2OZZ}4+_V+-(!sc z*5glYeg-zYDc-m$oQxb$FHFZ#qif6K<8(I1k-1)TgChR}-4onJcv)1+{juEgN=Uz# z=1DeOVs3!{v$~RDQd~`ZCQx$+<0dgu2x&C%p$Pb z$~9Q{m!WKnR~lv!m4v3;yZY>7BpY6!B|r?X8iXQxM^sZ+e`91mFs>LW!-TE4Zg zMiDx9>8Vq#Z;en%xmq&?(WlUc1#pj#R#5Yn%GaB8Lz?M zo&Nh7j%)&&;DHYb2sMhCWfVnFIh^^*zF4~`h8r;|h7AB61gH)|^!Ls#bB~|4D37Z+ zw(s`eBzg7FiWnFiLV?Bka7RZ*)J@Pa4dKbLK!`$uz|a8h)NhEdL8Aw zz9Azty$)*Zab&DZC&i?~@h-+}?#|pe4DwgGrtC{jEgz+pX1-{{iLnJlFa8c{`+chw z9h8i+QA}t;{h|y>db}kITjKqJ?^A^p%!Hf-90>ir^a5AmZH`DXuNpi;%bTY<0 z^iME{q)Q_~!b1u~(BwqQ52cB-GFG(io+KWYP>|+vAl^#bd~S8J6M7=|uqhLo%9pd8 zUc^b}6~mm228co8_j@sHEbSOy zqS)+OOLVC_^xLCxv|nd2{qEM5Zpj~|GINvHSDJ?2<;FN*DdX&(hS1j(0mODUOfPe9 zsBvR{Ml$#qfJNR);~aBWAHM0)ocOsgtX|HyB3h|b3vY7dqd@tQ@at=AZ{Ya6EWhbc zfH!~3Mri0NCXnGs~D#2%8FBcE<=S0jsj`<;)O=t@nUv(>&?#^ 
zBuD7rMBtSMb1lnW{_2k?4ut}NJ3;Vz<>d}YFYB^3YaG+T1S36J+YO#L#0tZK1W$g$ zm^>{N1n(?tTkH`kp=U3!@pn?c7>fh>a&vA*{-;|6S@ZAL?9FT?r~XKShgxr@9k=26 z>^6*6*Elo>DO2GY)5iR#84hLIyKidye?ShL-^PBglrFcWSL=L>SOd$6hcUN65M6@$!$5uF3T^;K&%01&Sn z@^B#95t#2}*|Ov`wX{iBO>e?|>5h9df@mx{QQ;u%geIf$ zWS^_GJd&Fxm^-070SAaNsxVVCb`+0WvynqCUwTvFjCm-#HmKid;ySW@*BWpno#PTy;Y{; zq*TtmV%pxkZR-?A64}%i`KjXJ!q1u#oBdnzIEBkh@Mh%*|H_z>)zW5ob_dWj_0=M0 zi(kiNg&x`Bkn9f|TC3U;rfn3DgsMS@yP6!M9)~_`q)mm|p^zIm-g5K*0pCC%zeJX| zEproNdf=RH3dg=HP%-qz78fJUfaLtGv#&ktQ&qCH5v|3kZR&7&IdslEHmCYk_mPQ)a6e@Kpba_Rz>jj+B}jrQ==S#&3yj=<%;@0#*$q_;X9SNiv~;* z`4~8_oIK%PzFSd7mAubC6Y(Ga(ETsSG^897US1^$y@nX=FjR+2~z&VZGSw!LbJz% zN`Q`)EW-@L(XmZjb@`tSSF{Mh#3;P&;V`7z4M@+f_ucOy9ZdQ)e$XNPHaZ(o0{Iu)PFz{F&K zdz$D<#~Xnph*UE|pt<#|8_hQI>wVNw76=CLImf+pQEycOW>VguV1GL4yewmRr;p55 z3<2BJ@t^UfQn#~@Ix?ODIKSM;@jLI9#%%G3q#yX{uMoi`C>b^OM~gfW9rdizX-l$a z1Q`DSgy;EJm00{aveG7x`%YB3yCfZjepDyhKi0j>w>6-tyY77Te2WtaaNvuX^A}^c9xGyRBX}jt5mBZ~(?DCFVb4xmf{lBYflr z94S5O%xfY}0h7!v#z`6csc+*y+1G9c3dH%q>%po^O-!eA%sfXT{{U-ADh*N6glmQe=hj+cEgl zS{zO;?dE%V$4MjMxa0%sJ*qi_2?D9Xa7g+Jt~}gfj%#mbxpgpgPVY+bh08iw>rD(& zF$$B9mN`E@6}`F|?iE*YR~+{h;p?%{qkJ;3!Od!EdfoPdg#dfoG0&DxW{eJgMn8Ce zvVQ|!P9p~QiaHf^maO&lh%Q2K4>b&y%2bj$to<`yu+<_H*vaOn0G@;&LJvdrtCHQn zo8^d`Z}+puon%ZZcDOPtxk2#G`%K85Ap)pz5eYlfpdGHVN zs&^Lu0P82qHqb~J$owi}b-m51#I6ob?Z9NOYGOp+l?a01}}06NZ21x{j&NUi|el1TNZZi&msyD}gd9+~Ny#Z|b9ea~`|Wc|Q4;C>|LoJ4Ir z!Jo^E;-HrD3u7Et2j-CAW}sJBmCQ$!#(x^CHjbA~EYeEh-3p8z21f_igH;vp zY9`C~OtB~c3?Is<&GxHlf?5P1@>u=cnx`4Rf;epBCUigpO8k?G(tDTriHHq@f(R$_ zt$A!F%t-8YDQ1S|>kvo_1Ld8|f=&-U*sUXGAh(ivq-WZ>IXq&dg|D2m+-(s8Jg`3T z80u=&@W*v(Rz=*Ws67oOQS5}G%%xZ-jya<_Vb7M|GT?h2pIWGu~_@W(W{Iem)bXwOX6z0IVS^2sDowB|vHHy^qO z0~F&M0(T&}xq{hXk~lFW{BM*DFRx6Rs|;6IT8v6-MeCX1`e^k8{Q|^Ku8_S^9MC zV&-eN-7avd&GK#aEu0F_VzR>5R}rerRE)1VQ`4tf0D1dp5?@_`96%EoQ^5z;tJz+k3)?S-wymAq?o?9Sx(GR>hN{43B7b!uJxpwIo_~b=-&*|FGsrPGOUl=Z>VC_S@lG2b6&XHR_C1V%MhMM( z$@@Y6%Krcjzi3-H4~Y_8Ggh+RVAVA6vD|#&0TSg%nLv#|51peSag$$7jzs`sysQ>7 
zz80)&;pXEdvq#Y4@RV^hof`4HUqj&^+0*tI_#GEpM3Sc&vSAh{esyPx{lbo&zt#;HfG&XxSNhqw0yLUN(-9 zz3_rVYPST4T79R>5*7J^7RKT^#eX*c0JWEjzCP-o6K^zsj!gDfkOXUeI@U-2%kmd; z$1SoPE2|ykNJjE=oMQv}Tdyeit?+xmUl}|#H--FnrbmBj?y|=Fo++W4JneYpjI4-8 za6n)f5~m=Leclk_8aYlc+Sa6|o|pSk^H=`7(c|$xOCyV1%F>XkSeyVd zPo;i@e$8L7cZ_~3iys*LMrk}d;a4%w6aa3#)Zle<`?9Tw6FUy(1(`{}QUR~Kz6O86 zKfWFOCGkDai#$(%{{RV<+*2cI33qHEg2hnhb4Da|nqqN>U;^M3P)RlV2M|<28jJ-1 z06Jp6>xukMbBERPi>vQ9`6c?l_$SF^Tn5usb1nY>m2LB958ywCUJ>|h;p<-vcrQ$Y zOt7?yID*80NiUYWL(JfAmP6H@$t03{*Kh_p)Rc!E>+#B!B{{m1cW-m_`f!CfsKHqw zy$y1{F7daAJQJmzS6{qHBf;|1Ig(T8ia9@CgSoFU_@nXLK=3JjCrjI@>s*f^3G(72 z{(3M62mO)Ht}Ee9XI}9Ki}fjWe-zwFbu5SXXzjToUqoJtJ%%gX$}$Sr*(Wt)iaKGtBWA+KzRle=dN$VwClyrm)K3m1csHvI?&>%C4~I4P1<+;!7`;js`i;*EJ(X4?V>L zDakZe@ELC80S8jU*XdQRypcQ+#LA5MV59he>(kRUa^X|SZ!m69IUcngv{vgHy2b}} z_RR!IyAa*SzS-1eH{&IEJbQ}77V5+{_T0VmpU%1~w^0$UL+w(@axHB@Wnj$H1C>(0 z!}YD&D~;-MPqsQ%n+?MRaC(9t zf*6--E^r6A$*Dw`O?CDKl35Z$WB{BFL9IBZBFS!27A+#2f(GH5wD83$uwV}uAoExz zOQ^0@T4YOpimm&z_=Y*4M2fS>%G)v|{#kX$USf5c^=|IVjp)ssay(>?20mzskHfm#%5w!@zmp9nfY}-%bsfFm+5{M8)(yUOidy#0r$l}WY{W^NXgUnol^Z2 zUGI%oK3r~h1_!f_cYZZ`W}apbD#sTeaxsOgF~@Ic0)@%-?@XQHb;2BzU393WxyxF! zdli8rs}=*Gz~YF@d1pO&q$Dl|I?@&cA23|{*IZG=msT*y!45$+_CLd{zk+naI3)@F zYvba8uNC$OfP+Wiy)$q3KdpTK021BMg}UB2;5(lWRU%htIO{12juc-q{g}kTWo}Tp9Ko|h0 z>N-`N4&Z)NwYoB82;XsF++gCMnI(X{>foqfxKwrZ&sw)68Feb#26BDsmDiZ<y9Y6{PAI_uuI$Ufr z3>5i_o<>D#rO?d9vWghNmnBPdR8T4RlDa`Acf@h>a0xZ7G*eB$jUrX(;{>0;bgpjh z2L~-FWgSU9D&x76MSBShA2q@}W>5J2h(326qyhLEQf%7hGdj-5*62)y zfp!?c9+>v4h%Ln5=~DSgf=SLeA4-<%O|+IQ+epZ9!wxg=P-&J3k<0z1C!Pa*xg3T5 zb)8Lg6WrrGZ#em1aV!mbHAEkX3 zUCOAdo^~ox=hXkv{V@wqrNN^Hzg#~adLxbTx-)1`kNN7K&b+48g5b9SPI<3h@q|WA z6}kTama+c;I`T`HcBoqXPmN#vD15(Ge^2;Hra-^PFVLqsS8~hHoYZ*jND!gsrGYco zn)vhd$vp`oKx);!v&k;h86@_rF(%%6dRIqi@vLgFM7z73;L}R($2%c{&N#}zpcxyI zed_!c-dx>-dVMOgGBw#kuC70gPamCV!33!Lq>#wD7q40j(EsAd!+Y z(yk(fPzUqPRJ6-mjAZfOtzH1)zoBq-Er+DPy1yg%yEgv-f3N;2er!V9Mo&RacUFsV zVYP<_m4gAm$@HLkj+k}YLEr{Gah&WIwMZR8(%>--}OO3Yms%?LLz_3mq! 
zl!%vc1dnR!{4C1SNXy1nvc*5#vwQ?^h)nGAYk=0RVS@9_Rr%kNPpx;=!+3^I^x&B2 zqn}#f!#hhu(9C6ZW^?mKs>a~39ASa)>48d|uJt$rs3NM|sghB+3=g_oe4zT(wgutZ zt^;EmvDe=f>q%VjTTG`k5lb)(bG&puf~#J`Z4^lo@PB}I_p3tA-U!6ka_%Y7(Y`tRgqQXC5R-9oQxkz6BVpewvCqK<;Me_27fxScvgKn?PL4iRDh3s8goq`+?LV# zj;914lzw$yNo89rzdkV*A#wtoj+ES6FmIbf#Jp4*T}oIAupwjoa0O6x@?zbbU;)4x z2kTlMD%8qq(uso{B5cC@de=1#)Rzd&fTKKyM|Qk^!qi!s^B@o%6pVO>&Vng7~0IoB=j4o^6n+V%2&$=Ay3{1*!HX|;MCErELu*? z`cHs|9crB5g;!-K4lo8sKBpbMtIBkb7F=meEphX{Ezq~#BSFBpzS?X{k zmXmLfA--1YkEd$+iuh`jr1`cz7*&MyMr`_8G^SRNg2Onf62#G~1svmw=;j?U{zXM9 z`G~5k!8knQ0+OAOI9;67Y)4)x@<@s@><1aEH`;q9)``Adz@KWTD#FqR7%VGTG)F73 zj|BTJ2@#i#ixDO`8LdkjZz*MEa>7>KgV22{Pc}v25E1!MlTOkUC{?U%})<{%h{+k#X8Fgd~gE`KVuhfX_HW|1akxP+2d<7W*;XR=rU>3-Py{G6G&960nZse-nD5n3X$r}JIQV>ZzKNA zw@9KzJEhtP$@Cx|YfXGX8blRQo8|#cdQ@S+_=rF`V_??^i9J-fhb~ zQHD%pADf@ZRSG6l7rW+bksBn1ette$A-iKMq{{ek2yLJ)e!Wd-$7t^I`NT37-4+Mk z1EBm3Az^bPgA=yV&gJ0Zg3R)5Mcc=yz``Pc2^f5Y_Q9)?Ng23~-BhZKAE@=FCZhKv z>|)i>E(zScW7@5Q0a>MxmT0g(RQZ7V=B3KS*^RrI;GP#>xW^SFi#+lN6Nr#=gWjRI z5vzH!hHeLzr>0l9AI_;qdo9$WDb$4rjBr_f$6-x7kfK-^^IB94O)^#&7BCnLBg+MkH1NqgawTerK_6Xv(0GTj0u_vYv^P`N5h|u*E(9org+NXuPoeziueL#g!)bW z<^KR^ay!@6_Bw^0qi-GEjl_2l$^#^kFlAtU#eQi^6}0ONr}u(BV>#d-U#)hY5b>9Y zJQZ-48Z4$uc0awck&lw&uNeD{?mryY!RFj9la)*_+`h_ZysTDXorap=& zq-ZxB0ZIlb3t-~B`^VoCJU8(7O#5!HF8SmixwMS_`a|^o5$=Bs)kZRtQI6!)oM5@8 z?2g|4`qNCbmj3=_g5DL~B#v;Zqq#NlAH_e~67O4@PY-wkSuL&9;!Qyg*N}G!k;0!| z52bkzj{YThzv4yXKBaLbrkbu=8yJaL!2TWM_{i&%o@k$` zes`X6cQSNveL9bZ)m7T!OuWyM&c`f%rmRw_z~+*^_MI3~rm4jrpH{6pvvZrf3Mitq zMnOdsQvj%BxQZuO+~J(y3Q3-HWD!OkN%@IvWALXi5BG%vY@7@n(r--TJk&;AtP=#2 zQ7XqSW`%}N1gXzI<3P#XmA+bqm9VOSZST(`(-mEW(K3d0C5{GosNOwG?F%GwqQD2v zjJZEhXd+h|GEAu=$W@5>hSSk~>n<4JjW$36u1g$r+L|~fZK>dCQEG%xa9Tptzyh48}4fYxF8U7=~>bN6q}lxZyYW2z`ILz%`cH8 zs_%9J?9R{*M;#4x#x_@GaZ2DMYldOBXpOK2dht`n)4H-qo?bd1TvKcTk}}PMlfeTO zAWfML#e;TZky*EK%3VmERiN1 zK|ju?xn_zpCc?_2Bq`_c6u}qn4YI2=ZS#4Dmj}}n2+z~48xV8TNvY;eo4y`1#)^snw_=v#P`t8_FJ~hOR)Ki<^KSC*!Hc-WWCm{ 
zx7njuO0w-N$+7m1xIJ)a0>{{9cArpiB#sFiWNt9LcLt=ezIm;@;cRWj{kwmt_N_)+ zZ8t`ErD|3o-mgXxx98GUGmLCEt+-Dy3=+&g7 z9S%BG9OZUsDm<*^_Lcq;4N#d<5=cLVT887C1==%#ny-CqUO?oK2Vikri*vQ?LL}Y+ zBY{p4nHW9+0;2#R0dpRq$erMS1E)r6kqHUyh>_tR}dj9ZWiXsoV zhCEe+9&w8J*)@9~RFX3fc}1ki z*(Y(+f-Bu-^8*u(1%6`w*31vZaoqm^XWRb(TI<3=RF@&oh?gXJ&(hbpj{4Cqtyo+_ z<8m0=h6kYp8m${i6ktA3u|Ff!{PnRHsBctiIi*eJ07>OZrCDV?dR_m{#B641Es`i z`{KFWJ1G@zEBiZkEYcRp{`OR2nAYk?+hJgWPFYoQSXVmH>NeKLKj%)&W4A>ZUT}af zKc;I=Bx_q(((uO9uscGG;2&Pq4aB$Y8X_jf8 z$DkGXryFURwb?J!{-E%pHpnCXDSm}nV@Qi40&siPt0hKUN-_z+$;C!unV7L&Pinlu zLCSM(MNJlJ|~`>XOljdPoQezm`vUz-w& zegzm6-5n1wXzR~4()ege=1yf$I2#D`@1N4S%nIr}K2Mn^`=YV0{3#t+NZPboI-2v} z7F|OW#K;@uPCyyXYtXCDyia8;*YMm4nHD}mso}o0&4stL)alF-!-uks^ApGz{KtNF z$ioi~>14)@rTAmBV>1HcY^gAi>BO z&%YH3^(n0hqY}KUcCp6DZ8$!Z z!+SI<@`c*t10&Y6d$R;(BRG+=LRQNIG zMcQPLg>o`;^vzP13z?K&#=j}*eKA%bEZ}aFcc(m7vP+?wsO_mFkh5)mcISc(E2Amg z12?(FK^%&iV5PIu2Li0cHOnCl2rJ!}s1*#QC2OI=Hn~sU6;&J()jf>&Iwi{7X-+vD z#z_d@p~v7WFkhgX?AuP<<8k$?-gI!}lq}2BoC?Zxqa|r0YE`WsW7ceZSEp(wIER@W zq<-;aUAtS`laPBF=uTJ-roMf-noEe*Sc^p>467$le!i93cyq>z9c=ZYBF+^h{v7-I zX1pw3TGq7D-%A*!qdibW9X7Wh)+D-Gt))D@SqhRnQxfDq@G!@y#ZqD{w8(@bbu}zVm>?*@_Np?gOi-p2 zf-{kxD&Eq_W6m-$!KMXiCP?=futJ}luU;xEdy(fcEL(p?$4Yz}Y*wnmA%f>D$C6LE zrdzNQJ-$9={w=s4oggY*8{3&?haOaFP{hXsI6rtE^K-+^(oz`I_2n#S6 zIO72K{3@lbsEXVq-|0mdGLzJi$Ujk5W;scKvqf@HCoZbF1L{s{oY6--e`vH{v`9`F z!6QGJz^h7-f-*hLRhA2h z<5W0833p@L9jZC7Pwt}##ertV4QSm<1Qwf|l~Or9>t(T40WwNKGI7FMYXbqvIF;I@J4FXY=EnCr@onPBrlTZZuKCs zW8Vjgw{EwRHt?vir^}PeFvot`t9P)8AqH+zKbsvoRZC0R1TJQS$xz=QR^7RMX*n%` zCcKDTiw9k#p_3$FVMrg=n2Tk6OhuAyyp-mlx+73rd2CTmL4GhtrA?;WT3fS8uzAn- zr2ha9YH3&pX^}}Nk)(-ZjdGzuCxCkMS7)^|g93x)u*OI0SlWu+6C5_~%8vaNPb_^; zVOrL zp1H3FG{|UUAGF#>)#N-+jLYVserP_2={#<(p_PLJ$ROkpX;^?VPCe`5j{|&6*1Q;> zW4SM=X-EJN5icy!R=gc#UGCv$Bm?)K=97{9Uu^4xZwQk zJ-7$2HSsp3ae1w3lWQ8i#Fv+M4ZcYvJZ|nD z-b<{4QC~>ho4NB*@(L)TlHXF=uu(-ZH z!;*IKlg(hfAl@z8F)FG+UflPq+MJ1QuW>YzZ5)I^PadRID@%mExASAjQ_~%5cxZFf zzd~}Vq%rw(%|={L3#ncXM_=%$qSMtBmXX|Z!yNUdTEuO`BN59I0O?veT^Xe8OJ%kQ 
zmINPkaw+~(G~0>bR47BvWPuj|kt6rdrAgllTQ|x?FKr0RxZ{k0^`L1H zTU|lAJ9vXSla1b#%l%H;#$B?N+B$^{Tl^}{p9r>qBQ75wF~@&us|B>zLgUPBpfMqb z-XkBOq?qL!7ACriOK7EmcQm;XAU5$fQlE&_FsY*;yXs-LwPYs&?HLCLV`%i@6BXfUfkbIXSi}$lZDU90rV8cag5{>UalVr%C95J#bX+> ziDEH~`_dADy3-YjOp0bV9&6BwNoaX;lWy#?4tiH#45bt@o#9iF&Q#;KYT^-!vGf(+ zSl3G)cc>HVY z{{RCGz7NxKIr7i*uaBVIP8+3thv25qg*5*F`sMv==Qxx25%oP>r}&KN#F!l^JjOi- z>rdDlpv6G!W0hkgJBC58j*`5OsJ)R^{fW0A*EwAk?tGFBU~LuKLrua zZBN7cf@+#Y!`ocOL03cR&=JTZ>sbMt9cjUjdiLcL%x&Jt`_tg>?F-@kRKA1p4llCZ z%;_9AQKll4w*zoecAWbG?_XTo-$fiLZwq30k1$7_mNucxN$7N9(rW^n&6c=DIIj| z*E2FjrJkRBqSig-m-n7w+%S3!o|QUTc}82HPUz3doH7jhcdUzKx|TbamIwQ9Ax3hA zM?>vSwK}94Y?941XhG#b+(7*3ZS*pl)VFgaaoS|;Qs)Ywa(#PM2<9=wOhpEIsb0UO zYAm*b*hwRKfn(fCu6aJypQl|k)0*LKvq-y>Cvd{3KZiAA>=2cID|}O_z(Dy2(DbOLl4iG(*HRAdpamfIG^}X^x|}DE zQ_Jw&#jb?LK&+}WoPI!8&UN$`k7Nm;N|FU!U?9)TR=$1n6Vkq)Ew`{oiH&R8pa0YS zJftHuv{&mZbMfb~%6Pwx7^xWzGh?{rXkv)t`&& zvP7}&XwL^ckxVsvj+4#%NCA$_bnQ@Taj)3!o;3;@-bLOH1~J%otFy}_dq_gSo^jU| zPf%NTu)M)->2EI?;AKy0HRuf`7TNR1jgilYTLC~3$C3!iIO=g&_eR<6{HR!Yha7z? 
zL&XU>)+K3Q=19T9Ml=#FVJ#D+GvNGEd~VaGoF{o?|QoQrWYv#t=aJ=Kz zn6X>Dtbk=UjAz%qJX*?FLlT$W#s_+ta-v+N(4}Da7iDfGQ@1@>0a`OU%Wz8k%HRbf zq4lQ8ZY`w{%^5`}1$uK)Y+o^o7F3TLdC%!dIK7C2aaLu-M}!|R5D4JbJ;KK%oQ{=d zLoAmHUPd4f;yiu=ohaQF@&*M}kuFW@jh!c6ztbIjnHpG!SX|@*{?Da))|0Q<>h?sz zC`tJnNy7v5uL`r2B(7tKNaa8Ujxkb2JoYyqX}3O9=)t(;W7D41=Hl}8Q&{eLI6QAE zw9lh0MBYLRh7B$TBz-;QnIq;dy4tGn0eD} zNgmZYv6{SDLR&c8{^$Ypu6-eyCb$y^{{UIW?wG*)zJj{=*BPcu638&snyIag4s;-8 z!4(geNij2{S+FXDDCnu#-w`nAeUvMGDL0&Qs zaZ_GfV25TRF~Lj%8}q8u3#6G1s0fj$U4&p`(wuZ;v|5JND1q6WvB@OoCX3l#*4(sX zE!!uI`_yr?;_0{j%mM)9bpHSvo>z(745XX0^5;0mwGu>`VlwEG$YVlI=|~5swoON= zOM?@}QJ@Ku2;5JmD&5_b(u6Qc<$!)+jD}xA4N{A0J6+C(C=i&2@ql)mWDi5w){0sS z(2+=ZcCJtkpmElg3m}eWhBasH-x=>qa}wM%ep4rq-!z1r4mwn^T|6gh$GJ%3%R9LG z(*}+hUG5#&fmFa^omsL70RFX;YpF)_$!IN@TF408*&%`VrvCtB$1slK(ss8B{{V2O z{Iegz4r(S!O6H-s6JN?cW(b4~ecboWRtx6F_WuBI#(rKn=C;Pwi1{d^9^Gr7TLpqS zks0$j-b*ngf4iQ8*i=PWqgp70P)4Vm9I^JOWt|w|6$DJ7qL*;Xp5O|lEH^gaWhlXeoa^&_={6B?k0daqCWMZ1>aDe=dPB8UpWR~C`n>*By zMJJaW`y#!zR@&K6K4l(wTD*U_4 z0y>_wh7%;reBHu7yu5s*^Uo)>PKYE0kYo<^14U`BZ()u|t>fIgA9MFP73VhUs)2yP zZb+|BGWk~XDCCIP^(VD?s-43=wXHo_>0$NIc_4~y6jxeGIU?krE3YPizX1E(R+|Td9Pa*sTmAIT@F$^{QHio%YD~i5emsA&}tKjFPNaXW$+w z#zZ5N)|`BVW=#^q62}xGPBE7M0PEAB^A-)gSw{motr+76a>hw5$IM9-Y4$G^rY2L2 z?k(HsDj3Yr+Jv)PM8TmFxl&b09@)oC`qx_wHp6@&ywZc_H2EZw4=rPe$X&Ma*m|F8 zmIc~bTma4J0Hh4hmJha}wzQq?iV#Ur&H?GqV^zW(5(fr3FIqx(v2UCI0M%D7ZLi%} z*vuhu(2<&g8RN2;!ErCmAPz)_KU`1*bKOr1+sv`~P_musL1zcIPS~qgH?i8;o56+i z6tE0%2^|MMwD@oBZ}k|on@r1dD+NVSki_~|7vsG*S<*Ep)Adp1?jB@uXCNUY1Nnna zj5+H2KD&k@tEWraC3Ueve-`Sq##9L71Y-&gN8?<)&eHPal6snIj3KDl;dtwjUhO0!O zGN0M#jykPzJ3s)}Q5Enuh$4-*;00`C16=r}?&x*lz9VwRHX)k?Wk(0nmf|UK^X((s ztyhj#ir75lvBx+#KZYr-8#GvnnMvesB$1!3am85dmvJE*oP*k>^1{ejSdx2VimI(D z5Tq78&N|kVWto0z+9Wq+aigVugWzPxg=_)m_jU3KTKgBlV;&aMzw4cR-x7ZTdY-OG z?@~>jAp~?Z(x9m)lS8_YS3J`f&2#&&9BR>KkqU0JB%2mVY1QapS*&y-$|F#VslA zNnYzb@TnDm7^vu^7B)1Eds*=P>S|gQ?AA9CoQH+VXX*jLA6omf;b-kFWv{^ogX6iP zv%CrsE{=9i!=`;0`jd+M`=c2nHI(To4N0w!*~2Bp%u&l_Cg@~P%E)pGo`ez9Vwr9& 
z=Yn^*3_^UWa6u3B>-g8_$H700-YW3#h%~Pa+}u6popUl;s?6Cjd4wn+ka%BD(5CzZ!NJ63MXMp+t8%XI5=CfO6I3UGfK zu?^g2{bZ4ullYI`2h#vmEk?^wf&!`K&hj#XL*hgNw#H)>`$Phl@_)+ zABi^+&7|ByBaCfqs{!+JYv)~q)P8mJ7mtw#h9*~ad6k!QU$wpy7&t+lz9$j&`48R<0sQTMSG0`Hg(TN8i+)iwh#b8oS`De7IhwrqiA~iCtrjq6#yE z&ow=qXyF@f%nmXJKU%kL2$8nJ!?xj!AJ(>vQbrOrRQ#ZN3Q^XYqZI0;xr4oquFMa! zuh3PT3aw|%3-qfs{RM-U?kO+sugLy3&VS$Qe~Mq53<@wPqPtH%B8+3DbiN(~e4p;u zHP@Qyd_DK%{{VKeuly+;Sa$uiZgkbjYf>s)Dlr6+_}5!phPwck>2?sT2PXiU)sVP(OBh;B=-X#1ffOA?GTbaxwhqa*#+Cc2FGrx$bImvB8o+LFAd1X;8a0w4~!0A1Uiq1$LOE?5xk8 zJVPkG*Ct{Ld1)G!#xhFwu6r*TD?0(6pGwr%%gd|JEPwzufS|T7ep(p= zl4L8u$4cr_yE&w^5#^H#g?D50qNOnr~ixHbDq$>htGY~g_(mF?1TYesWbi-yIkdx-pxF?c=wstGS( zDtRTFzA285u5-vB@mbp(;4+L4O6o~DXmV6;*EDyzz-aJ2`3Ad9+|zjD>ceok8KgzV+&!8u5gBw2f_a1adD-{{RsB*UaNG+0%U852C_j;Xik& z+%hW+Fv#>Gv*)%Sc}L+|d>%(iV}-{&SC@8=O39qkTP$-+AQ)*npQi$h9M(*>?5xLc<5w9F;%3~%71XQeE<{8!pF>L``Rnq&NAjh(a|<-Rd$rR;g9$0L@f@G)p4P+t{-(Bmsa=1Ms0wxh)xw zazHj( z++kP`V~T~5qSIRD1DlnM1HT01e|QR#%6O-=-aM%1{{WVoI9=bBYGKK#cJkE6Xy;E= zjz-4fGm<-wY0zS5R#FZ~#tkwixH1%tgM?l(da?eM0lK=~5_u$V2XRsIsRzH-vzK#g zD-ZiUuw|b5QpN_|8!-cKw;e@4bg3-TqlOMLsz*cXnw6rCNJyOnk3us}je!b8Wq;W{ zN99kSOQK4#yo)S;dn$%pl1?&xg(UXZFCwfmGodWZTeq;QGg^z5+~^L|#zt{UuJEY} zto-AOmhY)fER0_?uGTjATZLcZ>C-gbHalx__Y$~SrvNx7)6i9>L=LU624jNQtlf!l zWF-M)Vxw>eIPN`-0$Y}+iC@lt2(4pt8J7f!f#wjs22U9^DaJA?oR*Q?su$!8Fdef> z$tLC6*a~^irE3{jz;TUk2hKWnseZ`vZH^wGcjli4#vq~Gq@0jCW9wTs#@qrTU{6Lq zhLypQ+S!QL!nS8E-e%-Qko)+?s z>T#j;b?O7|K>1;M%~c8;v4h}bEqu+BK@sZw8TRNA80%-xNKBABYGD)PcYk|MzxI9z z5MbqW`fv_sYF>STL!-GT93))owQWj<4B3q%j%qFC96Rc3b$fYn-y|7^hE${oK4AF) zkM^6d5V_;>L2ZDmZrYNvFhU~2;!tYaQh8)3Mjga@{IncAL?AOM>}S=j&3`Foq*%*e zisT|(=&r$zRojg(F5t6xdRv7w%ocJ!-wL#BYo$B;>`)N6hc`pkL<%I4XjaS=?zI_= zKW8Jr=vTz_C@@=D@kEmMVhhuv9#Cx*yy_^h({A7FtGwvm+&&|D&WPkt$9_4jtT!#+Yd zpj`?0#Vtd!Tf<7u1aZ1}!bWY?S)OOi$HedQmQ8kn!@hclL<7r!F}IWaU`GRa`G)lg zeq7dm$Dl-_d}@1=X0t8uNduwdI6w+2o9v;Y(E8+3g6Uoc%2{I-fCuY`t%3c@Lti;AXNL=AKVxF3Qtop z6)dG2w}iSvJd`Ey1L*Q%=dW4mPQqJEPN2Eml0WL3b 
z-e$>WmfrU>D%%+~N7FX^pjvkN7zDT-D*Kl5ZEDq^rvC%-_Sg=|3&UcVZBqNF98oOw zaA|g$?=@({w!xKl$6TB?%TGhFkF+M|whIoazAv$e+5E(l6zrhoO0HAUKRvw#tYa!G zPrdLU+E1Tu2A|nP@1|Yj101ku4<&|b1-7^R(&zoil6TV-?+m026nw_^{Ob*+pCuax zLXgfKn+si4afID7_c|P(*u55J{#$MD;ab;M_``F;Me+&{<^FO#w%XeGeJ|}OM%~XZ;BaVk0Q{n6ag2WEdWh3`SKfSYHxr;yT6!Dag0{5XbE5`gax%S zk6#C03ooWtJ{J?L8>WIog=u-W34T|MQ?A5sJU=xNK3g?>Hq|hVMn>kY#Ys4Lt2@0g z<|(iq34_av9?Y8Wo%9E&ozfb=M>3gSbe#b-WiZ78yT}66rGLKrUfnUrt7lSaDbgDF zcR@g-S`v?3LhiTsw5`Z{wk||!2%iR6r{Mcq($e&DO<_%ICLIy=0tj}#T^!a%;7qK) zdHTU}tJy_Z;AiR^*#2iJYEAk})h*Z4_^)9Cd_8WckWEhj`6i(taPu6bi8rySTkOuV z^_@D~*>HC0i}F}VF+;h5S|=imHsboAhqLn5-31O9eDiBOcWu2qlRB(=@biBCxYgsQtq2AK)Y<@=I>cnvi+j1RJj3v+2wF10oN5 z@elAfCx#X1FG`qfp9B5U*iG%7>golOQqZ%jrUoA&pkY@blQ@sLsSg~TQm15~ zTNs<*Vun#YYSNI0|0SxQN^z(rAfDx>BI??|EK7Oyu?`|Z&&2I<7?>`KJGt^%%ZObE zB+SfEX1Ft^c1!#{HHa6mYPca&D=~n;2=Rw#5O*oI7tM2jeF%?09&j6k|9bB5OdTov zHzY+TFET}HE)q5XDiBvMuv__1ol7e8J@H_!Sm9~WK{3&y(wDj~l$5Dro2g$>io9Rm zS{y0SF51mKdgA^A93YqB)`7Fs&uaIg?H%XsDywLIGT4l8m^$TLuxa&-hrB+xyj~sv zo%^f=)~Pz)Aq*s<0Q^U|&exl3_u9x*P3<1@ncusj8}9qC zdzxN*SFRzUp0ejQLUAIzH!b9Ym~9lw6ojZaN~tm4fuF146S1e zS%7YwB!y=v$fDowu0V)kDD?NTFDvW1p(DT)KY<584B(I9}T5c8*DdxMM!GLk2<>i?r|K&~VmW25pOzcxn73_T9XFbBlPI+fug0OC zP-C8q6;V^I*k$a7fo$OO@xgD{&0K+}VzAqHyc&JJf6=ZwmvJ_ookG*gBlxVOp z%_JVMPr){HyOoQLWLyQ)AP)6jaIvLvF;ihQ2GWSVM>Z!*%0(}q#0-lyITFNBZ)gx> z?+$ppepDO0$munUKp|%>DJLA#F8^gVe2&*TC1BxdR1WkIfQfcFy)zOjuJnqozir6H zPG0N5viEvu-bwOzJCR0Lu(wQa-;3%#nMnKKGzke?C7avWx@xhz4m{qhMR7?x?{btK zX_l_gC)Tf@r)0*yS`)geCi%3g!nkj(>fmNEDW*5PXV#Y*hbv+67A)B>$XLJ_=X(@2 zX_2&XQ&4O!pPLKH4bHqNR^6A$Q_&~Ck?Z`@q8x4Iwk5LDdO0dNGh;jf;S`Ql{*e#ddy;#FGmz-wE0gLERrxI#wzi*F zF}VYuOD6~O+#8ri1}I+sju)y)qYRe41&*1OX8z1YRKiZwiaE|gh|9veAncN$HsYuRTMT*4%P_a$pi$UT-FJWP2Ow+Vjy+e9>s({_S z`*2Z0W=9B~eVU8Ry&LR~GICe(divzx@m%mLj-1@BexE9_RB)uv{1nN1hnx;&c6z}1 zDWO4?11l+OQ$oE0w)9ZF(gr^5O;p} z!Bg=be!chM8P=J}D4j)uf`>nCWcHMBlrw)em(So)8#dd9kvgVllxw5VEiT)TfMM8H z7j{Sq^JRzUA2T?3j84=z*fxK&bxn{LS&wc;sDALfw@WQa?os(1uUp)@L~YG17H>K+ 
z)*8e1IIH1M7Di8-%8iSab5lwiSMF(4bw@%kko~7XLwd6HrwClp2mGn4%)-)((ro9ZI1P2oQB!u)0Wr0GT7HOs7sRx#rO#%(F2!=v4u2 zh>mx(>oxXuH^~wXy)5q-u#g_N>h~B*)ZF&UT6fh6WTgXw9<@z=KEUD|ppZ_H!y#*>@?}*zDDB%Is}TOh{_5tMfCfZw1R7TtBm*_3b9a ziSn+fuCGWqg%XASfCpc9g~|RLG<~4xOQZ$Hp*!UwS{?U$a1}ETYb|qv0;uKg)r^d5 zA)7`G*I!pLDjx!;n=0jc+4Vz1(er}4$rt9!@M$!3Y@8QLO3Q1#{sH=?XntRbr8vg% zZ9IOuPCGYkZL->X8uh}2pBgz!5?XNSOZHs3+(WICKe0()mYG=aSA~Q=7`k6p-pD_O~vNBwh)n1GKU5r8+QtiPeo~I<^-nCsxrDyDpl7w9vN>5Rc8kxt0$+K_Ctzn~ieHaD>eog%Dc*F5o6DSw$NJ z3{q9P&lgr!o-U<^^h+B!2_8p)Ws|6u+cB4r=9NhRZYF{cRW_PkE}L1qxn8JDX0dEd z7@r3xNTzrav75^t&GNblapOi&sX(=CEKf~nUM8iF3zVy8-uT2+m%LRB% z8%B7+=Lhta4XlLwB?QbsyboNN<{d*9j#PhI+=Ta&eC&{Q$z`8t|B!zTb-6vGeAD3^ z;ScWv+^HG`;OcxkD^0;8J+zRPh5l?=jSe^=0n8RWcDjENt>8QFIWbet`>0CSMG{0m zja{Ohig@Hv_(j54&8N*cKYmfMvtw6D`B=Bu8KpY;daY&{&wS>d1iOc9jG z6J(?`G{BX9g(1e>Af=Vg*}pDIp`v&A4c96L_S6P*w$rk)LI-J;57R!|G zEo$o&m{_Gr^;YJ1!poQ^JL22#Sajm>r6HDVMtXonqo6c2(=GimVQZ~d#e@CbX@=8F zA?oAyKLFH_)#y=k_W#%aIEvNq5Kt*9&aBd25*0V-94$-O&WznM33rT!N%8m3W5O%A<4+~p*`c+oSp78p==wj_3 z;134z!>f6eJ9bEfc>o?capFeiePxEXrMaf%TA0sQRhS0Dk{nyF{WZd$OaM?^7UbIW z<&KM=htx^9&}UdD8>n)+_9@x0r591)c}2@8cy)f+)V1X2ccfwXMpY^gDuZ+TKiFk> z0BFmojl+$+Wt=IT4d2g>f|(oyL;aI{4lk*;%3`@CE_MP=m`Sb?G9jBvw*+!q_PrAg zs6N7*SN$lX?%aTFH>@==ezN`G0hCG)1}S^g_upiVqUd)t&z@1b*ou;Wrb%A;qZ=x5 z?=O)LuhV-}Tq=4>Du2tZD{84K6EZOUy(;7U8T~}-cGWBW8oua1 z*A3aC#eQB5;$>dAf%3vKgeFk=EGPz3AI&?h@2nXPUfoKW6WoQ&6YaO!oi`@>$#I`Q zUy^vmWHHJW4Ikf)urJD}cAA93jcq|!s9(_7krjU6aWuc;82GWURgzm!GckM3?w`{0 zbNY+;V|Gx(EVk^!HZM`a?3Cw!peOGJCVvqAvkl!|Z?_lQ()R81=gY?OWAK1(Dn;P+ z`h9G=TG^G^v`^>VMxoJGBIO8TmGJxP2bc4Z_!kAXbzd6bH7VS5XQl_K7AKU}I--kl(@;DRJ76%!9sO>YbzZ;uZ18DQ2@IHKm7CNb<;UnPm zxf(DxUOkICl(~iBB*dgGGD5WCxzYCB{FD5mL#J`7AB32pRy2E~@n=qiUw6lBwAz$# zdl#8dxJ15}b8J(aZs3h+(U{Ov`c!N`gATyM6%o52x*cL;y+*HO^>}Vn%8l(pCNUZ5 zWT%gl-e|hDpA&5#u6V5bIz0{?+U)FLyeY4^S={dEEL%$h2Ed~nXG10r2@8L`L%*L$ z60UfGF-{o1@@?pv{f=LsM*Vvpv-Nu(f2R8ms04##8TXYui1$e5HHzUaLtN@UqW{HE zY#LC_$t_0aRH|N&p{X^`S|utWYCh_6Ojo>Py1?9pU3AJ+`T2zr=hch{>YL27D^3ys 
zw@O6Gh4Re{(5a=>RfiCLq4a#l@P^6%IZl=uHF2_1RdI2ZiaB~E3w?Rth2)GWiWq!b zg?6lv&gisIrXn4K&+T|>Fxmr5UHuu@jS|F=oNHq5YrnHbXZoss0~4@GP8Kq6AiaW! zBjp2~6dli}lk9TrQ`)uK16|6~s2ad#QSK<<6CkB-NWwlE+?>P$bZ3iy%SZB&RUm z2>Ii;if2rm3I9sZ@pEp%4JhY>+0f#)t7lW@r;27JgHXyJwL&4GT6-b~-AVsRFXR(h zQ5fQme2%a7GXG6lD0~V$xl?Wb2;G6-?6{qp}uj zUua!$@&TW&n{(Q^U;6P=B4XX8a)o30`J%YXxb@8Jgi5duBL_CX!uCX@U%}v)Ton9& z?dDX8&KW$N>ysiz%~-cnGBBpzu_fEXk{hp1LHg@PD9M1QDEaeUj6NUEUs8tE?!Sc8 z>VSok!A9y(Hy-t;Tq*I}f{HYDFlmLRM$wkGBW3e2tr(V{??Jr2qdvoJk{g>* zBJ1P39n(9`DsFoC;e=O>Y_j{LwK7o-CN^#X-zhJFZ39Dzypy-Obe_k%DBo#s^;k;6 zNPOy#@Mp5LkX{S*(>EGvA|z*--`hy8<)- zF~uu#G5C8h;l{jUqg0D-3OPH=)I_BYWRKrNM+S2e*;!XYKkb@P&pQBTP@qJRcVND0 z@H(&ZA%!Ev(*nUnT4TcWQvI*=TDm5!Ni)^4Zwz=GurI2KTwsj$qF)0i+ zp;UsUH{&;t%e2hh!t;KV9fKFy&yn-~x1*W^?}R;JpeL$7)>2^=aN1a)4^GYe1HiGF zbp_&C3n}&0%@Fw|21oU9il~MDM{*U=eYUfO_241~aNTR(sLyT6O_M!QeegPl0Xe+Z z!T~;^H`@o9#D#0u04ftsNJ=WJf6Jk}LPvyG@Lpv%{sDgfr+zjposy+NS0gbgg^qmv z&j$x6Q25yWbxeN=*zwtm%+I8dI>p7O#&VpFGwH_WiV0s?E{0x@v?W49h-W8V0z_ls z_E}xcG~NS%Fq>d#{!H|e#HEDG$p_D+QjsHN3u<#a4&=0 zxq{9I7m3ZEx<|aHAI)`=o{&d7a=}9L@c+ZdsUsYAl2=n7_=W*Kwv*X;bsYn;b8^$y zorY&!8ErhmH_Dwh?UzQrszlzO$nj_AzOUPdu*lF4N{UftjN>xbZn`#RoVtj8mRHg% zp&OnISoPxWB?>^SrFba|Bk>RV#ewT}$&B9af9d&mcH*3GN{y#&zBI)j#-)*;XF`An z2@MAiNeY$9kp;ea(kfT0&n0YiF_o*JxsT-5=DHd7?CP%%D_W0Pvp;a`ZQ|cXZK}}& z{J8uGT5w*^ZJ2uaGuc)Z)MPBL|AK6$U~Y5`NLV}@B-giu8}isEJmp_QOGmQLaITwwebugohp)*OaD9QRkRPMcg=v$fCpQJGnAn!3_M^M9 z6X7+hH}6OrXk|lk+wMy3X`vW4D!iEg&Vl>dz-0&(T~Q4xfWFuHVe9V$ky>NQj{gss z`#%8XC+_R#K^*XE6Mq-aa0AM|=AD6^O4W)KxxcHp%{|<$51cp@6-LzkxQ*Pxbyb

fqVur)!^utdG}}U@jlO$4zfR-yhDoY{M_@_U@`oJW0ps3z1UW9TYDAqs z=J=HOHEPlOptXir#Ty@~%bWCETSnOgGf_8aWauO1{xE>#aR$u)T+Iyo7D7l?FirYQU`#8`$KDmkyCV86> z98jd;mtns3?Cj&ReCBOzcI7NMtLUTJv!WW8s#H&=EI{Z+Om8}oR4P`zfHWMs);0-{&z-+hp4e?;Ik8;E#78=7OOyTcnNGuZVez_C;8KR&*8RuBf z{n`|D2c^Z4#nZ5;mY5Cw?IxUKxU_JWdhxga<+H6hy~&oYWp``!1UjE5I38KFUz(J0 z)fG&hS%Z+~+gQFWIq6x7XJ7h`GcGyGI zUx$>lT`3 z$7}TdWnGwI{eE&cY3pP>bm(+R$WG~J)Esd>iHbmmq9`Jf;L&^fBohX9OeL;De3qx; z1VXB)dG_}4GL<$}E&)qr?E2arf(ciCDseQ=)NuW5Ry8g%PaJ z#~x4jn6>fhwc>G{JRymw@p=CFO%2znv;4(;v^NyUQCT08jkv1BnE6(^UjewVH*pHR zw3drZ6~aDzD^P;&-qJSQ+Z^{ixdNU96q*U`0*AUbv%H4i-8wd9|CE{K%@V%*1B>3u zChSO8fTZLf;hi*MOGJjN;qAt{eCkP98$E_Kx~{HDWpO%}Zt3 ze9&Uax@B5AUAO9jfN_;=hILC7;bpd{syLo!FO?m0CjG%%nYtjtodADKy37_rv3IQq z8hY^vgh-HErs~}2LZp-v6xmA0wL2NE^GO}Tk zR{a|y?k9A6fVNq_pZ3cekWflOF0ZMS1B-v`VO=-3>ySf~z*2)skW6{^pLa2N>W(zVKv)tML7nd9HB%PcqlJXTu;)K3eK z@>VRJUJvG{;n6KEj*Egp74t`mvO&I%nrMkUTw;a|XGT#O@45*2o9`>Bs4b{*g@?d% zJf)PQ0UXXTX(bzVS0;<&dXMQT=97+x>gM9X6v?b798;e7+-EcJ%w1%6G?#ef299BY z-^g2h8)z(#Nb%Cqdn;sH6$3mEnvTyQYHD}_UmV6q;4uj8zGWU?wDV)M#+7GT?~%+a zqfEi#rbJhEGRC2+zol2$k$IUq0^rUv^$vO^Y*Ah>^hYJ|DyeQ;kf-au)RcyJ)agyh zkYGSDX;(SM4l`93=6=+hwQ5M>KDz1k&k*hKZrT`zwvcZbzu?*EH1OU`!>Bn_x*@-u zbtj-J4oiN!P^~ZryG)*X-TB_Jwq&BpdR9!Oc65=!m=PO9v)#7KVW0kfs ziWKE|iH3P)OdJrz(LfD>gl!0EXQ4s?4E!S80vzgdnOZ&0W5!q3r4gE$Y>FT$1(#+t;CB7%2PQ(azKJ7v5^WhWL-(HlD45GaAhsxS83kU5=^X0 z?b)tPc~+HnnNqv}Gvd9yPgPdL>*KikMu%|B_RY7W_6uzl&o!K9?b9S&+3HEf{>leW zG}$1|kScgqzIJ++rhvrC5rpYTrBBn(h1fi}+UUy<5Ag&d9BtdzV#$mI0)$ebl>NkC z+HLBUdzIC5&eFEchwDF0=Q4ky_6Sq{;k%BZ<>WT!*Xh+BMnHIN znu;;XaW1RFQ7^(l8JGgOmH{2CK zKxrFQo%6{ldG%wH4-%Sp3|uui7E!L4M9VNfJfMM@WSyi&CwbK=zT(HfG~mCfK&8i= zoz7Z3XZ~aEYB7do5k4Fiu-_cC@%ylD)%bn&TecOErtsfb>m95j;6{V_TGE(@9&NbF zA3mUEqrGC-w4*t$9B%oDThfbdDN|J>T?E{I`{q#Qsehwk`=(FUREY`sPj(@vbi@$B zE$v_|8yNR7aEym40Xzdxs~(JDQ0+rkVcZPhI^@$0G(!W&&+QimlJaThSSrihae1;$ ztUYReW8CC(fU0VzWX^fEs^?#$e=22t(=@+dCC6D1OaA!uQ8dhMW2E2U7dV2Bq=fFB`jWJ}?p`l24(v@}}Fwt_ECVU~Jx 
zJl&B8<9?#D<~xiufPwc8?~DqnwW+EcEmk%R*lTWJWUD-_cG~j~x{*42Fjolf zQdRpsscaZ|OVLJSOa1eiL21{3hlSZyW~s3@W%&0}t0h@bd*07f-9rPTYAdjTj6>N} zgRUbA;$P5<=AXNK*)n~dXRHwcdBcuklP_!UBi;-;;0M%@@CyB@qimjK+LUu!tazrX zKJ!&D_YEm-bFYclrb2nqz3hHQIn*878?$2fP1EZwltxFiAgwhlQv;g%Y1Yg_t{Cs@ zrNbuZyN$7if!_$Lc3ZGHD{4&6Sce@C)tKr_ayB>-nD znX#30eQDQ!RYzYL1C}{eMnmZ-Gg(&8FwRd6garKf7EU2%IY)m+1X{>YGvH36;8> z>s)q$^}nRrw;vifJG+~i0OCCpM!t`<`sA1#8%8-Q@Xn5vse)SFo?{RF-u9~I_S7BC zSxIu+wkKC4uc*rLn74GMGSP9@N;{OG=y{=CWRrMYo%gzv8_($V{3LU%OJaxYwwRL3 z`bP@Lr!Q&QUXy~ZxQ#r+77h;$^!+HKBAELDR8b43dWc;vSC2r_Xw8Z{rIUYv?&sKj zDE_l*Y5K6XfhH{p-`Xu)I<(_2R0ym*rcFMQoEqq`jy|@-u96fP=g`Pm`jTUwcp4Su zJV=nm8$LXOW)?{04_C)O<}E&dIb{1Fi90}~cP>~uM!d!(%n685 z%RDRD4j-&2S&=!{^k$Z^?xle&1*)$d8`xhOA4-LpfZRmf=?qwvU<(BUNN4?A4;kxi z`CCVW>6|}?dOo6DRjklt3y|3Hlzl{Pr6o_&_HR*iKv7kb`+Aufv%0`6^Q9t8!P0RAJ#`17B&fO72w)o#tX|%@*bxK1c?~ zivCJdJW)QsKB*TgRW5;Yduf3k;kn*bOz5xze|EWWt>!7bXGE26FhGWZIo*~+XG1Y$ zCDnD44-gUSPz2C-b{;hk66a}B-|e^xh6|V+U}E^QU$*`^rZxjl22~Wll|B$9WxScW z(|v);V=w#g!*U&_Sxv(fW($OSW=7K5o0gTffc^a&1@WaQN4_64WB2U{vU4AeE@Z=o zx5R@K6?q8IZ3bg70m|TX8}9l0#&y@W+`KCoohZ_$c%G1InnIT5@x6N}%lzsc0W?`kn}` zFGM~DF4q7#-%pFDx9JG!EDY7nRZ(g!#;G?V5ArJaW6AzWmNtJmo0mZ;bn&{|F4eCc zG($*4H`=fzan!qruuu`DlQNm_lq|l;zw(jYE&r+^wKu|K&NC%#s0ymErCO#|Uv?h6 zFJJK+L*&bB@In|WtC6FlkYpcw^j`R`AH#2`VR@K*!2Qs+ox{aEx`(oiU1O5N(0OqOjQev%g5|R?XB)jpa1|T8t-R$+-~E*0 z$h`wR>Iexd^s&b+ybYIddr_*|{JW{}@EQ(J!WG7;I}h=DJL_ZUH?-GZL`j92$%Id-pz{t2A-ZMUZBDww8{&n!``q>()E=f$O zcW*FvuXNJ9wH8@Gw6o|k^0#_$)Z?mD_t-}G19@FQ&hRSLu8wF5p@I=?u-G&}In zUbxskG7`1CXKdzq=VYxOoo`z?;bm5ml=R%V+|<1f@lsF1!qf_;vh5Nh>i2c>u zCtJAuRpd8wwX2Akv83-?QRb0>_e{!Z zgWrD_f%y1*`*GW+9kbD!(-NEz1*uP}xnRp4TU~l;2g)j3iV19RlX;)O> zrV+^6s_?sW`Qm%7i~WpvXhaAMTp_EU&}EC)lSJO&p)CSREbo28g_sIT-&D|ID5g6f zDKYe$cMH4R6cYzf_h6UC6qW1gdZO3X<>R`Yr-LhV(%(#HZiCOPJ!S3Ac29-XA%Fb& z7Plc%#*8yJ_P8rY+M&w3axuRy{E~L6*^%D=M$^dJFk_1%>u+)-Bv7>-AN@v~;QTif zi(==82-mL8*GTh*ETdUe)<%R_>l>JzQHv-wW>s+$*M>1`l4Yl_)#|O4@n==rfeD*< 
zgnqX5>cYUOX4P0_eKOIanou!8;Je`g)iCGGeiSaaxMUJCt<+9#msgvnMV58P5%C$U-gN*O?>Jap1fn9>UZD zWD9mBrzZE)8^4+yDdiquC^hwHI(w9^z3T4WE}&`;GB$8B9#thRd8&EyPYrJ z?4}4gH`xti&aQ-vyra!8DkKWJMV_wHn?8}a6)86jg73r8-2f8Iv`V6nyW^u--!2#TODd2xE`pf>S-`|P8G}u;r__jtn zDkUn@ijZw@!8FSL4eB%B8z*yT9=i0L!XVaxXbVS+G`z^@g2!2%6BdHErzan9P%WqtCovUF{{pSd5IMe> zp`c~2-`_!no7$UOooQ|=L~ZhduS(AK)h?#JGV2|G#DuPwZUG}R2W%>lXoGq;GreQx zz(w-&e3{(XrG#ci$3ttkWeeX4BvJQGubKzQ3xBkSw=hz$ZRemAdf^`wq)Vljb=aBv zVqLb|35ag`eu4F*F%4eRAfniW3*B!UQp=G1PS%#hdaK9>dTdp-6@Mj=D4fWkhI#~j zV?vO?A&2YYJG{K@>yzYj*WWR{d^|Mp$B zy;HsSkC}qn)PR*l7skmsuD9yJu0r*Ufatu3jY`{M)1Rx zc2QG#lN{9}vR^ZAMk2tLh+Y0!GQgD@!ULc4YG`@Lb#BHh=Bg1!j2LCks6Az?Y(R!V7OW{d_N+_^NAdY!nBN-xSU;xOjT#i+Zd zf{QX@Vdsi`nvu6au|j1QayySm5EN<4yK>0n@y6^YN% z_Mu`^tQ#YUA5zfpImB`3Ty@s^TNUb|ToYMRYjW79`WrH2osQv)!Id1X;B~kI?PZPc ziRO|YJpDNC13dFuPxC=@)bV{ArFR;0^NyV=s}+2rpMSWOu1UWZypulAXJyFfY2#Uu zBV9G@kBYad-@|znEX-C)lb)o(P1Rx4GXh??_GET2`evI6u|G)_N>_{;AdJ|fxXP?+21l-<@}P2syajlRP5>9 zKa=)pixpzW&O9bc+Ua!)?_Jn&7w2p?f5A zyxd8$H7?w5&f!vGkj1h#vBYE1(Gs?%;kj2@RQeAf3*ug8)wQ60+Gq>i6f3rBVn!zcT-fPY?6 z*vh;rO8q_wbrS0my6E$@v!>GS2oC8yphSGw59TOpYUl*f8EbD^cBp<^gJ%?h@?Fua zYQEc68EAlC&EJD#ozvH5q9XT*V$;w;2$IJMs)Ok+*aAR20wWKSE8wj(v)#ndSz0hz zhP|f;t_Eh!o0WzD1X4^Xolrw6`B_|^Gpdkp97I@`Pa28U`YVU6E?40PqN7<3-d^(# zp;$Pw5FaC@PjT8#C<}+W&93Hlv|u0UcwQHOHdFRiMFd`ImkHU6_M(meSYcs9R-r@+ zR@$FTuT!`|f@ZLzp=z?4VvvwMS$6#-Jj+Eb548U*zNNt3qw&?hq*+b53y~%^2QmFc zHq(q=2W97$K@!I~VTy>1r#ZAo7TB7HMBjt!;0cWqkl1<;#WASU*vzXIzv@*|5e|oc zs|U}nTQA~$Iw{L``pL`e-+xH@={FzwDqU6KYNc&+07haQ*W25&f0g{aoE~FjoONqV zc_!!Ro$%Pki5%HXQg$mOoqp2!YsfOPC+^M~iXW}Ub{ITJ5Lv1%OZ8Eiug*pi-<3Q> zzbR^vb((OXCC*=`Kc1$Mu3C;tdMwI~VqrzM^x%5(ZdPu!*YqnpMF;Q0C2OpwWR;}PE!Sp#?IzIwopVMo5YpSs_P53^p` zL|e9pkH<07XK~?}3n!6Z;;ceSt zmdd1Tx5&+AjEMrZzVwgrzs8JyY%qjd7akk}P=46u*R61W$EFzwv@( zx~^9J2|*$OB}D(#iT}QG@6Xqru^JLfgb%}`_1wXnom*M26l)a@Oe9y$oIJc{WRvwN zc~ZKwe<}U?N-l#E;^WJ|Zu#{y&T4kLoC=JH+GjKvR-1`A_jqdMYUOKznQO+drlT(z zKfr*k4DO=1K4N_6Zu~*y(+zPpG<}^gtc$+y(Tx2A`C^{kqdJ&VJ>;z%W#)Bx1lVg^ 
z)lgaWEu{8Jv$H3hy;EO*;<%&k4|x;Ca@j?8n^pE5e9-uV%zz&lRD4W~<{nx`*tTjt2?&x5^Ttqw=Jkt89$ZHc=WryefYCjS}4aj z?}HTActw1Dzgt|ZH193kE-Q48mrI~j8u)g zB}~)LIG<9AG@bVk&koylB>|jOCs4b2`SG9gS`<}! zZQczUwkyV~OJ6u}C=cur0C2I<6L>Qd#FDa$6ONqaZaLVGdlq;O+Ai4*7wFdk7;Ww-ldg z;<`oL_nfHoF~pXUA|etRn7epvNBzq2unGi5YQ(QwuG3SRYvat8Qz2d>qr{0GVTr5j zR{E{^=)6GM5HG!`T+76k&FeqmV3wPf)qX=6P6%RSGMRktfk{J${Xv~?Iw*W7Q%APs zM8Bm_2Qm7pEU@bgD@qeRO*J4q=plCZOL5&0qVL6gE@>w>=^w2ro6Ue6T+W{BYX;r2 z<-=-aJIxnMox)FF<`Q;H9zN}+Jj}$FJT8<+$T}n%s*V(E&m>xqg`IJCV;JzCV<;7! z(VVal$4c*yKwkx0+Ox{xJIYdU-<$grAjcanK{hpEA4r&+?4zHUW4<+MHrlyA5`XEtu49Z8nZ9MHoHmQ_mn zhM&S;%f+~uuF1$gr-AzL;5W=!?Z%mJYsHM{nmk5lKl1}fkd ziVNqFtr+_byj1`2@kw9v7d3)=h9;~x=F-U?f8BGaig)F3 zBD&v7?W)+~{1%j&;hNn1$u(Qf+kDjN@*idQS?xvC4ECDeHewz?G0#he7 zCsstL(w{Plu(!FNhXfJ{0n?Q-YOzcW?9qza`gYz$;4N?dn;5|r^Pj^{3o7ySB<|3Z zEr@B#4pRlOCC?#z)^q@2e){xYUpE2rMRM>FXPmLC&x`~LfebWD@>?Un}Q0R z?W&qdt#z$7TS909gY%<;1R0EY_u4#ZKbm_*{+%P)iLAaOC~5h0SR75Q&;)qgPVa?` zZ9N38VjZfZx+7Pp3f$(HaQ*`@-RF-~@`5qmpUYKC?;y$|({-2ZH7m!Y;3P?}1V|Nr z9~k))*ryOY#+AV7tiAXy6$Md1)dc)WBQk3wcE0FpoZ($wnpPSit`ZWuksAs`LDecg zxv-R{K7o8qc(^UB8Qw}AI^Svg|IfR_?q}RvzMS;*8+a4d5)%Lyb;ETWUZ35T=@62< z)+fexuiom_)_+HqL6$jS6pv{-RHk=BpbVmu8sz1yP&{&Ql&7WTN=UFl{1V+Cz@g@f zA-19zIhuL3gqCAkeODGm0{NwA&2ZARt1MUhRvjtc3&0oxC$nf?YKx+}=xKB$Hyjs8 zk2y4EZ<$46>CwdAF%pwlylk(R6K+3kB7Kid#S|HsZb#EHZQt{pyHql@7-$bZ`^)&b z+h*q)c2gh)l2E5jq!o_l)0$@=t$nZ_5dORyJ5#AC_B%O3i}GxQgT)Eha;HOL_@Sa< z58ox$8^vTLj-dcboJob(7@V!6cBP?hs>yC@N@7A-8qXcNaymut^z7-2ke|LB%~;N{ z{z_oDy6Rz}RXvxws@=lX-o}H(m_+ONBFu2nY>To&(~yXs&)R(hm4GSz6sq_3Hgk4k z`W22YT|SY@aekU#feXOKrZLdtD21 z8q%M+|*4(yY!krPml6|~uw z)m0l)Oznc?xB$BSiUnir_D8J@t=;0CmK)GtNku+f!b=`}3^Y*Qo`}>bjx6}r5P0Hp z%OgFR#L{`XvDNk|EtHX1*d%b8)Z$fYN}M4#FsyOlSSSu&DbGA>UP_5x__o4;-3^y% zr(Kbw%K`-}SDT*jyaqc5yjOqlOJb{Vsq#ijkD zdbo3n1{DM?mtNfSu@lMj_bG*ufjZY_T;58=wiQ^#A*t z#Rsm{Xel4#;sN|%lzP5;Weu8Kgm)a7#;q?5M4#sAkP z+-l(a-HGG6BM~R{ELG2LGxs~c6p&r0*#y7=ZRo-qG5UD#X-vAjiftLabapwcO5gm= 
zN#qbCEaf+FmJ`-O-XTy{)OtCkC!82ONnu4Ur*PN;Gw$#G&-2w#V_OuHeq%dm>dxYKR=2wrC_XgQ6j z0p~59{5ydPT~j6h%HaL9QmB1pSWM$T%TW5`b2O5dmrs+bmIOEQKFH4fqx_}GkJ8^2 zF*f!q&`NOwMyTm)8)&apr}0^XyDeW`K1^fmWy$ngD#>6a?tcI=(4lLyXHVXa8(B)( zBx1h=bYcC2cLLou0ZjJ2Bp-PvSF{DEyPf-jF@{f-9Q^L`R2H@(G24kdD0b23i`!3) zJ5mbX+!TV{PlIJ*Q)v=&iVuw6xgQ;=sULFTQ7wH5_z#e@UeupabYuT&J9ecvU&+cp z34`a$EESSc@=T@u2e40;T#ZlO1Hk=@JDW^fR8S$dp&rBaH(HVgba&U4xos0<3T${e ze#d`X(*BLi+dehjmWf1IE^eQgp(?KUCMAfZ&^!mltdug<2!BE|N*mu4dOrFrEko`@ zSR*XHK=NMqX=*v#N**P1jl2Qp#&$byd}N0v^4i`!=SVAG{ZFx)zzIvPoXO8Z%F*rP z=&Dj8^AGe9lD@N}g(HLpO8me(_jd^(%sEv~#jh`<;Ct8yI{Awz`c>rW{ngD9^=huL zxjc)={Lw!^3Z&qH%OewsXP4pnj7r$|;#XF@=hA(V1)z0`Q69^uBEBo@{@zA`MfZgE z9SdjFJBVhOW|Ehv=$OMlZFtbYm8b$(GCtN1aES5iv$(0MWNM5uxCkIygvUK|KDDArKeu3 zc|9t)eyC3eCKdtk)h@^`?>ioHav+yKb_U9R@zQ83gR#`Kh&`=SpCDI0f0+F)z{YkP zA@r56RHz&2`DSwCF_p6ePERhx9l^DtUt~!kg>Oe_e=U49)79%8E(BLUP5?)64T06IDW_^6dFaWF@v%8Q5Z4lSzk8?XtqiUC5)T!s|rMJuvUXzEL+xIpC(h zPNR@Wj91AFs&x}Ih6q*u&Q-m0X)xb(xsh081g>N#?a<~VIo=HLVJeRM{kA_9JIqsV zN$#;0le?h)rf#o0k@+Bnv?T=WU z#!<2IZ)uD8E;R_1AG|;DgdS+vG`HP`x^k3N7%@zYOAWg0_v8Y%tQHje=mq`cZ(Unw zzZHydgq`|_3u}Fm{xW@I$6Pz6KSzEf61KosH)_Yq&5=j@rf`Wi`sxh*S$5hZ88)wF zb6oj9t;uLvF@A#S{eenkzJt8W7trR>M&T#Vl>H}Geg-&0VzSuRhv|!!TW(FISVBJ< zZBJH<#82_zLUcJb@~V!L4eP3k&zCdqCrWbdnM$)KmNl&nW(i%wgz+REZ&k1ZKj**s zPUXsFtaKr=-VrPgI!MsJ&*;*A8n!vP5tKA&afJXKp-8VpgM6o^g27WSD<1BTH=_0f zv@s?D;jz^Zvrk;!`HLfj>ebi?M&ZNmdsJqp|!5Q8^Vi&w`k>x5N0t`J&zdCDl+RQKgKHt@X3@#|y;L zk4$Z~3Z*nkn^B))uK6w^V1e7tvxt+@sp_7Ib;q((F5)EQhPa-FSN;buaSar{7WBIR zb1nLObnkM*$(G4V$XzT7@Sd2e?Mq)?bwAFdz-gWHuX2d}$iz)E?5ow|@=Z$0B&h6aX=CCeoTq)nx}Q_vp10eFQ7J;tA>g%{2iRRKe8F!0}4&h70V1QVo_4i z9hQ5XW5`1XKbDN3Xnau66o%)92WI<*7w79Eic!02)b|$1yZV=H2GSi<7B7bnakt5G z)UgHr?{K-b=ofORp~EGUiAMxu9@rT2pA#0d3f0oKF?K9dL3>%!rUI= z5x_$ND%wCp=YYC1_1>04??0iw6y&nIssO-J$x2L0Q=EFd@%$@()Xfy`{% z*aV-@2F%1{ObyCng|r6&1YLa)wSPMa&E-!~W zZzinUjGvKm0_eDSrCm*5&WQK4SMJl;TEUmRktasMZYk_$II$w0yglQ}WZ6}L+VjfQ zkm^wP47D;9hViUroYAIRBF#XllCN~3yv0f>a&3yO;HOEJwyLFMidlX+6%y_J0U%*~ 
zSx~5MVeZs=LQ|N`7qJ%E1vUBGA^>eU?`R*Q6(6Lbut#`^s&Dgd1mU~>$+`XFgIhT> zU#)<6H}xN&IWXD#;4t8K^SKFDn@ZZV-CqP#{a65lS&~)qYl5BenhGg4*F+sdXD)VG zjVluJ%-oFjXXjg^XUDz1O)$v&#C1nzT<3g~e{ofBYH zVL5ekqhqx0$LQpd5aeG0v|dMgclauQioc@`iNW0Nb)-hm+#EKmlFFt~G@uUGd@!tO z<$ktIY16$kQwXw3^6NCwhtF37bX40~zV6utZ)~EGXSPBm4b;8D(YidH-((MniANAB z6v^@0?O4Mlkw5-SU@A=Tb~umfny|lnQ!zkxaPeJc#FU@`UW;}5iU$r{Xcwu<*q9Pr zu++v6u1YavcbkUacx~~gm|Z9@CeaJ5FdAOL z*vp!)^HZ3vB^4F>R&J|7WVV&pVD!0ymWk zOioaAX)JABBd`xamG@0g0&`T!e|7SLwnX9h;bh4& z)(d*Hv6P)c$6Cr-`c7Z;+m@aue%KT5p?oETbc6u7ahv#O&1UE0gfE}2OxR|`3oCR3;*l5PzHRxht332nZ-ZT zZ;TWr-g#aw_wWQZ(ANCCeej3O4{sAug?CVlH?DE@SZNesy2>oht*%<1_}~I|(nfy+ zOV;3uckOFzwc__RtaGOP-j`4iqFb8VoXU02THhF$hl`cco6eJ}pNwcJC3os+yJBB5d2&Jk&| zOQKIQ9B{lv@3CYQAQvnhakO(kyx^e(c0^HsnN~^_z$$ zu9sf~PusiS?m9mMy#GA3AOQU94i+me46$Vv$oQMNF9w+p%nKYSsz4%19P@hQc=p9D z|6h%nLt3fD4vB8~pBIF#Od9siFC_Xsn^lxS-75a8{qJ9iZ9H3qg{SeL(|7)|fe2Lo zUr{;S?iCq;2H#b>7+gfS@LT1U7XBjt3ViNB@1aoblR5r5$4MhXquD5zCPJyN3cL&C zV#1o=agx&X_dTTsd9ljfsA!vgnKqbSbj-o-+M-jnL2$CCeU7qOyKgj^ zo`d!i#~8KSMjhX4^@bCoaeu%q#N$Rd+<;-Mjk=?KXp5!Z8i;ERuXfl0Ij9B37CnP5SSp^V=T7UAS0lIaMf!f2ouq5f8~Iw-c?z3E zVRs~>gc;#t53RO0-bm;Vqz+F}^m`4_;`-NxM#QC)DwbGAiQ@Plp-UMUjoE!;HaoZ+ z(e#~Bmt(bK{nDgsLFVs0mKyEE3U&GHHPC3`vDv1fI^%G*g)i9^MG9KREpS%JRKB;L zV5t25vYpyI(jZ4P?gAIq0lJWd32-e9PCL*Lis!&Ubxm(ivKSeG-Ryao#?pn3XcGye z1Hi|<@OW^tvGL<;#QN(;xc0+iYmCwlc33RhHkI2R;@wFF*fLUgTeOd7_+Tq6LI} zhYC0q6;&LYHP&4w)zaf=GNA<26*yV3ZV)Uh>u%9CjhTZpX5vSEeAlXdH=th&wQ1P2L;#4(T)X?T-SK6|ox300 zlAi2D{=ey`v;MESbIY9LLKO2Eb`Uc#*mPN|b2vD5KKgo%u*&Y%O6c3#Tw{NRd{!7T zc9-BD@TvY?9+Z}9;F8~3{omu5mneb$--B}oyoJS-Y?@%Z%q)+ym$odq^Bq;!^Kb3a% zOLjiadb-vZnLQ}xB!{C(NCuV@vri7nbYIZg3qHuc1sV7i#eY+yPC_~|j`q@#vz*jx z7)8qP1h|cJJz^*saQQNxWuk}HCB+36=-YvUdgaYyq#g{#b>YieA*+T*?@`?<+^!3K zk)z4&Y_|t5%7VAVxEDUAZ$D^+tD{NE?};%7jQEp{u>Hxa&2n_)4~$`*40I;%^v>* zRe!gPhO6Lkx}A+_aF$ef|8$lsw;ukjy#xbq78!08eAWf+0oetXyV-lGWc#{~)Q0S- z;fS8WUwiqst{WhQGyhrk)d`d=O2*z6BU|}iwiEgM3u`r?AZmb?>c;zzY@ZfnnoLrN 
zR;VmXn2gzXo`uB+nybQp>Fc6ns*Kn-x93#g>+Zwgsfj}r6zJyvQoXy^xD9HKh%&d7 z4u`$nxqm%BG5A_AYOY@?J>JT(-e%sXQR}H#Ek|l}ITDvA)<`NQqBARweIHnGuVX(# z=w?I^%0ai)NLg*fR*~#fsK?5$mPVs|qApBuve?Wfe9QDE3JELl2PLRZO4VXfQ1zO*uX+;WrvB7l?E?QT{W}cX-aX#^-K6 zzX5y+*UZ3C{|GFORYcALQxv&xjItq;Zf*9&Aq{G=g*b5;Hwk{^IZfba;B=Wll~6nu z)%CJ7Ti8Y;8=m{JGEpZzt$Arw!45Qc%Mn+!`1;$w zxg5m3Ddzgb>FcCZN3DBthj*SWHi8Ts<{PW3c(?YyGWCXNK1grX;te!FPU4vQz=R$) zRQk2`ptR59*x$+|d#Y?!xMKQwLUxew`)=G`F{Mo<&Rz#ITloJ&w>^oyZuw*TecP&h zpx_I9d`f(npLE`NDQm;U6@I1gR}>bM(K8xX5qy{FMd!-t`3Zu9!;)O>K`K3FvR41r z_Ae1zgM{(T?Zv-)LyKR9!@L6ySes1JEkG>aS@I}6-N?a--@dc!mvk+^0QLOakDBY^a^2-rJA4G*6xX9#}+eWh9KCVsE4^sq0^`A>kB-!p&(e(#KPSC5Ocu<1YO0(yl zCEk^Y4hF%tV$-nRJGMm#SplVzMgh zy;U?(4jIq;ZDgDHKx`WV!Tb9HVv$dt$-^pjmO2F;INvAx_U-#68;T~C(_6(dp#6!> z%A)quY>*X*uwhk%BEyrX4&fB8zm0@$@t#q0D-QNG3aWe5`7OzP zWw;3alo(WSbn$c&FdO6EcF|V&{I1v@VH6GSQ@k2w(Cwr%&zO6`D)*O7PhnK$!O%S) z5Dn0pv!&un>6QZa)K()8)PqtRv3JWXm;hc!Y60s+!55A0DAs(?^Xo%fUI&%h^)rI~ zF%hkrnewSkRFZ&RX!J~<-;SLRLo#uWy$bO!*!LIEoFCU_;7O$CibJ&2y;Zw8aq@Wb z4zas$qiCN((aiEm(E8JyLtph_8)PohQVhqx4@+>xSks{Cs3;F^X({+ET5$o6M?-R? 
z_s2kqc$GpsDq5-fEu(aX3yMe4Qc=n0$3zMKDc|B(t*veBUhAexQ;CZc#;3vuow(zN ziDHKTPvS)`W%K>BTFk)oOz(zePbk)5|8z7F)lOo1Z*Tn8UFceN%d88Qj)pX=~G1UtaGu(;jsD;^EKRz@yYI&cJn1QW@eNL!9mf;>;Tj@tHe}DOfgkFGO7&&^OCEd+v`M2nh`8UGMd{ijZ zEEh(k8`)@Pjf3HE_xdSo%|;gc4PX`O{e1W(+}1B@CC~HA0`%fKAB9+LWl3OBK?+t< zNSDFT+I-nn^T~^P>mX-Otq**e>q97LU&IK{K9c8_v)rk-KotF?LL@K%y)XrBN>~e; zDAFN#REPg!8MO@>u_$TD*q{tcPG{{S<8Sorzv(|!)o%&-rKkCG1upv*U?Q9|xT?P% z+$9?1ix1k@73CxD6=PUFB3_kDAo(mw%W5Sb$Fg2TvL!Thv^2d(f-iU{=ZVGMgOTq+ zRd+4Xb7G?1c3!X=e;ejeT_4wj0WhpKm4@tPnL?g)!g6Gum^5SM9&)66>OP!UE0#=F z4kp&Kyneh8FWwY+sAE%?F})}msPhcU z20V)$aj3Tttd7o075Hzv3;`AWRHE?T1ZBC3;Mgg5uofz+l+LO8W-X*RL{@%L`>_+U zANe}wTFoNjh&_!U#ZR5_QPLIHG^YU95(q=kPn|oVvtBWC_$MwA%5O;yeXUB0XOMbZ z$k(tC0xYVCT+DpVVY{uw!Fe8%5;t*J@7Vx7--o zHeTw<+7YbghaFDpRRBSbi%AZg;k=@0gH6_m$GFzl*32Ql>Or;9F(TBYhAsiEjLo`d zQ6SRc3%te_d)dO&r>H{bWlC0V!>CnkK)1CWgd1f%m2ou-@p{M-mD}jY9&-Ai*&hH|)%AL`Ws2E)y2pBgCQ$I&QDw{xO$Q2o5lbJU_o`aEUn zvWTKRMDpUL`lTpf*(As&ld!OLT=e^I?UTO_&bzqmA4)>KA1SP=+gbhi{wo*h(*M1J z#@m1cW3)=nId}3*QiB~flBaTGBT+^B>s{CEVivP3e-^UiFJdsJa)qBeGmxrRkK2mq zOx{eX{{YJWBWMw%;v_^`O`G2#ssL96#) zCr7z{*KTv~^rW#5sc0+mHW+ZA%J#v@6|1Z391Po%V`=GV_8w^SS14LG5DZu4(nlpv zp2s4eM$w7wO9RJ`fUnAVpC@lt$v1!AhvICWPN(GD9>)s!YF#CtnvXW2dV2Hyf=zZVIM7g|(-EOBan* z$Jg^6Utz6;!}-HN_WQBauicx@uRXTR63uJ7R)-)b?#e~BA4TK36gd9DW7;unWhaZe zG!CWX#FfMrFy zOnZHcsSDJ-%h}nCg>rp7sp#GYoIiIYgO`SEhDU5r9G;b+RJLr|R|q?nFBcQU5pdRNC7&ujS5Ome-s$M<|7r3zr1!p?H+XzHeCMBh9o<;g;N*ZgZ36 zQ^&~ux1LiTa3m+ckTcK}TF+kUuSb8f(%g_q@&I_nJ5YuBGHcQ{j6pZTamD*f^bGW? 
zJYd=n3UqkSO@*1>_$IaZUPuk_o3`T$7|Be6HDlR5_wdtrds3KMOuyjXV(XvBzS?y- zIF8~({}CuU6cqH6t6utaQ1Ev14@WJU6Cid(U8>V+9~FF(8PN$pYtxBz%ygH>XdJ&-TWrY=?AEaDac@juu#d?Lj^3k-EHU-0RN*5`p4|WBQXl3U z1^mWLj>I4WV@cuwVPF2Vpum48RLDJOC1(>KaG)AcM|on!-w-UXQ12*;h$X_->ZY?v+ReWn%7;3Yz7I_eAEM$i=)F z?Azuz%h3nVA{*GMeZ-TCw(+OxYps43OgD(GZ)YIJ>5WoaW!>}A0<0=dy74vnhA%um z-C&t1 zl>cYth+E5t8ZSsQ7|x~N@J`4~$y@g!$bn?Dpa!Bgb(|l~R^Q8$MUkWbWZh+v6Yl)z zAA(?JN`mjZ$-QCv$`8@bK7!AsC)8-BejHH|s#S)8``9KNy{FfRb716S1<{XWaOG3k z!_k0EsD8ts2rS3;uXQY&%T(hux})A8@=uIF{Ubju-ttn}OL060HcfnIO&@dWk?75^ znQ~@1`lxYSTER!N_6TI6^S{P&Jyz-_dc}uhS|9Q>wa}rbw8dqA|Hye8Uba*E#VyTA+5C$ic@ROq{K@df&dhcWg%$?@!9@GNE;Fyv>EvRB%T1pYV$=Gp{qD8MC z4z#nUl_w0~1sl|n^Ua=Ye61_`L2nQlN;e#f%FAY@y3djBkNvwoIpSNzt>S7V;l*ZFl(0Dl^pYxk*guKuZ3D}>@TsE zIscnL761PesKuZI4y^ktl?xK0inrx!-v7?mPT>&;bL}1%bu}YfY!)=@MqK{^+NBVf_{tr5SeYqFtcJ@DN1J-1(>p#r%m+*(ax8NWat_ z=J%UcOcnU2gSelQp^TX=#{=ceQWg* ze+=!L&qWH9x-X3W0?885h=h%q$1C#QQWHl2;^h$tdH9-GBzkoY@o@_b82 zf6S5qh7R$OBqN)CJI{ca^sDY4S;hmS)R=)bcBM+!slCKwmPH>PoJeY<#gDO+^W)b# zP)RpCfT^3fk^}&`>mqS#Vo!2&*%Yo>7(|q@$Jv6Z7BV(ld%CdWP`LY|- zj5}4sw`%v^E-UC0FINbOyEe+r$TuN(Dr!g-kY}SU;hCkg~xR1(rXuKEw%INStfv0Ah4&GF_BJH#-#cJ)IZ&@X*McJY+ z7Qv+YA+2KapwGZAEHX(33nR;`bla1dyKx^1S0NqmGsxQBV4LLg!D&BC8-+^U>E;ig~W6O0et5hyYq$ zWV|ZG+3O18UTo$eV)V5YfUyr-iBnBV{{irRjo;Qs8z58JJheXh&T14g*9a#Q zYAtjG209f=?=8C@1BMtIfP3=|cf#?OW}odB7EaV;$F0ACTK0AyqU^1YrVlg-sE*&# zuC@t#f3CC6)B!S-h2W9B4`L$ON983w`P|QsfLn-o71i5YB|{pni}1`E*Ia4`ShB=V zIpsvHT6mSDllgBfh@GW*gw%jb;iTIy;DbnBmPnu{lnTTxv+@z>!tAKmIY5;Yrmy5l zpC+$?TwVNbrCUThW??vpo6|+f*_dzjI~D{wt3mg-)}TSu@n4|SnKq-*Fg|ieeD<@; zy;Ts;m+gK_^^H)M%i=z^snwPoTBocsVn?}^n`dyhfdZKiJDyfnRgVSybyitP6!K$5 z2HkrQ!(f^a$s=J;+>VZvtid1azdCcjmd}{|IYroUc$VE2XddEcB}XW`6eBG>CkCL`nqg8edr}v|P7DpOYKz@K;a^NC_94*sndd5s*UU+^>0IWT z2X0Re*m}^iTKmXM@Ew?-iGC@(_wAol@*a*K!HN?yk8RaNl$Wk}BAHG1R%mC3$aMd? ziGU7$?tXlXp6*Spzm4OTA#9|=tLaPvjM z>$CF|==HKpSD^JorP!Leu4HS`o%+Y>LTW+hx6%om4oWKgM3AZpGwfy&j4Haeul^&! 
z8Tj)SpuYK13oMl6S*5yI|8Jn$@o8!@M3n}%4g)V|MP7L6ErVK*IBH$^^rMQ}c4#w>MF4psou0rmUZqE%6;qK~hS%TV{z z_Frw9R5$lW1>T7o=exg}!(-g&yomk-xWuRV6&7uU!nZPBM=`3tpXk`BN!8LWWSM*UiA~?ATA{ zNk3rtkDnuKzK;q&fm{MlH!nZ9pXiV_1esr!)TN8>oIm&p+0Si;4_H5Ya&o<7e22@T z%ynGUnej4LFm(5P*c6|bM>)~A27DbQqDBem@$qtAOv&Zf+5ttzW#z=X`kE&u>JCi| z%Yk3GU-F+RR_V^M`-{Qo%~LL}C3*C--lpCYpHw$Y(g&B8IX(>_ADOh@`x_@KR|^`X zi%8rvUPVOB*iDUP7N7H-zSfhVe$#(+{DI(!;}QPyuoeSUgvy@hpB}jdY-rGw1iAVj z{oM)?44(nuev5u0yDw#kf9PJ^HBQZ2I;cj8Oz6BEXoCbqx|#7D+C?EF0~hyopGd@a zTUoUf6vZ!Q!|r36VY*XVT!Xnu(D$;ootx14oce*E)4uS)a&KwZy8g_B6=my3o$OkA z-8?9#AkDcfXkRqWMfdS{uz7Lr>MtQ*`MjKgxt0SnAN$aF-3vOSKm8I*CVM7e+g3dT zoK^L}4<;YL8Dh7I!gakCy%o4=@1y|p^NZdervF}dpsOfJUP;khu2$_LVI}t!%k3U+?%xgpd90C6B@up^;5gqM)09}dyn!B5`%X81Leo9vCL%u#l&yiw-6rq7!XQtfx1TROi>Xp8biwLvJK4GQ2D$|g ztz~B`wQyPQF+Yh-XVj>h#G_*4!2P|-^8Wyq0PLK|jx|K?d7-p0X8N_SWd1IY*TdWT z^n!>|yu##BIku@1k$+bCK*8l>0oU{@X_)fyj9*|YHq}9K7(&4R^|xJwB~H^S(1fUF zr}Wl7wQQ$nwS%|*;f|+o(;Ks=o?A_ zEi(jrSTkMA69NHtlV;IEvU`THkc%bEGULTw9L+)0l`f&10sULPI2oZ>ZtnO_WX#y| zfZ|TqR)#?S`;{**<``?X0XD-1vet07aBuGzO+=1NPTlvWw%$5MIeNkQyT>(O+3kqq zC%sYBjJ~Q-y{Kzai2`}HSyLBhS=CBXzDHiwF7W5^I=vgU1_89vhS((N#Zq)01&>_= zw}D|S{jWhaVNF3l+OcZYI(Txj%C>6w-Li@8uT_Eixr$akeqgOI*`#TM^pM8) zeK2C!iHoI&oF|3l%~qC|(20Q%ZOWjT${DDHe*nwJQ3~}p3p)QuTh0rZ+VG87$sEpX zihQ)gQ3iWKh^!$}f|yTkB7Lp`#!A0gI!gRC%_WbaQ)DZ}2!ZiIA-Ix_x^SxH2aOr3 zh4M%iqP=xLmqwt(pw1J*RdrL&Y{sbS0BhzWG^_BN^P3%3MmE2XxG7``el(3g81t>D z-mnw<#Ghf)OYl}i@gn1r{DkY6k#n#`#hb>I4^qpemRY?-n*QT`1V>{QEIVSVo&SFS zhyxa&kzF5;J6n39%0VHZc%Qdr`gO}EB=*58_My(R4_4Xj{W`2(TZFUTngGGsZHw*d zz~u1I{-HU?g&o|3B82lFAY52G%5Xz;r011n=2_0WzS&**q-EC?rxO7zKn)8Z*>6TM zb5KNSZK60$lwdw-AgRp#%WXc?}oS0&opt0GtO<&u7m>jMl|NqAdaXgN3(B(hMv&#y*>VjL*YeDH8+$sa+U zh6j>0nZdy!KP!0t-VzSY=CY67k-TXdKt3-nrzFSg8{yod?87D)sR-MK)0t-eKt;wr z(--z*?QO9T5M=!MJJKp#{*>MG0n8s}tm40qZ&KD<{EiW2ud&P$WaH5V0*h&V-{p7W z`r*V#%`OHy!9}|`|HJgc`xN`~_GmK`E!%Ifyo(%{E(_s=F0Nul+XMi66JIvIxeSU0 zXji6e$~H0ouY@PoF4cKveaP$ED4f@f60Kq_o5{JOrOtL!AK6u+`~EOW08ZdCDvU}0 
zIdgJqZnR@x?oIL^0*^M=R3Cn(4KfSeVV@Vz%<$zH! zYbK-yQ6F^t9SyY7mj4eRZaYji!rVqI%6~IKH9$J1O7(A+(mLu6y1w@e`lGEXonn`k z$2Snk@HU({`tXW++n~C<(OS( zYE3Wes{DfRrK|icFwFWFU(dj54Yj%FIU)DXg3xgEqEfCe@{lVs(m*b?;_gMVR$hPMaAbk+JnL8S4heVz3ypN=vV=3T&|_`ISNBYQ2Qt5oCY z65rW%razIcW)7&VRMz_XK3>osxSQj!OG?H{%ql2Wqu0b1LyPGssU!m$aQ=ydku2sm z&-*Yv5mZX8MmM^7RIuwy;fH#bV&RU zz^3sKoqt3g%cXhBXRQ#(p;1q=H!c-hiZNW-#eop+>b5EZryV@UsZV1!e7n@7ejnR~ z@4s!TVga$~XTQff!RI;6Y*eXFep1IkzK7!UEKkcB28!EVk!Wrg{ceyuxC+VAPZ_u> z+==5{rn(M&({dI~MU~iXg7^GoqgYMu7F%&waK@TS!F3WZ%OfUfJ_la+2&ej$MVP8_ zg>bX-RC7Ci-pA^l&;DzjtT28@@7GI_ALMB%N+ozhS$awxDpAWU2b&w0iQe8RoMv#X zQhM2qyum({6FNkz3`!Nu)e3#{Jsibh>8@sp>ZS|UsZmVrea-2U4Q6BFxqIq17M^T1 zvY01xBiV^ONkNy#XTAXtBr^udc#L`h>^3VGN_zqr8Z%X9>ujaPlO5moWIdm7XGh(j z$8=tQY@drW>kuhN#ej*W-<^9!Cm#AGUzoqj8B{**;J$`{g6lOHzCvy$>b%x~2qK$D zGlFNPhL{xm*qF_m6hA2IDoJ4;JKiBPJf-VxQhmMf zojAm?0n;LSl#im`2ZDqb_yJNZ7t2G;Qjral?E zT?YR3E9f+>JEdXCAE9YM3j8L7}YTZmw1M1GYN8xG< zriC|mS$UjP1%=8xKC`E6Z$phdP|ZPzG)3_subc6gjX;uz5D~lErK-0RH zu?!0uokau0Z|H;CHyWyklD`o)dIi+2vNYzo`Oxwy&a`9;QJj#v#_Tfo09i3b#K35Q z=jF%Wv#f?25fy>ma5zL!^Vi9CNdBH!O~#+D&eaNF8)WLU@e|>C?34IPf*Cm+O}sW_ zm9a;8rE#|u{P~CfW%jTTE9x~6!j!MOrDukvh}^+Qc=}e}iffrK<#V;XOJgy~v+BEi zC6gMSkv*rubt*ugUNkie12?BvEGs}rLP|25o(T$tZiyHW1`P^uD>0_*VF}4n7KN*> zDh>nPvU{BeR?C-VD5}xSI^!fva1ipKase>XT}5- zL}P9vhw*vBw-WUogLAdaAkLcWn}5Q}%gO?$bl*c9_ilcoaag#o5HJ8lpS#Np>gf#n z0UYSHaKP`jicDk_j#N)lx`e>)O2gj7xW85Iu}FY@t|LkOUUlZ}7M!92=Xc(Zb2$;) zH4avzAA^qXx%Xf)-^%WxtVI!`5Y3BUbkS<|T__G^F7NjWn7nv%#TBb_tRT3tetcf0 zw}Fm&k|RJ?P7L@$=Yg?1E^FFSkR;8OiR961K-&h-CV9DJi|io&%(X~$${zgwXtCWt zL+y%!BMGUCW=|s9dHcpmVfxhwS4pT4{Cs3xoI^fF&Rxo5ejhclr>*a^ioHL^r7=zd z=t$cwM=LMTs5)~NhNEu2m9~kYe?ZAJ4Q^d0d|2h3pBD~s( zCpF1rH&(wOpEep*@!3ql@MT{=S|KkonD#Boy7kOV-JS$NrAp>c{X>xjq9u(#3112m zLX@kXCCkrijr1q;n_DQ-j@5gO8>?FQ>3&$L+|2$e6cmmZ>(vu>EwG3e$%%Sd(4d%C?}2A)a3_3cmb^1O+Yef|E&*f8T{?q9hNvMj3O2;b&# z^r%olYT1J@Xct}=KSzVg`^fYY%_^LE@>e=?p0ifdyOtM_NkfL<@u{ZF$0Dt7cPglX ziW|j%-KJ_@v6AZf(0)~1hPes1d7BE%FLS?ZMHm_6pKvx?L<|Nw2u6edh`=^mS?(3o 
zmmihcS%9;f!ryWV)Y*mSl0re6e${I?#rfAW*96TpgJR6X;+BuoxTFA6D|nmEk^YF4 z?YpLF>vijBiEXPniDP=7{{VUL_PUp(2DaEp3b%B&f!8n-dhfhfSqJmiZk?P*p`#;& zYL$_;ZC+;tj1XV)P4{2bD`^ErT*=9WA$qVI(`OPd403Z+kgy?SK0I0{)HB~1RZOR+ z1>n0rQq;CUQK5pSmmFEBCShKOhLi{Sh$UKO@Z(I5Q)Bv+Y*w`vZ5;aU9}9b?uVd0z z6&Ty5x0T}gj;FqYzOJjXLn%H#@~+jm8bf>@HLImxVO?oOlz+Is#C{F8v`ity&hCYA z4?thFtQP;Wc5lw0Qul?AOCYP4S5J~P#Il9ks>!kE+VQG`lH>fcd2%2(N6d5$VRQFE zegBY*&GQVR_)yZ`w^&i&2$S6$rF~U7crQ3a2M_7cSpVFXbsQ+-y?z$eT`-AwkV1q; zj(pWt&Dr%u)1sW3zkES~+oPV)TdE-`Q~4h*&l%g=j$;`Ggyc9*mLNIinQpj1=l3Ti z07)iXF%B$j8|mLH)e3D8403V$W_hnodq-r^E0xwOtDIq*@qtcz^H@lR0aNaxsk(*M z)AsfYQ&Pe*m}uslBNl7j_M-8XAVlQFZujv~V{7+~tt z`EHv8?SFGmnMN@MnJ+|gnMf*<+ZJwmI9KdlZn)_UrwI5&A8#X$?CA&A0hl z>YC@AsG{12?FLN5sC?M(Y#@>8v7bPl(Ng#zH|kb%qE2`sI8^TQtJX@r zna%bwgm3B~!LJ6dk*e3@`JjPEbRO_;DBTQ&>JYtrHet~U2Ug-X zD5^QFM6vTt_U!<8FGf#Rr+UeOH5PBaRe(Qb@A%X@wz?k;!L*^j;EbiV=2>Lyez9@s(`^tDkoBpc9{Wr=iPS!}Vn2{FYQDXx7M4!cx1Pwz}+rEhCr}cbN z0&Z`+MB5Xvp6^PqNlh64PF&Lvp!dZ|Pv+WrgAUkH0W%dyw9-|xbEVwy>GvBYG%vr; zgV!bqisTtf2|br8tx390o4xcji~vZN0SE9pmZfDq1i9An8At4`&a^($UF*G;;G(L~ z$2xXOr+evoKAN**o2Gn+#w8DKiARy=v1R8#;i+1KLM-U>HneI&v54)v^Fy4{uKAg% zX>>J8zu-!wEt;r#(U{wpUz?fV97N-n6$uGG2(s%u&M#5nXiH$2+KOLHSD+)TN8@%D zehWF;B)TxR>|P8+Ui)pq{Z}N%j)i&BXS~uK@8s9h%-4&XEcTrG(!d?-aVlQ#@p3HX zyi9UOmjv7t5eQ_S>h!$gbC=7~DdK{5B5L*Fhekt&qrgA?KfyDxtnL-WC;c{X9`Yiw zVOT(Tdk)Hc)%P6p^((&DdG(bA6sO(y3DBSBaAyPY!mI!-Pu=nMthvszd8Mab z5gp11@K~$$d&Xf*5R8ywt}i|~b*%Z5EZS1+;42V1IjvB8&CwsnbX9)m$e2eqC1I05 zOE}2_!Zl31pQBZ^uWr->*gJy)m}yd`7-X?8gZo@&TlLo7WiXSp*5MT*$+AQ*4MH!U zOBUAu4^w9u)m9g+YiMyOw75fY2u^WHffg-L+zG|q-QA^l@fJ!c5`w!FcPQ=_+zA>W z(3|g^d&juHGm^P%@2s`vJKrZ2bRcyMHMeA1J zXB0@p=(on2lng%yRg`O5d-Y(DLXk?!8JmmZHt}N&H14%_ZJ#yGJs9e;Q6`6Lt5^Bk z+xfVyX}Zd{K>u<_WuUkze=~s)la=_2&lYcon;czq^3Y(yV7X79B(Z?|18ir1m*VcW zBC~C((C@2zLLkRj<3@8&r(U%Lo;mIH(squ?dq&pO`Vy(_2*)z9&K{aj(QPLT%w4tY zQlu-vy-eKSE7*xGH|KgAWew*oG%+8NJt7j9P-s?XlN)Y7b>+2n`SQ+M^pfV61 zRMUoz`9yn#wIunT5bYK+GUkrl>NY0@FsOO{xDXxZv5rYQ|6MUdat=a$rZ(}J>1)h- 
zR!AkU5_%umc%F}ws_=Ybdd37>>F_0pst0tvu@`z!DI(=>wD>Za^?KkAc$O46N%Hvi zf^G{O9U&p^SXQYzN_%4;9~T3tqVY-j!5C;$L^Al;AbUN@?)I zlN+*}U%HC*lOvc9MHsjHeB5h;6u9HnN_Ra%_k_{B#=yu_%TKE(?1J)??hfbs+&q}`&ie&TCqL0h(k?waj#72O-4G3H?8}r zNxE^pb-3g^2D#MD>P-e})TYe4(ilzj4GL=X6-_=fSF#ZjH-+sI{7y5@iedLQ+6*f$ zta_%ubLUSPYbY9IoKb&lEkwn&NV5oXD& z5<9KbWsL;yw>x;j29Xo}gfANg_tljpmXOPFB}#`v!-UPufz@8 zrb_f3qb`Cjg=GLZ%j{Lf_tgo}j8Yj6Ln%mCux0r3R%N+nLdH)=IX8_N*_C&Jek%sm z`P&%6AH-*Gw;Pb}w+aLpkCBdVHLz~E6`)USn<~Lno*^j$kt_&MC@Eei-+Z)c&(YHx zD0Y>KU!G7mTB}s7@%NM{^sC_gDt2-@t38M%j;;yUmixdI10Vc!2a(U|!+ZQt(~RzW zSH_Z({*vPJ{&z+}I!@7@xJ)|1Z8^U0Z<9{fydFP)C;`W$S`YD?|CqoK{UwU0sEYmU zu@^sPrG8|SM8~-q_WsF+h%seJrkuaNx%=Mr$u{~FD{j1@8z}mcN>1E=4_xDRbkas9 zLLZ7aiti>s(rq0Ff1tqL6MnH&spRi)2`vA-=AL%I@$+*npQqbs9?e>327ODSA2m^4 z5N$fjtRAJz(RU$VkH<5s@86tM|p5%FA zvc775Kkx1rQYY&X`Xr_-N~tf~Z`(w6h7HP*Xpk9S=9hKjHELO4Sm z-4jz{)%BvxMCg=qHhZF8I=u*S`J<&;@Uc2YP85T22HGO~yC7Vp=gOwnmNB z^t$S1&bU#z@X|OOO%*tV-{c_L65Z{Z$0Qi#a)DCM1vX-`YY+&d#En@zm1(qExY zsXcrq{lMGMU zglU2n7qgy*gXI39^r{#)bYUDe)tU#Vn>+Xo^aD$H`o>@Iy!O#J!_=6>xt~IvZM}wY zz$wZ~qOyX2ft0@1j?^)ogeD9H(ae z4bORURMIe-UMrl`C{Y!#D_BFr5sx!2o=(Tp_}g#6GNqiL{_*Aiv)d+Psa#%Dv!%}@ z?Iq}=tWZTWEArSDv?akF;f9IFzw)Bk?w==o?~nh=u08C-8l4Pal*PPh7wiZ$E|~kk zpUj6zyQy2Ki;V?wx#RV1)le_@`ffqfQ$YNp`Q|r$OWe^LPKhW4JFF2z8=;%8Ayf(| zRLXID7liIpnoM*|@9{j{{tsnp5@h^5vMnapsD`+Wzo)z>=m%ZCvj^T1{_lq-K_gw4 zSo3hY>vWf!-&#q8Z@ zdJXrS>8ZtWx6R0VIk~L?yKoD(WenGGuIf}#VftaoNYfb&arVh?NA+!rPE}*mPRq3@ z3TN&n-R{i87#miM1HyjL_Ac4x)=ZnX+V{eQCwbw?1~=HOUUD83ZwlMi>JCA_FPPFI zPLlVNsVue=zqj>2M=Eaf|KTFzK$c&WS9Il0?Kz<}VE)FZrR=bx+4Q!n#Ks5BJ z>i2oZM{3l#tcvOuU-eBOT?vK?Z|Gg`_5~!5f9~3P5I!w5%{PQ|bCtOZ`Rj%QIpVE7 zzc0*#3H(Ee#71;6{#$sgeX!S^Wp;lZn-Se&c3zY_w{7c*I zZ3h0hm9OSKvzAUbFuxd2a7|+fAz;mWepfwC?8-FeEnxkoARAF<(6^J8F*4IjxKhbK z9?M9_Bd$!eB6&1zV6|Yg(u|Mmx%PRZQWuqKM9~D8Ycuh=aFBjB<(m?~>2lR=XgsY@ zRm|!~|5lOv0XPlQu(kfFjzL8XnM=(jc%3I3-1!hEhfVB-|DZs_=`#b=+U zGri1d^z|Fnlt^+>^d!leoZoj}3vQ{*IfaRQxrjb&$xRO@BD|#L$`edqHJc)7Vq9l$ 
zkhq&@>w%5*{iF_gl492vAXIVjw=qOXx!*j-Ej4r(4?bscP369t9%afR5>6Kfz^2>1 z2WLLyR9hR>$KaeECL7%5dZwNTe3G)bxq-Zf!LB^pf$Mr%j59gcm)?*22X>7j*pAcc zdk!cC5!7jL9c7f-lt0U^GlH!Z)fUZ+Gb#F-j)}1Y(9=m|;`D2-|F;RT)hLy!KLT$G zojBe#D*KYEes@DB8hL@|sUFwj{9Aw2u4WWl;Dz*;lzw$=mLZ{H0fj#nc{%!&YYc3$ zSA1wSr*Zk48x*EqNpsR+V~JN0Y$!O=qd_A5SSm!)cy6hhnjNadcAjA!jeOaZsBj#i!nVu=YKMPp`*#VD1*0-ZSZk zVz|d+h&%$d0vhkj`q6l*$gT6h@!U@Ac{u`nw{=0uUxIn={Y3oum7g zx%LB2_qQ_%_rYC_Vf5d$T1$it<&&om8idT*^Hg!k2?~wR&zs08E4p}SwxvvvN> z(L!s=t$Wa10XOI5Q*6nl;xRDz(N+a1k*I_PKcT+snh1Ut1u+xa8o3ql%6yOJ=>mzt+PmA9*?E(KX3m&0(d-W-1 zM7Dx^AMkYqYX}mqs~GVQh3YxunuQ;b7T;6Nd~Br1ZsSd=_oo}}q0sKM=fX)|j+?i) zBv|;_nifD8S2a^6{xne7vr@Nt9PUq&E}V2$`^ic8ON!se%aqG~rtLJ4N%C^V0QvsM z>EVaTe<*{1-EXFGYiO=`s84nV`;cx55`B|j6u`W$vGpj&gkoAHPtH!zPM|E(7)Kv^ zHS+;!sf1L$!*^djYQx*BfHnWO%bSUwy!>^9S_*dMg$@kJtMg)YXef{s0Re zSn~%hSk0cjJ7&wDBCfPjDYm&YvnL&e?$tP0teD$*|HjP50J1>%X1lgeJHyIZHBIL5 z@rQq)lW)y+fbI)E28xsV9v@Rk{T4LQ92;$sTy2&0!IojGQ%MV zoc6-y+$2}6W;M2dD22GNe<=Jxb-MS-NOS4P7O>;sA{XBcGU*;ynjF78{hQ9TezMBq z9}2W$xGi)!tY7+!{r|@E|A)f+Fb>+2)bHdz#)CC2Iw~gYSLPwfL311kvjy4vcuN~( z3U#ayq3uh-(ZzeNvYGc6uKfRk@;l z{bpVd49q0^B8T-#%Sgt3y*y>T)`%*N^%qUxIbvdH&*vw2!fzB|Iv+E58(6r{iq!O} z3-^Qd*N~p@^>`R!!~anFb<;Ns(&_vlH$dda|1+-0A_!Y(aNZ3BZs<3OfG-ER1;ezp zJK2wRo1T&usjpwL0IxYNFIA|ag%5>k!T-}vvfCuUJ@T2E18zCli{k`F&02W=H-w+M z$ORivp2B8AkN=BqA}41{ZjP+y2`;A)Mo#{Jlkq&&hg`2mGYI4PQdEP1xu7VuXO5y| zXjN5sJg%zW)&G3YxG_0br1$r$U>3R;u`~0tdpn-@dx%?d8_FZf<(_(;Y=2Uk6fT>;F*3)cBBbfjfTD2re#d z+2HTXi930F$j}9aBn{2q^&(5kKA9#X86I&Ec2BPZG4C?@HIy9SEF$+gfT!ah%C}%! 
zv*5gI1u3Q@uPcc|dk5tZzAWT|E8poN@$^>XTwJbp_Wo}Hj#4>GryZ-&Z;~7#7m?kT_ub=~O)@mIDy0>dN`rbQuTw5~Q+rRm zunS~hJcfzs_5c0Cm#320=is61Nw|T*`boU9X$xA}y7V9d2iay^kO5Qc(lYmUD(^oO z~n(1~hq3r;G%(O_d({UDK_+R&%z0SCgGX zv}bOMDel7*BzPSBx?OhjAIfWZrr|#nJi9kW1`32ShZ*8ELzbo#3Wr1e0j$IYpN_T{ zAoKNSRXh$=Kib3OViz(zL?jl5txU)*Z}@nK!qQ5G!1n7r=L@iV+BJNpK2FxT3TC#_ zV8MqV%vml^cZo9s(tfw9V$lb``TQKJW2+GPuU#B@G#fEh1Y5{@Pgvj$B&tEwtgbyj z!ew>H>%)oP%SF)>F`}6Qz?n?|5ZS^=F^MeAiJYVok8%=kC$}Kj`DK;-XR(ECBbA$k z%&xhLo*UX>hXs#EI_)&ebH3z;U#IA8fdsr4y1gra_%CHgWH7dbkf|E-7&@qn@1N_c z1wD8gM&d7?CDGK)wv+T+7KNy*Qgq zbJ;vfM5*skHa*owvr2@mJ7KP4uq5OYK<2GZ-3$0ZgHuD&WxvfsrjOz%=-;e#A*9s% zu<(voOtTJ@JfJ#w{;D9>9%c`9EP}4|Jyc5l;NCHMmP0-dxBl=77*Rj)_10erMJVo* z>WH#!$)n_4??^7R8`GZJ!m3jo>5D{gG6Dt0XX2|0*m?j3NjB0NYpy|eDNq=Z+o*ki zhp7HTDE@A`xP{*npsX|IDb^C`?c{Fcuaf#6alCNq=+FQ}ll+@NVVV%O_6(Ztl7|}c za2`NypR)Due+8L$Qhb~Fr{XHPn(1=kSU#LSzw>6bm%tqSzfaNK`BD`z7&Y7vjfE1o8Z3m# zQVA^1;R`CxKfn<$#C;Hl?nuzb#a?<}0?i+G`o}*vL&wwY<;@LTL{g=kL)yR%-L-3C z+o?a&GQEF^QdE~o1|;pJ7KhR#Lh34RCtp+*^SKtP8@acqsT54BCqPsGA~0dPh(yZC zrw9n}+I?XGY1LCscj9Q<8ZIGwEqF-~*R^9aAN>Ck+2c%Bl3t2Haa89gY685 z93(7*U@!inmfJ{O+5>qFaGg!3)(YTF2ofYjfM-hmS}4l`_H+E7a+syQuzFjbf2h+^}iyq1RfUg;5Y z67x?XM#f*6Y*w7n@TP~!vR=INbs)-7CVCuvntvAU?luaN1W<6Pj*tlcJ-+q_vTDl5 z9ebEwrWAlzzXE2t(v`+TFbIxKz7(0fTS$4k#xT;$fw^YtcOj-*5go@a1Q~71(>l!ECjq{P1q|dZfpgWt(OujQ64zG8IYaM-x$ny~Sk@sR~6&!fw zY%w_?sw8yxrmUw-yw$u!#pUkJ6|vpD#Lo7MRJL2-85WMh5OBIG_2;QXK||4uy2wF5 z+oNm%xF{51`xY45A20{{^gJb@Qbh1z&4W}32%731w2NfTzI6tUAtqz95Nv9@_jLdM zK5lE`H}O|HfxC~B{d)W(4D*rhH(r~l6MP1KnWKisu}+`=kY9##7E6VARKE1JW*&VX zM=OYu7@h%~=yC~f1*W)GNiC~Bf%e2$(ua3n(q-pmu;zC**sC{batgyvL2PCWgE)pi zM6Sza$TwZx0fc9Z+Fyn>`}I>X_~;r1x_5V2--f?D)q=QZ3Z=FJKD>>S%iooK*WO~{ z7Sv#}JObobo}#>e>Kn>*$E?b{;*;gKcB?kR`gmyZfMhW^&Ii&na)gesF z)WBaYR9ysPf8qidMxsGJqg!j#b|kg>Sk2|~pF&&J*81zqj-Fgih+u^4Y@YWYTXY#unO?A7O#LqO+oy{T1=UAoF@zeQ;0pXar zzgzE9VOCzC9r-kiI62=SkjJ3CrW@c3j=#EnSyeDMN8a;0!dyJu=^x6Cj#8b`7=+0b53%BBK3}iR7$=uHCH$fHB_>6g 
zWuIJ*G|$+3^}~MMFHUt8V&@vZD4SQB5_qFSZ=ytZqsec;{=9F528K?bCxKu~fZm!D zTjmE1Gv?w%x00;y*W-a5oBM!F12PLd|S7#YFYtLB_hRl<>*yl%NE9DkB=@2 z$;=t*pMNM83&Sv;i^JfUhjr3tLC{6rZZZjTy6)}ehLh;biEFYfiM!j1EX86aOX45K z1anwb@c_Eil+^_GTItdY$p~}7NTH%mzizC{A{;4GX}gC{R8M`}4|H#`*`BP58=I`` ze(?J$le;F7i6YAT{LSKIBG?_=)RYPq)zG4R7FDYTU0L(!>>4v&#V>2vBr!iFJI&3o zUrQkKrHNshH9j)yzbbtb9Dkv!TWatgw5h(*NyXG%WZ(`j4o~jADATdG*;zeiQQT() zPS>P*{XL5CHTl_P>93bE6;9|T>p=m|MsI15`tlJUiPxTN`26U=Pso&#x}?j*OTP#C zdFb!-s)A=o8ns#~-##1cDbCg-Y%9RBzQmF-FGyuh&EJT$)4fVNGmNF8~M$&3`|Pu+!^r>lx+zuAoK((>XhL z=5nhEISNOk9M;#^xvz>#QWL3)w{=gc&yNq1t!IndB-v>c$oV?N`2MAJ<0c1FhQ5Kw zgo`NSdZ8V}M#!}FX2;{#_dBfm#b35uQ2sJJx+K3V6$0{DS@X1U6-K6Mhc6QXvp5zO zvTvzgCTthXrRN+N{WJ>G$vI`*aG}D=Um;-D37OA0Fnvnab(rJT2buiG=yhyziHc z86@s*n+`=i9u(vh71zm&_}GuEO@R54e70tMnld=!#M`o2)C}fvQSIe!3z}PmwfX-pMZu@~I~PU0P=rvq zILihOj)Qs{3v{wAg=%QZzx`1-oqiwkRRo3G9i-a*$Oz{b*j0fME`|=AUk5MTWn3<0 zQCW)MNzr!ltI{sx*t(4vyenK%voCnYje;$fH`soGmYnQB*QVz{ZN|ybel9ix1oNHn zz1lD987 zndTFkYW1yo)5X)U46r0J>`--M%axQ6jJ@Z>Dc)~JpH5PBUvxL+Fp2$=m=-+zdQ~4cew51h+2&i|GmZ;}ydQWQ6dqg$jP^+M5?3P|^6U_rKOe65 z|2pQhl^7@Y{u7}QYQh30;=>r&oEB9Bd`eVjGGTlge6e9&GvCZZbK@E3$$lvqoaoij z5s*CjO8Hr8<$$B;)e_0w$E55>lFr;xnxNZLPr2Gg;c}Kv6r*no<0xz^iT8(tC7vc^ z7s4lBple<;y+O2@x993z?$od2b9Yy}m1*@*M9?nnYrA5Jc`wq-H~BdDIyP#jN4TlRIu^~snJ`9&iGsZ=8MRIloC2>alWKGQF^MmbR-JGN&Z0ua2Hsyd!6utx-;S17`CatqjIHRCnQT?~EZ-1YGr8dTrq3Yb zfNl~MZhXW(S8A%`&6FRl4r{c#ge0p$2eur*3yHN)W<1NWp228^puw$<2ReDC5bPEN z+2tl5om=FT7=_R2Xk$XZJ>onT-^M=`3L2aJL;27Mm){)qh{9qLUeFoGd&P*TAQZaO zQ2K4ADjwG>x=r21IaYn|#I3|#p0?*G$EUuM-Ppj8*i0E!Z}S4uO2(MHVcyl!Pmdv_tC$%t+gk&;_RgYM5sS0mha zcsgw{dz~j%K~P|lAn-I&38J4HF&c7C<>SH4j(X)vTlxm+M9d65PLJrFY`-VYB1+2Ij;0pqwd0SJ@I0- zGd^kCx!M9f$3=lm88=eW#SQe6e&?MO#y;Gm0}mF+bG$?1bA%r}>@U%FmQE#WX+qr8gyGUk@O4}B;wG!{zF5Mm9JEpw?RScCt{m0J% zRP`4a9nR2m?Dz3oiTq7U1vQ$4F`$bXrMRXHC8hkS2R9Ap>}}&ZHrX*3ZjMm81Dw$4NC7I@ci6 z8{>zLfdtvH@0!m|z_7;i9`9fx&z~0`6C0vZYKFea=eP&OO0&&QZnal>A#lz9BV5!`&oh3O+o#pyEF@?Lqzh; 
z%x(W{B$@lvKuPIvaJ2ZOKYr-=Z;yIE*j(e0)nlek?Mh^`OUMsdfd5(V23mDw5LEh8 zapy=Ui?o(xm9aUt$W8LS3%=y>=>|d2U=UrQ>P!QO2!4jeFk2jD!x+lgI2yJ=W}SJ zw^1H+4_RX+UOn<4&_f*p^ljZMkIB4FnH0qY$NH(*m~gyg3V9{Mk2E_d5K z{a9O0rSQPi7Q6yr0$XO5Y46=#qxm2Qcx)2vFfQ3k^hM_ooCldA*bMGZMhosCdv`~v zmXzM!bEJ#@%T!E8L_P*}%o<;mJ}ncH+er_{HG;}hgv zxN+f)d$g#NtsGyN*fb#)&m!el(}aa-T9N)E%o2|x^wvm=w7pI0@Bu*Xy_(X2BU-BM zL^xb}08r2+my{rN%9my=Psr)DpK(pz4Jnd9-S+PyF$@gX%PyUD_{-6jS6_pVdWWpB zJKj^+wsO$b-I6p=d*u^0Xb_#|$Wh0BFyvz?g@g}#$2JFLnun(onoJA325O!j+y)2B z)}d$5{?IsF=J5y|Eb;L@>+*LI8bH~+V`**vFN1*{Qn6gw>rT?AZnWHaVD&MTU%Vnj z8j&c?k76JbI03ki`nj{xxj)wwD6^GG<;&zoBEvT;P_vwuO+M5%RuTfB}2S&!_mg2eh+4nH4P0F5qs zF>Q4El))==-z4`{Cz#j)M_L!LU7SWIz+pQ z1OKMsl$*0eSE^KRuEFa+@((4nU$-S_VulGwe0}F0@;2RiDP-KoY0}lC;X)|5EX8{qFWTen{PT_Qs;y=MS92Vr+e&sZFG@l|EtRtu8X6qb!DKDIor#--R0OO>uGe1*PdboWB5Zv2C(`pD#0iekH6bi7lOZ9~N@k>yUox}PI9Wz0 zi$~iDYuWu^pZn{U`o9-oEHYZRr#SKYvInkf1SwKbY! z9DPbXy`C0tkq0YC|Nc0(e|Tu#tKL_GV4ViVqKu56`sS=V%Rk@T2u{qG1~jk_S~$XA zrTU7Hm))8>|CX4&x-Yu=@|f%MFpC?-l$2XD?#$I7@eqF(TNon6P63mv`-k!cF!@A% zbGvX-OnZ)X^rLJ4gl@W%6~;$*k$w!iee=JX8KB8TlMS zjL-3UvKiz0+@POffSlG&)Qg0w^Nde#B)*Gp4eygfi+avJZtu(I!*QEKoz&H#(bvZC z2{mf({hSHDg#3oGpOS;ib8k%r7MNbfji}F_YO`9*@2eL{6Z@^cvagRsH*_fNYCuXR z`xH`ILl>5tRrtsr^{un3v3f#y1&kLbRtW~W6gIUT1m&N$FMyQpiuJ@9iX!+_#v3j6Dg&AxKf*kVAW!_rM2HNsVB-+9%<~NN zaDLe3d5!?=RmMHBMl(8~Nd;%$7ZP^CKNpT(0{=}t#%pD&`@lZ!Sy}Ja{PjYVvxIyM zMUB%;s<;fQ822>DPpsD*C2UUlo57V}eN_qY_iS9>5_S#g0NgHWwYC9M`9y>%uj0Mu zvfY-OZWC*kC2HHgWtoRb3K-tO#6NXfyq!U*;#Udqh1qX=Ra`7UpBgfDxLk{ZtyhMb zYq#CnvqxWk6+H*Z!ECBEh((!v*O~e8u2^H4_j7`|g(t5O z*I;3N>o++Pg0;z;3IewRBmR;2xAa6cvagYMU<#}sGe1VjV|@Bjlg_wsY!EX_OrN|( z6YaNK;WNapv@D>cXjZU#Uy?2?ap9k zSb0?^_T4hUVTchUW8AEzoSo}$sw}eLUyx;8GAk6mmupkQ)t5FXl9EedKpv=D-rZ zOe9MZ-M8hVxRla1pvt~$QOHI)O_J)c|Cmkh82*x;^dTqgXszl+yQR;0K??-ZvUhaz zRz5{y(IVQ-oSoH(&JN;CJUt|gh^}FLmuh5Gglp;8Mj)ZpW#>vx*r-RH#3!R`Aq*bJ zW)zzKz0r$T^OD4EF}}IvikMffXGh1(Dc?X&6veyZq9uyqx15T~2dIHJOMTqixA^@= z9wCX-DC|@7L24(bm>VCLc+~+MbE4qFgOT%X}}-`iq@03L(so`sVPu($Mz6CTB4R 
zYaM&Tqe$;I@#pWg0{@+bl}?=dXJSTgG0$N{O2t+$b(|q;GqA@Mfy8d1B^KhmrERsG zLT2-6JI!N}NhdXg{M<$D(+%jNP{=3pS4L&3NUfnt?dWT)J?5LkxV^}B-nGC?y~fcd z<`6sug#FKj?+tBPYWzbax+*oA6QY;8al51i zci;5%$-8_X`f2gPo{Y6kjG<~Y9*_T>Q%z=UdM3Uo@lgGv?>_CPGO8c{P;}QJ(zo$0 z!%P;v-Dt4To6?CMA;&mfz;iWT@S|QVe2bV{WbC?u7#l>MeOYO>bKsCHwQT+9(XuzV?Mr zmRM#uR7lfmg?Oh-s=A4p)iYYXA09Z$Vi#L(ob>y8NYOe!J4Qa{VGdtiLLYu5SO3Ebj}E zZwdV@!l?%>Cd6}|98H^?&SxS|EO8^}-P9t;su}dTeLN{3oV937ck)z_WuJWL9YRC~ zgqOW&T)cynNwF6XZec8L+Z*?O6}z^(x7Gw6Y#)Of;ND7nqn3hgX$V3IhaR=%pO562 zjB!nlK^HX1Baa0cAHK3fl3oR_-H7Nf%1UT*!x!o7zTw~W_lx&z{oN*+S~}ueMI>H! zT@~$Q<-1jO2;*6Gsdl10PrC=ahnZw;l3lEIgpjH|XJ%2YJW=u?q%@vRg8jPqeQpL@ zBy2#DQ zJY}q?<&0VsbO^V~uU+8P34Z%%s=4DNHS=MC>!GR1xIyCTwh+E6RVN%HMHp<>i?amk z0oIe6Sp>h$RMi|MpH%Ary@>@$v;}$IC7%LZFL>l>nQNw}&Jo__CM%=vd}|EL4yi&Z zFI{54Es%FR`9X`S;gClb=E!>QAna7d?I^*F&WRw~7a@aB*6wLW5{$XMoI^IjR+bqh z6i%sgqnTud9XwHLTi1S!r7`^X=I%;e#sf2|;>IT}B{q*0KHw*BDdCI!9hgs+BHM}l z%4sbwHQhQ_V8^5L>d$rVi$B!Dx1LCH>OYi9gES>%Xx7nk16j+>Y{uqPC{yI~gxnHE zh~{%$O(nrf!hZiVSvT%;sO#|QMi7??p$OX5 zFeG`Td6c?t)yZBa!I&fC_mq=L8C%-`H^-W1T9-#4t#EWT1kh`0H*4oos_`q+wqw_O zx9FZ$o;14zZhtE=+t8M)Kzn6SppI872j+Mx@~lVKOGv;PCVyDSdLA?Y-j)u;8)h0+ z<(s>^5fpE!2TKlgh9pB-Pqr%&>=*mJ5GuQ?cYcBSKeJwUQ`bNbb=|{8O6e6&PyFWo zp+rlGKcz{LAssFUBRrL3xU0UrZW~Ke5_;K&LQKBbL9w?KDN!oe%}L^`)2CGqof8$g z9GOb$+ETNeh3erk!4|32=50AVigBwv{lO*$oz??DI#10CgeA;zQ9MJC{k!bqPu5Y; zlzsd~S7N*F=6&Jwn>G8-Q_xnM=eegsmy35X5!>Plt6D?}``|#sM^)scc>aK^$-$9g z|7L!8=1r;ZZpJrd;;C$DoIj4kqE(t(+{upu8$Lo5&#XrREB2iM-P5ARWFu(4)~Mue z0_$C}Bk(j-Zi-_4FXdXlz$)FcIl57a&1(mFK?Za*iNi2uu`-qU2$pjxYXCsmn9eKv z04Y67PLg2XEW``=0C(Q*BHB1G$a~AZ-#okLP1uYf>{v-6aP{^xFGVoXvTcD7ETOO_ ze?NZ1uq)7$=_~h7CnjOjSK16H%5A|9bxn}9?Gon$o=0Ig)Q7n$QwEC{`j&Wfbgl0($Vr$7 zr27Lv+dQ;BxdUWs7!_4^t)02fu7E}Pa%0KSK`Lqh%?6uWh!Z`|6DN!X@+c1H=~)22 zY{=KxR;&qu?#<~>`=TZ8rB0uS&s_~ulWdm+WW&tmpRzgIDI}1(l^d$~L=WZHal#p^ zU#`gL=eL3 zHbGJIVkJ%S2v)LKrj8s@dSBDV++i&v5?UW7i>v;i&;VL^B2w%^3Kuqpbs?20bkUV3 zxcwa6j?P4D~WKqLc7viB&_#UN6eRJJPPLR3?yOGbXi8<46;2CQ($Np{0 
z`^DYlF|$dO34Aybd6(&gscswklKHK41+*gY6_`N3UuVfyqM>2VzFd~J2Pv5POF;7( zK_>reA!5|^&bX(#hD4BJ^M#$Q_Z?|Gb3{oWVZDD0d%`KBU>|(pHWF^UO@O!I6a4W4 zkJ6;1j9iCm%iby(ufR=Q2);0e79!19U|;mqr0}Q~hCI!^zi&ZkbesqR_Lo#Rn$e@0 zn(hJz#~+&?*UxfJt6FJ|?J`GTH|)@khNrBKbKCP#70;{Fva*Xijag6N;?*D-Wny2G z+x>D}EQ>oM`fI~Y%UF?N!TZNM-fjBALy#YW2M1`bMxB=SoKhBi5bt+Cx|!UQC=Ok3 z_-#vB(4K?mB{V9gbdbD9YStFyEj?(`aPCs|C34g<&kV+snFnZF5_a|iSmYj0(VF;2 zKoXKbGI8y@iiU=sf%?ZA_?={MAk_I!ptt(PG7|3A<5|R)X?n*ml43quE>Hf}Yn)aF z(bnES;HuYEGnU#k5QCO9_{@BlN2qo|`Jjf(&ejzpMD33!F%VjDJ%KVCCo>{Vjlo}e zP5->x^@fNmVxklptiUB7#Sgk(A$%l2w%b@YqiN`Ujt(__kqLjIf>U)RtR?wC=y|U5 z`kY;YJ%gl2GPO>ru4Ut7Y|H9xaZ18)&l$r)yM1;^6O-|@Zcd8kSBquIKTMsHsZPZ&b{i#7T{ znicH>6ic1ODxEEk)srelZ&peG;dw3e;{wGXKxs~j0qzf#IiM`LK4+AxUTOPKMf}cj zPkBgO4ry*~?0>5~g;1$n_X#(IXOoOSA;u;9z<4j+TZXq!$zT4(NB7+H*_@ zb4sF%ii&>doE67uIIq?zatZn!eTm!38~);|#rC)TXtpgaoogu9mdEeTIhgV4>lM^&Q-nvTi_}KC(3yBCsa?`JxkmAyeJ0sT|3jr+~tJLzU zmhF={m4cjRp6)_{UUJSUu?!c!@VecAGa7OBAe(Ct2Eqo;2#fAp=IMoPn!kNBN|Lp5 z0%mG#h{GySDgUurw8i@ZqQS8r3$&pN|A&Gepi&tCoIx-PmNoiBZro~vD}}D!RsRg9 z7bAUZvFuG?GUvQGiM%_!IPNas1(Eq^_Nzcv^?I4)NZydB)xY==D`$p}`f|Rell6JL z5MS_|{D>9+(@of!c52A}1jW{2`b?tD8@WnIi~@ks2E8tD=cL{@JSvz}c)mS@2%eAH zuVfj61J$NmM8ZVf`sCg{bxA$%ws6^l91)YmFx`RkrAeKOv(i$VE_{uF=e>D7a8+lI0v=afW-ROQH z3Qy|F@A`*A4T+cV{b5@`4$G|BdLq1fl@VH=;EX>;B8ynDPLf^n}acyb700g z-L{tcl|6LN``tucw_&HHT~&QdRz@b@{a%7)&HfGCdixIAUbULp=M`_}lLfLp-RzPy zs@hS8eUS+@LHW(QezkZLs@mE1Hy5`Qf}m4?ZvjA!b7H=#+(7Y%N%kVX7UJhs!cF1k zLN}Ha$C`ch@xu|^XaNXJKE5@FCvj?wzH6m8D>;BbKmJkcN4HTWR)0~!PNh{!ng zI5b&sNfI&mOewop=k_C3R0P^t{x{ef6yE9`Q{N#s3Dxp$BGqlYl6Jn0*STVf`)Kk0 zCDGMuOzNxt0CgB+cQu@7kTNU$#90nZlMqX3EcYEdzNiUR8NZa8$XK{%BbruxF2Hu{ zRA#VuIij)gp(K5JjBAz2b&y{`fDi)(CDr~G@0hcc^H;_v2-S^)*spuDbVsJSg~9Tf z{j&R8!^E7y3Gx%+#`3+Yv$GtpVEp7~)CY9f2J%0+7ZO9uQZv_Xyp4sB^hEy}Chnbz z#zXGq2+s&TQrJ!cfHn(zHc)*8KEi}zw|Ge8p;!&SwYf(W@uJo~Ig^XT%8DtX}a z_gaf8W_AbHXd|$wo#-y$LL}6Oq5SZ%C}JCN>$uXqs;H~k`gg`Rk8y8#PP3Pn{pw=e z^L11|zyXvc(#Li?E@Ix%znAaVM2W8MfLlwXQS{n(*jY&~J=Tb6!e}H+%`jK4ZGr2< 
z1j5Xdh*Hj;bVCE9Uf+Aunh(T@;@i|+MjK08Y@YqQ#6HKp(I5_oqTeesK3QXH?Ji03 z^5r5T5RXVsEAva$-ofND9KsooTa7^d}F;ev?@OWv@CIpEfD0p@z1q1+@m=27!+}nzY|IFswdfAV@w5P zL^v28fTce z-s6@*oN1bKb@&yvLP=FHltk!wBj5#mBm#}n!+Yg!m~tG>J{~`$Hbf zTi@m?NfO32`b;n_((p=sz}@`z?cf8C%i^0!|4$6fFGW*HTm1QGrpm#Xzbq+&kGa~618rj4O^h|s@&k%W7Uh(gv zi}j&kZ4*EVi5~U~!$jjG3N2hv{HEF*7J&@C(dWMs7JJi#krjE#TgB7)7jx8e!n1{V zf_&S%H=raUSO^v7q++DhHT_f4fRat`yhH+JAsx>wkdduDs$9YyS$BChJ#)#~-ciJ9}lAY(JdN#~lR+wJGvcK#KB2Ks+v5izo$Qvz4N28twmg3_+ z(jfIN>ZHAzUc}*AtO>`#{8e$Sl*F;xW;)cGuMOTxITL+;2;T_+eg0UT<$V0@lCeaB z*nJz?IVVfyr#1C)oO?;!%z+nzl<~X9aExBY&|*9(_Tf~sa&?Uucjps8c+L(FE&W{M z-SX@t@b822UCij(K|pIg2iZu7&yu06-_*WaX&&t>k@YpKxjOT1;ZTozefEn#y!v@4 zBIWx1@_(tD>8EA=HV#8+;VkEl8tTp7*u_T;0V{h#YyAvapPCR=0Eni`=FqO4l?eZ;$X$gyCxGSKstU=?nk6#2F*zdxzgK5$UVY-c)!0d^|NJmVP<=xg6_*N%nm^g ztdJ894a%<6#pH+#Nu=l|iO6PVv}H3_)EAHHhQC+Ym3i8Adv-}^$_;`Pr{uXEe=%&p zH=(>pC(#u{vCbKjpGewz$V8VoDHx*?`nTRwt^sR#Nd~l=g3y{n?sMFehBg@Zp|0_1 z7EtLih}UXUqLBoGjEVy{oZE`Du(v<4oUczIq1GP*g^Q8>SJW7?BLP*?Y(_OsY8uDq z=Bs(}Ztm9&&C1<&G`tb44O8T3la^h(P#a!tS8$)C}lrn_r3TQs|IIzsuV z&f3wb;3zecgCQkCAG$`nWvSH@=Qt<};mVZytLc?$U=@qdJaT|Sp7G?|(?|Bf)bQ-E zN#Y;iONS4??ZwmT*a=b~_{<+!3VziptAvU*WGb|nbS@J;L_XNj`nMLpviyCV{H!=% zZog|e-g>(dL7j~h?-|ZHhJX(Myo_FfGn*R{!9xkaLZcSN0+4}MjV#Oe zBYU|xnl)5^*oUWOgh6h4n4@KFAIkGurJ($_aM=p)`K-IGLnKjpyH{3LfBwVdr4f`> zD@*1Wrem9i9_Xe}xN z0Xf!biHid5)$Ok)r(s=aZ$r zO4BQ3hYxaSSDf_|i#)%V!Q};qnXhMeyt7LDi!Cp96sbj#Pfr|8Dy2MKt<)BHN=%Mw zgPE*oFqbtnJqoZlEM z@(Fr1Di97O;0rakTCpr^?s;L@gy0`KAGkN^=xw7T6^fmJ$`Vc_{hBsAjR~g9gY4BG z((|GtB)&Jm~XlUJEvY!1*NOOxrFrzWk>FyQrwVP$BG$_GK!%e02)Mu&X34K;Is23 zICjzf#QR{&lV?n!GV}?SMjm5tN`)=Wrooa^ox|5%ai7!D)`nmV1111cxRo=R&F&9L#p1( z-n#86AErJlMi&c+P7+mmuRrr3_dCMTq)Z=jA1$5XY1U&*`+hAnQatpuO-i6t7EVj? 
z=xk!0W?koU@}W6?g`yX%{oAjVow@u8m(`h7GA@{Vj~_=|zmD$6=8Lw5z3mu>)+7Ky za)hb~fP2exIx4bNc`;-UOIz_j7%1ajZ@iC-S)sW8Hl>M3@leEj_96U=2X`Dc$q{{D zE%H(%%mi0jneWKAG5D-o7}-0mW&WgY!qKwA3Wckry!CLQbHw0?xZoWhLn-aevMC?b zJ6?+encFk(muh+{kx}(;UTB5!DfTQ4r5rrk`*X~9naP}UVIue_)i!~0JnPTv`beF| zA+`=BQMsDDh-!&f6FnO1X*H^A>zmw@`QySC&d6Bz~$nhO>Tp9grMTZkoE;jbfriG z7rKT5xRg@zdyRApK1%`)IX7yKJ0hgPqh%j$e=+METaXLq;;K3<(-x{32y#4_p1)rJ zQ+sI}xTj2w(GE6fmJMcWK^H(tZpxKwsfQ~!$5D{Qz&%4=oi((89z5aPa}dooIClx) zp{zmrsi{@b@}$-`_)o4?Cd|2|=3!w7>eJ}3zAWpHl`};x!KrP&j&Iy-E@PaKleMsN z$Ml3@4PraGoN#x$WK9$Rhu;$5IqV=r2k4T<%4OWt`9zq_Su1J|fxHXfJ694;>oCyl zx3&!Zm@qJZKQPlfedx&ys=ST!_H!e60#Y6tzU#z0aE__1K$FYKA886Ed%@@wxUl^3 ztbK_LQpD@kacR0o6LkZI=czrb4*RJaR@3~U?r^?~giRS=`u7uh{;jlVn7Z*GBr@jx zKoz^s@PVMa1o&QP8kNGYcl&pw zv##2f?rq+smex@uduo-G_7J_ZeXa}U>WZUI^4Th`Y?~w9<>O)&&TBSHH{pU;Q5VDj zL#@MgY(Y%>Co27Co-{R$OX5TUrnEHbEsUbNxLiBPt!UHQ3wQ*=D~%mi7+9^*3QONA z8LgOpV%u9{3C+T8va5FfI|>VovH6*y(m>Car^*`TtKZ~n$pS+;s~3nvd7U@F2ot^h z+OGrZw)yp7R=*I!kmpR}=)+KJ+gl$u0I=LcLr}<1bm#SL_U*mVxBdh`%QbaV?Wk~if6XXV~f_V&Uooh7(sK3 z@Nyj5-8f^zB2rdZ(C^pLPNa*pfco(5*@kxsRPEte0?VmPt|B!FBQn>^3${K%7c|>B zaTo3i3&D>_UZJI4k@Gx};?2eZJLCIQ;=-^+b|tYRluHpjSd+o!LOAm?a7#DI+1F$F zXAUlo@2uo24zM>)*$x*%o@9owX46FQveQ5r=f=L9ni+e8;tum71A*B+eINzrEMk>PvAO?1+24S9Kj&lW+JXG@VEG4>38cVgCF?nhL#Gy8l>oAx*ExN*PI zu?%AL3;QD^bD;G8&`{aq_-^>pt?(rquP=`!Y`ZCV$fXHOSN~y>6*qZ=$QFPRgu7wF zYZ!6iF^)XDi@Me`4@{AoR^P=hDQzfEJ>;#5+%K0G7-7Tr0nOu-K4~a64=}V)uVmT( z))g}JCQaD1fA5jm<6p}YbK{R|eRbFra{IoY!*L|HA@<^jir>oyB9)#9h@ zcG=!vV76S9!Ew8equga@W9zYnhaf1Xdm)w(|2>}+&aA5@7?gkLFaO&a$3PmTE55qK z-&7rVS+o)YfD_yrI3d4qa9wx)cvQ84Iv-D^yUeu)uMHNaWGs7Ji!y{F6*)+A-Twmw zw%b0DKQql)TuKMKerTLU!bGJDebj`)AzCW(_$s>iV`TlB!jyYlsUTgZzRj|^Y%4js zUJRh(;W_8cdd*BvGrv8NGLdHe7eXiaHZoCpqpuwO%0C#kjTaT6qI{PGN7a~vM}vXe zXDqw={;};cf~MO{-~k4#5LbZ_yHDIb=2E;Wj)QIiS?*P;oeR|Ys-cGBA^KnOB}FuF z(yu+UwKN9iO3||l7V^J~zvI5EO6w&>zT}DH^0;Ane>iQp%16}5Oc|kk38$5I#anqL z5L9a?a^Gwt!?A8MV5zZ{mXe)0Z5%!)5~SK}ti}Bv`^z%Rr1S43=BfTh9Fn#2CM3X* 
zFm#SxdYR^j7>1X40wQHtvF)=kT$kj&CN3l>zWXrwdaaq1HXTLOY zw;6RR#i#FM9!ggT^*)Pa6Qo$(wH0bjR-4vC@XTlKO}xf_Nm#Rv-}jIRh@+4FO^eMy zAY_>1_uUd^vft<5VBeeR{}$X7WxZ{WG1N)0f^c<0q-nf5wSZv6hkos4x)JxuBj4aM zv|6}pOLxisyh_PHJClYaMOqU0m$ncev0xqNi#i|OKE+7m?_O*K8Kq@hV!_$esi6lFWu6~f?ov> z{uuOj)7%9vS6tK_c51Q8@jL-7U#1L6Jg&}rdDhrws|2yk?g-y!Me%B{E{_`U_+6yF zeZ8CZ)7$FI4CAO+$I|p5-bOi`4eS!sKO8G=J$G4qZf%%{Qua@>pL8ZuMvT>O$migy&w8+eNH5$imjBVM|-MS4k&V0~X_aPeM%-49s5V$@FN>$rPO^3k^|_ zPhfZGYmv`brTCV7y6pa5N|cf1pGo9D)#ZMy&mI?P5yIF9WaynFx~p2+{gm=B=8p^o zn!h})v}HlpR1af_iJ(0|mYN@-j{)q>X6{>SC`fu~KdB1Y>_SgfUMN^al3^kNH)%=)!@Yb~I=s`LmwxV+NdV*RgL0Tcdb}(h?&u#3he4PU?;T?J2qtkHg=a@o4M^8*6;t}u;^T2hEXbe$KY+u z88Cfmg_tjVH?%KMY9(O=ch?`cQ{-73R^c~NHkryVF1#~Sag{(O@rA0sLIlc7@}oL? zv=)ORRU&lQN;L%4srU(+SeMqmx|R1lzuLjI%IO6=S)kD_{61f?tLm8B^0vUGpzMO4+<4HogWgFkFADI&yn37d9JOBhTsW1i~gRtUEvT zZ7!#CeerNTX5(veS>kF>D6r6=BF0^%#{1#xJl_G@s6NHpoFMA$x~TMHIvmoxc=mXr znm}n`Ez2GwPH19`T*+tF_P`%Rxi*aR$2NuM{E{lQ3gQpBS(yq0<@;(8w0F#rPML>_ z{En(TCtMsfb+dQzTlom{$HEGZ_|#>c=2V;NuGN&c^wUg9zYdwFhmsaqp7NE=?f`>2 z4jV2X2P)gwoqH0w(|-^kc32I48W?1^5p4q?q;WlcNO%}Fo2`i_ku90TQxP9{qAjem(CDTCvyar0cm0?f9n?b_$6(GSvuDHj-Tz`(SYcsijJWX7RIv?MLL zb~@D*ovNuw+eb>9KX!%rvyTg#DZzKb$TO;vD7BwkOg#9U93S5p#4y+gL~;uqmDEK} zuYDjg8|OYs_#G5YkB0V&kh$e6B_z=26`Mzibj9?QJZIuh$cv-lC_$>gF@8pKi*M2H zeOs<^a9#x+3zeV#v&Mqi!n1ez>iBXrh+VfFBjk%o)FbNkh9v)%MQe_H+Gdek{EOZW zTv{fo2;E=Ml!1S)P*ry|cW#joHI@(uv?NQW;5V+r+4RcMF^hu0ATOnt<7a99+^WF> zbyMcp;^%ph$RmSYgDPG}TW$OKKUbH^{P@~`P{ZR?f@1&d)gE^Dvj-N<$`*yv`l9IT zl{c0+peybED&0LbNav`q&l1YE`{J**KQ-M7zYW(Y6CEhE<90Ri-SkE7Aj=*45Ru0X zc&P`UF1r#yiZBg(J4Mh=b(rp@*SB#Gl~!|i{Mz}FUaTL&xEN#gjj(7RB35J9b*inu zPBld2e1)oiR94He*&3lEAjkYwz1FdS{h(tzn13sD1eWE(3>`s2;}Oq%xKT?Kk-NC$ z{DSAcCNkZsFEH3dXc6REh_6u2E#7$YeR!W3#Dey>vhxlZIY-LzmTP-=(HfZY?XC^H z=S%7ymmgIemPOVzX6>mfd0#0zvzNrTjbHBALjK~YzG2qvNQ~^w)(vphzf0eIKfLa- zsTHpQ+QYxU;aWDREhcL8)KX}zqOiePC=D|3D!^84Hej*F4hF|x-*(d`I#&}HEf#HC^=2#x~J}PUYt&p*XouB zo;8L?_5=`6T}Ufsm!X6dp#9a8`u6i7`IFfBfW)4nwL@A|t-N*^BOV$$)y%5?=AIZ~ 
zzU@kJ8T{T!A7{EKUry86epN}CILRKn-YZQ>Qr7-h0Y+nMZ51p=!*PLB{%t*Sk8I&{dl`iHI0Y_=du&V8nvYr zo+1N9QpLbXamjFnjt1h2RFJ*9BjHQ-A9;+zR-eJr{){w>>{Dn%bWJSI7ZP6ltrl;Y z%Bs_EngtV_5`khG%8utcYI?}gx}`?&IO)m~q7bX^TZtkmrc zW2?C#-N92r0`>A=%So}m%G+w}KfK(FLb8bq`iXs~h3LD72u_vq>vj0PUXnqYB8Hwd z`2YR`l*IgxFlgGHj-JQo!?d8g!B>q->JG5bee|ujybM&^Z1$|TEczc{%%b`)+0sxS ze`wG~>F-(Y(8SYlROeV^owIEZkrQjrMFWC4l`Vdte(?jxs!4bq7Wra_#2+f`!vP!Z z!gkd(d@{Nbg8l1CA}u5>3%(Urj_FF?h}lxHTU=NSJ-1HotScM(i5Y+UM2EO+G~k2r zk4N>$Y4&6nhI+UJ^aSJQBEI)#Z!&rP?Y%(FF(M8VtAEROta4oAOQG(ye%=xp<_0f=V<;WY+vbH7Mqcq(U@Rt7PH`W zLj#xtC-8xtEhT8c&}&8b30F~#B;_D?ci1j0a1{{zD@#2Of4E%{GNW#)xw zeyc6V(>xYe|3oW9WV#L$8j`_bY))f8c3`tsZ8h(ew7xvL2~{jG^>gCI=d!e17GG|A zYv1k(T`%-&&M1o8Ypo|fnhmZ`crbl3{zd`IwW)HFO0d3trGJp>tbD1Zxawp^iD0Ch zu~ordG1`7u7}dCkp*!5HCkiX$5rMLN21W`@w185Q?y`FO4Y5(ZuMC%XLASIG%9v}i zU_oh0@WKER3Jr8-5J=CB6y^9rQJj2S231^Rfr<1uTeS1pzU-iAcB8C|CI z<~!HCfpmBY3b5RTrkN5ir+$o*&eS7lLb?k49sLexfW8HSKXj}~5|L?ffpe?sVehPv zfijaGdnNtG3j|$xNWJ~58$qh2N{6wll!C);H%g@;fWMLDQex_=xy^e+(U5UGQLp}X zD(Gd-0Og(&T02`g*v`$pesEUZ?DI5hhONsD^!%o8|5eM>(q_LX#{qO7Q`h{sAYUYC zE^Zc+A1|$ToH3X`BjJVh%|t-HbTvnWY0QBQT5npHh(}E)HSWMR38ub{gCwxS_+#l7)hxwnF| z)OrT(^(#Kq6*a`p%NK(5cU~Z8`;ES3R;F!{AUq@_K62@0()0?Tx2;t|S?9iUN8@cA z3H^=xr47b$t3qoqk*{Aix?bMbbBq*8GRF+6QNfmvW_$oueH!Rley}+??`mvbn9poN z{^0LvCC#r#(mEl`z0vZd8l8s{f#9^v^p%6uk_x7(`NKSBtuLd0c!$edlM~rlpt$ZE zCzR*dXEq-Mcv{|B=f#^PL?8n&B{FK<&6rGqpGbb0*E&NQtfpIAvP^Ej=h-^Dl^L5z zou9iss;(%1fl{EI@G>fsdrjH(YCDu2sax*xm0#j34o9piWV?OD@iQqq{*Ar3cSae@ zQAZ->PAXgaMKq$38>&3+z>zg~>ORczD+Z_yLjSX)>7uGz!H7e$Us_o@dax}wGh3IE zC}iO-J*pwbwM>)M044E+sO2Uwn#U|7@!r zbGtJNOr2z1DTTU{|G*N^HxwQHRQe*Ea{f6zp#W)vrkWU(_-sQ31LDQqZs+#4vA2&l zBH|KjkSuJq79VNI0ZTDt$q0A$aX))L2`*WCT)-5M#`(MZDpD6fP!v)Jp^T}wwP;qt#fHXA2d8UCPdM7qHo$nVi5`+f+Ba2XVwZy*F z>J74x)xHpKfkM!!(uywi4@ShFM9{LJ$a<0Om8UteW7U|{zMcst9#?8b6)iVMDY2~# zOdC14k`I}jWg!!@Oy1id`w1EUY-VT0?g>^IC6w;MyTYyb?GADiT!QjzozYq>Bt062WwL$=`97R42g9k`qjw*=fy!j*old+$%UEMO_g`gADJaF7T`YL*1c4Gz-I!k+3 
zcnmWfeH2&BF71>4FpL878$a?y{3TqByC9EWXAx^eMZe1teTB&dQzmpgWz;J(=ksn? zyDyx2U|~M58dH27O_hb;2zif=x0S|bx^fdFsXYFM4)Z~Tz~g>w{`N<&fH}@wOlZUS z{Reyj;R_8{%bQm$gPyx!s0kH%9|06UvaNW)WON?RwW^WO{ANlDd^N6bP zh5A@recG1uWVOnV%iu8W(b=}EF?Agc#QsG^=I9OOyOKtP)Z>60q4ImYS1eyV;Iw3H zlP$P}&S^1`%Lijsnk>X;VA~L$!rQ#7EL#s}Tvklw8n?xMJt@b?6N;02Hi*?{)MPJ2 zqv-&@tU?aI_Yu~fG^U!y#4(ziuT7LM)?FYgeqBbvBdBZj{> z$^nKrT!q(CQ9@Z?#kB2`2V+h!xsKOxUeX$B9N*rDyCT(JWt%NwQ1Pk7V~BQF2aH{< zC{Y7b@bwM-!j6_29Rqu$R0h<6ZqJZ5#)43N{sl0C0@md}z#E(^efB)|uC+-?lf%2- zd968%Rbs~i)7Un37LgPe+aPRDcxA;8WHrOO8J|CTL)U`ar*ISmbeEQbV^qH8YSz-; z1Z)S9OakSO1h9v_{pq^`lH6QJhPY!cSC7anCyTQB6W%Opk}&rm0co+e9S{j_t3`0r z#f49}ET_%2v8!5+4$`(v4%SHj%xEbR%TWUxv=)?Y@F`d1SeF>$3N;&>n`ZGKO?=3W zLi{uN&Gg^c;|U_V&qS*+DyN3MQ-@Ko!yYQOJH?Mh6Y~H8{D><+Hxz$js9`O`YI}W8|-u zlGQ;x)3cx3H%X*$;}#lF<$9-xbV78o`kl(QyJq0HF!tg_LMg^F?g8D zq>)OoA}e^!e|2n1e7MWM}KX3IKPy?^P2TRE?a}Gce4_`@SjQL+)ezk2$eP& zzsxE0mDXEuy)d|1*SyGwU`OPG$t&#EdGh`G7*Yk7*74P`} zn|O6 zIWp1+-=fk^b1CF6ZME6{E2Lepu1;tI%^Nmq{Aqyw!C|v~bgc*zilW=_293Bra_ifd ztHG0eBK(8@0PV`NdP%a9xGR%Jza!}YI|N*ZT_~;-U<|V%xTz z5LlZqOdcq<)!zS!A7p#6m}75m%TM{$5i%$LALT@4e?fg@nIw+`Z{Jj zC=qnclRS}0XHPYJwcAf^Xm=X+sm1d8F@z&V!C0X1fj-M|LPslbvT&C5ge_}U0N0Kn zi_>Z-rZ0M7HJABU@8R4etUn4OH7Ft`Yb!JxX5=7xvK@CQFL=!u^`~?hLfIf!HLcJ` z^3LN_aiXIyK4nmD*r|2-zU(aF@`B~Az95a7663ci-j+T+58&k2h zDz)Wjx){lnpP|_=7v#PM!I9KT*O#f9mT-QbEWi;|kBm=UZ|Yw+5bWRlE991@Qp_QB zH)RTy+mj@g;oATjul1X+KX?)6ONDpULT;G{7(sGq2OO~iuGJmw^eWFR0cs(H-^*d& zZJu8i4SxxUR(C#=c0V)xxEai5dHpeWxuSic+H_53*~>h~@b$Uk0R(dSP%MG^$#vGk zv{6^UyEZc9p94>xEZjW9T3gD3F*BCVWSeDyTq+1MN_{Dmf@DC=>c)zlFQ0IQIKI{8 zg@4vi72>VRY^-qPZ7AdS*4L+0Z|pQOV;I~1B{X64!JE%^jkur;JCuaOz^cE+IzgVB zb>*(|gY2sM+k||Yb==?s<}h19jg$-lCcdiQO_&V08sFcXP8ZFd<*dpOP}obhW_CJAFQ56J%(`r({%Eli9SLTK<0Q9N_Na~A0&_(aNp#gH>D8C$k|Rus z$X)nxrOzZk7{P!zj&Pg1`K|?;cBwT>pCeEB45s79~~AGrYR8RczCL9bJdfMl>{bb_oxb| zvBjW!_9BXZeQ1>V_=n5|V|Ii(mWE9j+u?X7r?>p$0!lMApZX4a7UF5btom6Q)2w3P zZWu60jjEUA=TPYJJMl8d88SMAN~001XyQ-2!Z$-%UjWa2>lxx2bE)ize|p~>%d{am 
zmXXmrX5AxCw_9$PQ<36|;p{sm+e#tF!M|q0Tgy*$Z+{}@xD#I4$MVn%enrw%Kg&!# z?=HBb$Jie%+UR$eL1gP5sww@=KyDda7W_23l&@q)Mf~P**6rYz%sYUck_{cGT->IT zj(5aWhoi8UnLDwCQXc>#H=VDv12)kRJv#Vjt3Yen;T$dl3bG1f3_j)ElqJ=reV1#_ za)O;RnR7I-#jR9bv5u&Kd~3mi@3}@cmQj`x&G+408k%PW;>(+}8>z;z46fh4qkMlS z6jtft-!DEXk|6?npE*X$99<4Z9CxcmUgyetNfQ|h?Be$IcwtcG)#OcPL0`u$HMVy2 zrtd*bquLfuakxa}^|ygHWMRr%+<%wgv`%YA13y%u9=lY(JFbN`Opl=fS)OkgMefw} zwJ%nnz|cJh6y|sQSK#8tSbb^L&mf~H1=HcEbGuvQN01rsK2O6+{y)HvteGAar4a(C zWv@Nf(Y&=c*wq9JzL#EMz7*MbcA|UaaD=thJd-NJS!gA!KLf2AFAg>Pf8z3>uf!|c zbV8ax+ex82cZy8RX6(hiBE)Q`s@Uw0gD;W{{obC&`hJlA2N*2Vfbnfn3n=ae=HBH1 zA0ISs_`~PEb#{~u_v}>f(Hf9j`~ysoJ-_{QbwoBJY~vraMj>&nznIeje=Cnn7s|8L z45OiSEm2pL&rh-kjLs-3$Qu{MHr>O>d|6(>@fJs2sGd zHMYe_VM+f0F|!FiWS2x}QRTRg?Qqzh^}R_^8^YKcD4_CrIdo&o%lL;8ou%l2)rCq}in=lqkMIgc9v69gBb z2s4yf;uPVY$td=jO4E`r%PDpol$5_jW(g#v;V2^~`J(M`X%_*BcKvaV+2N!cnuo%W-_DE-_YK%En%w{`2y}d|SRf z`uH$wo1fui^G$b**bVNOd_UvnwE=Mb0eGS|^5=}M{`Go41Lju%hU@W0O3!H*EA zQT>M3x90xGOOEUIxq{!C)tz{&Unm?<#Yd%-&?|r_NH|s6s z8vQp}-(ZSccmzD3!6yd)wHN(1gK#ViFROk}nsZ~8bajK%_5TgLMZV)Es(XUu?7)Cq zZ*7bAAph&-nY2x%b`{pv+M%auxWW6ocsaMX0x{r2e!+!rmD4yOd?XRnzx^mIE)8+~ zFL1~fVTK~@4}%LE_YrpMW<1>A;2}G>w}`4DS$_C20tECASN;KJWc^`tz7m$jONSdj z#Muz4~<;k1YIS|PK#_DGCWS5^fy8hcAB0Q!38`V$U4duwd9YgD( zP9FFdH&}*s!7-qlpm>(U6Maq9?!QC-_(!HN1u|MZU!t#{;4}={Rbh)Xgj=cpKPzPa zSy|WsMQO3&d3hqfX2?Tc#eZW4x3)B* z7YjkNLj}tyf_?SF!WK?K`nAeJb)bUwNa|RX;CDV*7!8)E@6_5x-myCIM8Ar+3qr!L z8GDJF%O``;pu2l8Cyn$g}>y5Ng|D z|8^x=(!s)u!E&dJdT9kZ^r;8nT~MIMp*~Ij69Ff@;G}SF(r?NWRR%`pQ5FLEHOUX^ zTODGZv1S<{GpD0}!0~JjrJOt4 zXWLQ!2^I@P=EWcMCp>!Q+0>T+Nw{1Twxh1Ro0Pe&(yM->%+S~R zF=cM~2_g|u2W^*((<)R>sZU9O=s?$w(q!#T_;b@66Oj)MHaE{dEFb=fu#=nMD4CT0 zpv&);4KAjT02BTgLW=dgrEZv<6%d_p-nzU>4Zpq0A&vbkIM)y##TMgJif>EiLOb|Y z)BErB2M<#VDzmI@fLbAR$jwWQY5Nmeqtp97+ac2QYR>z_#^sM|{RzQsl4IZ4t}dj` zihf(A_arX$t-O68XtSJeL`C@d-7MM{Off5{Cdh7!MAi}O;N`4AR4Lc|)MHj?;QG$_ z{tW>EX>-YM*>ME9|=qE2{w{WqkH06*u9l1*?dEVzj*G z))i6Q>~=0}K3io{B}7%DQ}>EJiymt-{>oTzo)F0)K2!QdpR*`*6jJ|p0Q|LTP_#Sy zgm=HWJH{KG} 
zCG@^k?Sb^3zRz~Ze>GB#{|z_d9;q8VaZuPGO&0*-xCU>>5`aLaa11U=RA? zPdINvskZKFVz|*7(0y(wG@#n}<}v@I`IE>+Ne4lUw>b&)x(_2}1;oSEK3`DrBc7kf zR6q1{Iu&p%hj1kah(i>`Ko~g2kL$u==kWW-{oqr;esmV={Yk`kJK^&WU|8&M+L`y^;ni}NST6T2Mfd&%^WWTyMxs!tGLsnN zQcp!Kvnj>`73+=Uq3(EkLS~+62Bhs8Ya2x?Ou#MLRJf??n}&T2EZasFJo_B;(kS|$ zC>b^eqP3f4LeRaUkFTMb-->AUAb!xx`AH_rn00l`@vUCd%Y4%7kqF25_EGa`z1j~jby|K4DZImY-vBdSbZ8>0DPK7bY-4J?V{R)v3X-8+OX(Wkzh;V)5 z7~_>stV%$=u{7uHHMg9MeSFNmRX63|+v?6%Vd*qQsh>M>Ozc^?rjg%OSNgsWZ}2%k zQzBu^9&ER_ntpM6;*S;BiJjA^iqDJXG+s9P8cF%dnl^_W3#IGIXTeQVt0x6t+JPNBgXTVyHy=B?+Y4j zFLPzWBcwR-!wxb+{=M~#vz~8uC$F>AI#WMMb)$~M73AKpFo-HFD3lz1GcC9LQ&H~E zwG&A)md+=5qo%_q$eI^6q5X-uuj;Kiog2S(eQ|c8D<8rJL zKWJvdHSg_?52;CIRLS#_;4u!ce)?1aEzqlhN*2s6%}{D47>_|uU>2z@W1qsE+wQ#` zH(#n#ir&oj#mO;;p%!aKEoRxxe&Qty@I;2#OhcvdUUT`p{uk~mVJRy9_RC*J$L@l< zSP|az1@AzopWAUdMEdVxIfFN>BNQXMo3Y*f7#Qf*%hhRwe`tzW6^*(lO%voj?N(@! ziM=$6L$%VzK83mxr}7?8IL<9Nw?=>Om{HvG35gPD^69G~y%|eX4@_;_=2aP9>^aY& zOTGR{iB3*xtjBf?axZW|Zk?Q7u|0Zotaq#!v!a(0>-4D1Xxm%cq*?|)&>qi5rydh- zu{*t4@zX2^Eu@ZeFFlWAYV(^e<7$w4CJZmaPB15O3*E%sBKqic&g5zz977D)rH|G6 z(6BCvJQGQENy_;>mO{4*czzvg+EQ~{w~G%sEUT`LMl!eG6`WUNCtZbEWkWV zDtE-&G;vhkGmiV&MEUK+ldSVn?jF6BFkft=M^G0Ls5AI-c{h&vzGwILdPpS1YqkBo zU8jRJ843Kjs+6*`PP`MS{0<7CmUvie_i`yo&W? zB_JS3ih$(MB{0$@-J!H}*T4kl_WOI@C(e7G^T&D5ALqT!AN%4G@0-2vz1LcMt&I-$w_nA<^Lzp#mi8Z8Y-zk2;$?AG+Hu>kx6{qzkLDVkNS(C`YDyyd z{N8fqwsqRhZjQqPgWWgji4PwK?ZzgJ8kf}G8Z0EGk5NoU2|%g62P|h0Uxyyd_w*u~!ds-XY>jeu ztm?dRmBQAel2lB4y_ohSwA_w_cITWY=EsYYzMuZ;4l))wX+~Ku43m?De+Pg2MkJ@! 
zjU@7?PZ%+zhH5C9J}5b=9;k^=dNOGD95fKV?1ci#)+dRA?&@yDO& z&bGCDAEz{kRo+~%ch83#cUKM0CEV;iD~Pz&;C`tqu1z2Gy)JuOgMUF=@=Y}_E{B$e zhS%qn1#M5+9JkLtwC(X2f2l~u*Yr~&f=F{k)G>XH`)08NAXg@{hr&9aoBC+#M3fz=``(A*<{}NXT4ny6;`zNa^0PZD7DqQ zV!CpFDQpR}e^h1*Vw5PjHvK2(qtg$WaA=ZhV^sMXo$!~WuX9?}LEfTcx6KrVT!ZHB zeo(xTMdVP=`AoL0)X?^vq4G_L*OrD4%qh=y#Gv<&6t4s;&48J9lewzLaE^6Oh|Qk{ z6S|9v;i>1*MfHT&lFhq0Jxlg$czf>1&u4$wpMC`++FH)Ih9jiG!W|gF4Yf1j_Lf`WBZ)Jv`{0#>2u?h(0D!!i7 z=Pc7F>v{awI;u?WcK&&uQ^)p|eS(&B?N^Hq;|kL{bV)6eCt=33e!lyjTTf9{H;w;F z;JmZa_6d*_qAmg>VP7VFV8QJ?w5A@C{G6}8wP;HFLHDtcNi3MLcT?rJSHug>SButW z+ZUs*qa8fcDH?88p6hLMFEw1ub zRX`p1mnFKno{QfJCi-y!GNEib**DdY{fMu4T0%DpbdVC+tn`z=7t}dibRKCxAu?d` z2yWvMDZQQyG(u8Ib0c{=tRTJuTDh%nhxpPiRISwGeo>eBL8i#&SIdtckEB)%U`?OK z_^7-b%BT>_Eg?Ck_mECNeh%hM7E7Qc~l7Ts5bb7I4r0ZbEO!Odn5 z7jb(XzSeZRmypmB(U%GG)YWChe;NoMhd3{bFlwpu8*Z<8%lKKAC4H&0mdr-8?3``7 zl+Y);dRio8Pi4!ITTo#mn?1=%tGFgItkxyEO>bVl{bt!feevP;Y(G%p@+(l`+oO!j zZBlW=%cLclnKYd$fcH~hIeD22R(j&`=ZKa*qxlP0kKZEd5xMUvFi(a|EH3ojW;#9H zIlH#|wc=8~aW!O5>=uwFL~kiK!P*)~LtYo~gYK7ZKn0bOlpXP0>|{$62T;-ifys5GP#7XxNTyPu@f# zr_GEQW0^?0_FcB@&2r=Vy$qry*ZH1cc^}>9Xna~ZSLyvcPzl89L-o$?`aPF-H_Nn2 zLy9}1`jpyHevtxi`&FkCL^z-cPi{WZBW}2sf1Fo0CoG}IamP~H;`Cdtldxx_`R8ei zRY^MYihDoLd7YC-syMA&j-nIQf7Eo;k(WRMJ6tuV^0dH=P1lyy2MaZvNW3_bkv12QDd7^`;l{lPV9tCZ$+%qB>dF+`mJlm9zGK`d14uYMxW~)F{HE=d;rQ3~u&1uX`Mf$}J)7FStUFWh`R9WL0^hO~=TqPK zqN=NgQ_0W5m47CR)6dmhRLt_bJvSgUOIlZ;!Wk)F2t*lu9TVo z%lGeQ%53&~Ua~_JI#|V3_B)+TUG#Xpi;7yF!yNq+NQR`9p_P|!DMu&sV4dXc`0C?U zwn~|zt!l;|v&7BGmak~~GHZnOee)h=CzZwz!evv8#k@g1d9I}6g~ij6_Y>jFQ_~A|OipN%k-~Lm`EbAi{C29FXc=YHw}81Pc#wM(J+r`| z{cGV5(Q=xGt<;EV#E%bJN*cWLE!Xiw@LLEQ@U2IQOJwe$Z4-%@XS(%7Mzg#pgOc7i ziR}cr8wk_dX(IB%^0)O<`1E%(BuwtTQ8yUQVYn}oAT2H(D9O19*0$fw3IeJuo_A$2 zp>v;4`8{5;zXI)S2);_%FSEeN)IWDN*iW>P_JQr}|Ja}2Ut(xAwsE>suI}4Gn%!G$ z$5W}qczR?ZsgyECDEl?jerxesAm|4xi1%1}szc(}SgTIszTnXnDEJ6!*ozfR%7qCP zTVpGLuEa-hk#(HN5+Xb|LD`YBI%+`jHof%u2H5Bj%oNbqB5?>f6~&_1oKz$42eO+K zKa}=ax`}M!K}NS8DnQA^rG%AHoI~{;&aVwkNiz&;Q$9n*5ld0{+a!d@aM6%;ZZar< 
zPwSv|tXC$rb32%obS9n-z4OOw*q=B?oGmZF5U<&N-{A^mj$)Ee_P1BIX3aGZ0Hb>0xhGn9l=VNe3;xHu zZULR9Qr^r;r@Nf=+wp&wuHA`Y^ty0HA~#uxUsbK2s^B1B;!ZIVcy|D%&%OeMdnN$g zp|3zcQZMGA7EVmv$MR*iW;ms+EUfDn&FP`8D8Y310UnxGkmft+ek6|mOUmWNXCTJ| zcty*;9h>SO26XxmIc@__uXj~cIZla|qS`=!iy$1CTkZ5-YA|IH-SlXxHeYFHpxg>v zPMzo0Gbn7o!OR=R)af02UtT|9WWmz&RsA#mwg#z(c3VUGeuBjTeZTp~DH6L&Y`>k= zJ~}H=`kWl%taRbSJ^_Tu1Bt3VH~8ID^@$}rfS%3r0f8egKi025DxhN=u*HpgCpAg- zJ{TBu_C)-)ZY4ZZIdqA3ny5w!Wu;a;BnB!X76qRv9JWc$m2Mh?->9b}7U(Xo%NwuE z17X$xYQ7MfN^=EDak>IsL;Ue$0Pke1ForJdmWmM00uG%G+6}1X6^K#ZZ9T(U?e;nc zjT=xT2liW6_X>pcMja!u*Xe)`P;CK{Tuv?? zG@&XH(HRq=HTluNvA+a7BfyeB>LHMlb$?gPgranXUI59h2V8!F3_Hck^(lm~F}D-o zm?OTWoar2X;A1f!>&x^yt_0%_M_YCQ3R-{k?-b;gfEcO+hyZ#l%b+=9+&QvZR#iar zPLlc@ATe#hozfMeUv-9K0Zz6kbQ&f<629HyfZYb1Hh>L#Ynu}f_-}$lma2hT8%8vi zXV8Sav)NFT(ng`5eOmLMmQ@(`+0T>< z2%c!DOCCr7w#UK@uvN<_umyA;@W8L=9s=zZr_rTACPpS?1@{>V_(ZE{wShel(fk8< zz3=6li1>&2(UXY9&j{LnKqY)%+{xp7CHP`k_sAke=8I&YvDFA7OTT@8c|&jx<0x1ffxrg zKF+G(?%?5@ojJwPa9X(D`+?Q$h3D()0YXJ?5Rrjc-X>nqTc!c~BdOwiCdDc6;9>J^~!F|y%XF3xKJ zk@5G)*etXDH60(S5*7W-s-{%($7Z}Z1ic2#NnW>n{9(4`bDSBl5j1d@5mlVWKkp&_ z{R8QoW?)^{J!0%NSl5GifP#Vd5Al6?L4BfaX4~~lt|5F7+-*eNrRz=0EFd6Xp#Eb* zp@qu2sAu$XM&;|6HaPxnHl(s3`s{;y3?D>~#_jZicW>)HAVdDek!1KggUVO{kp%=u zIldSFF#HklV6x;UurMlZ$PB!j1E*h=Cy2e_gumOe_a&@bvI=AWZ!ER)KZFk;zg{3- zJ2AF=xP05#l|Cv2dUWr{Dr9d`5Gx!P9Wit^n0oPdt6{%I_3fjN<$aa?dp1o-KHhWu zhb0&H(N+*9#IUs*HXkT)b04wv(GPI>KFJU{0whB?aEk&N)i0~y03$&zBR>9%kq!`l z|2AqvBtJk0aY&13=Rx=5gQcw3{;nMErKNben}!g^d_a6lRs81!iT~>(0KJjtt_WU| zZ)SZ2q^19oJY>7p7Tdq~1pmbf@~^XFMp%KF*c<|mjOKU0ndAObmL zlg(H3lqka1zX!g>rS-3k>^?(P1vFdV45d)`VNrNjEeqMN1gI=^QDjzP>Jt9XQCM~V zV0m-&cVXXN{~pc2O2j`TEe)+Ih*Gu2n6|Tj2EKmTmgUU#P-{?3UD|pUj^}FW04~It zPgfw|o3cQw`(A%VFcWwN*_mnPF;NXfL;dBTQSqIrqY(59MsSah}TF4g5#}OH*v>%-S{9q0hHA~y*f_<|La!C)#Vp1yGK)VA9b!k z##MsArh`ujx&I|RmlH7}UhQve8e1}Iq}Hr@u^!{_gP*1X$A0a_oAs;~GzehEU%-qr zDSv0g-@&W@Yfn15&t4sxGIb#fyi-}cQV6HewwgP=%LhO5kp-ZDa~fCw79)&#By@78 z{~~2R_&>d<9nhb7|I?ck!wRb*=(Zcc)w=x{L9cic4*d$(8?>l27vPeangz6!y4UcZ 
zEvt}2aHZ2f8+sox6V-7fHzHeD;3&iBoeM$l>Od-|qQ0aIgc*!g$`RFnO!x8sopeqI zxGW~I|L2JTf%~&X=vcb7cr)r~g|u=J5cAcQ0Y6&=6NCy3$M9#G)GO>+9xYSAx%^7~ zOKMl3aq_Hx?uf|lo&Mzy`45XW=Ulp;qmcbZmkc{I#Rev#m$7-)!s)L?VXfU|^8UW^ zYV}nQ6Y8We@h=wnGKFbHV!@7*?9>lN;+DO>qR3AhEMW|jTf{F2k1zFd57u>Ns!iq~-<%GRW?xdy zCV5ed&>b~yr_M}?0M;$cl*2^rPrn&(R(s@N%97Zz(rCAvV@NC_jtFyTx+xEtGbr2M z1OMvSb zA@1GDt(m@+04V$Y!h@;~@@tyk=90P(!Uho!&h2>cbDigKXN1|(V`$TX7+Sf6E{(~F zU+qnOT-bcUQT~&IIHNkNIfSF!b_NhHZ+ZoihwlD_yPp&PYYHl$!=s97_2kcWbom8d z4>&zCv4|n@TJ#R(aolM0U?hC*6R;rgjx@LRc_s;QTgF=@`mMb_B{>O@>`s>73l#BQ zE}yolFiRP5s7t2jz~}y_7ihq3{whpH27CN8&V;0px}_Hb2gnZB9#*)~eUlZO*)|~l zkbYmX&h`WwF(J5+_kROWA_6G4b;{WHIyGI3F1U}1bC(2)a!BWANp4wqup(5>i+J$A z{(dy7+}S_t_Yf#-N#Hx%+3iy^^2MM9}iy+!5jc>G0+AP=){0yy9hZ zaPus}S6T;87h4KgAs!2fz!7NGqqR!ZFr8~iHCG6h_qR=%iH4PFlT3zU&RlT6jhUUV z5ieY4{R)KEHJXxdsUe((qP4}__}@g6ZNq^;nzojpFHLm{#8s>g2H*4jR-kQgs>=vX?#U@rMcEIzvnSRZ~ zfco$UToKDe)(@lXb4R@biJ`lc(Y)(UR9$$`79m`n|ElIcrm+~v;AM0f7TUK(16=QsjX zI5WRCK@6&x51AyqQYKMUci-NIC_LnA4}qdiS{)^=qn4Ipa%yUW*(GeG;nC1gf4Ced zlQ`!a_pELPdOb2$tnoM_pTok!hOID#KGc>r@dycxF^UDcldX55s)biNl5eNSQ+QGM z?(b4%p;h@>cR|Q&$VuV*i1M)$D95smv2aln(1I6nHHAZbf5zuTYspI$9UpJ-x~C&D zVQJyI^6rIxXNM=aA@{M$F#uuK-AgqL*JG`aK`~>(J)nFYPamH`*??ZeAlM2IprzeN zwaMKe==6L0i+9IqE~W#;G-)MiDEj@84!)O|?y#MG0&~LP-~$4<{a zb$KE|H|}NM+ISx6m|Zivd+*bHPrQPjTf=6CpFTJt-hnI7>xFkxnm-VskJzbe^b&Q_$(oHe2pa7hK=UHwU!Sc$86%W2 zm};3?OgHS%`RO8%A&&Xh%zzNT^ni<8J`0vG&4h*MJrG<_w!rs#B$^b)eGMGWg*e#`|ZplRUeW=Uknia z@LvNofG`&vudGBOrt_1h*a={%oJgGR3aGvT%Stzq`%19MhUFkrC#Tzf#*$VhFtXpg zeFPqDDF`!)x&Oq;7q{MXXj!1&ES0e9)Qu-GCyA3_oQrh-u5>R!8ML}+G4?xNXRKM0 z!}+4OdlRnY_c=7L`!s)#$U-_Yg`i%hsHKhdQV;&CT+BZ%67bh2fTJ7~UHC*u5exkJx3d+rNZd4da5W%>B1wek$|HDnEJW-R(2^@aAw# zN0!FK;LFdeWURMDf5wEiBcIJr&GQfSZ$B3M86bn7-?)n&jsMn`pJ++z3$kDNmXqd5 z^&^I(;)fm+=hUx-TFnr5O&Y9=5?`WS3|Ifpq?Rpbx5RlI?@-g@MSqs|jk0jA2F9Ny zQvL+UY1dDZW`NEd0*QXTt4ODX{Db%`-QR|yVAo~!|^0oE}$ zGq$#=c|+f_iM5nZM6D+S=_7fvul?^dnuxoNY$^_k61s#^RJa)Nhf64}tghzECgk#4 
zj^6S{!X(c){3Qs$_-eLRPfZ@)_Z&Vv?t9>+e_0G2K1%A;$kwwGlS;!_{qYltY>*%< zmHu}3Q_bzo$a__Hc$Lr=XCa(kgJM^peCTA;-R8NokSkF940JmjNrId_y7%Tf^x0wN z-30FtO`WR|oWJh)%iqdOuoFZjsIOq?>lEwy;XW=DOz>hHxgrU)Hs*tueAiBk!5UO>GZ9Pu`ha~8@q9>hfa1& z;zUMdwuNo-gx|WoF2VM4prrQrd8g}^9-KH-Tk-ZO-6e`8qOGR`1ilkHW(r^4C$-HK zt4{0Iub>7Q_tk^f8X+&_cHw3pTsRME^TH<~@kgoal!`Q%S<=z%KLK_!0jUA)!Fc}l za4=5M7)T3n*j|LPqQM3SdfWUn43A8LmZQXTcy!%7bZg}N|aGBGoE6}L1 z&nfzxig|`JPyLrEG<9HUjbwDlf~nY6um@ul$%Fh6w-NZ?(=bUVh&J`)>$xb695KY` zsVzxRUJ>=dxaqz*SfaUCyf2M05`qOwaG1U($YT=6?J?y!5gv1 z?z+EaKQ5jS*oa9873CDkjT2aiHel^Q;|Dg^Gz(}OPV--9WEp|nX@6&_cp8CtzC`|S zF?a`BfY0r@3smPA1I93WGgs5tvUgXUEr7^c0E%t1=UUeOi&JUr6mB#iEav{7lENbU z$&x~3qvQGy;nB2DdhUfm8+N8`itUhUh^%<``CS0b@+Y4Pfz{!M)euWENr=sl@JkT# z7INz7zIagng2f>pP7pa>=XN!U3&!a52Z=s$czSIh@w>~VN0~1SmaO`E?Cb%@+8iH* zJ@dW4eL)bcC-hNbpUpVy%Hx(V#Y3x#oTcR^RM7;T{vY2$AC=^3OP8##dpU6@5Qy}z zeemD`6@$Mq(Zz6Od9SA~n~$uMhcTI+jxil~7ywO;7T?Btv zyGse`f?l3hTU)xAJ2jW-OuWtD&>eX_iriglh}~35Vs(_1%?ul!a=}QV*{6PFvP{0k z(0`bFX;{F!t)8 zV(sEy$GARh!AbT_LfxREV%{$2c6MB0-#CG@m^5<{S%hv_u>llK_(05ABUqq-E;@4UPD*G|CQ<`M4;z}m+V53c5S>qx+MBq zG5#oFS!(4sYT4u2tU9oYLj4$}HCYN=_9rmpr_7xw0CAyP`u*w@>43|TM2A6fE@8m0 zj1G24alst+TcV}YSXHRz_Nl)nP&*|czRWCD@J{qg<6h>Dk#_{o5>$itgrKCEM5LoH z+lq<^I3ZamN`~u>kD#JVt?_VNQOjO|4$=%ynp6tKL`0O z)E(^2O#^!?tGpRy>6mw$^=$(k!L|E3`!63>DPFfrQ05+1<}?nj=cAkBy4Ox>TNwby6 zPK!`I&kMqDUumoVgO1gN z6&UZlVmx8`lO=bZR_p(zp<_5f`~5?aRD$z$kW^$zO0uhv(t6CN6M|OmgJRMDm|gz= zIY<2uYA^VAYDdv@y^Yemyo6lxZ-3_p>VgKfr(S^?hlUTmE{e&t6vF7Fa*%avvUI$A z1{!WU{7)m*4fMA4Kd5t7%U;K2d!yrBAK$z^ggdBNo@Sreg+LIiUl*=GQ@nuR;H1~S z3p!+kbGvs1a>)zm!HP&btZXfOhXRSUv96=Wb8{v;UaQe}W~a%014SAuENhw=NN>Za z?(>jaQw25=1RqA;Pz27g^R(_OA7)sdU!@P&w)n6uD zRS;JmT$cpJy_w?m&9ke!K^$o_tK{`SC`WV2sLjJDoiSSKg(_l7tJt28o`Myf340uG zi7E1>d<-kYK1@D(Nxiu6;Om#w;Hsn_@?E?`mXUj8G-n^)@3zyepOzgjxuuE@JUJ5% z_VL);kc*X7ta1LMqY*5F&8N&17V_7@#%TbO{?Aa=|3vIOUu8dnGV#@88Zvf|L_w2c zWMYyra$SLpyPCC80}i|Aid%Lq1qMgdK>Pm6k@T36su27{#IeFV{V@}P^SF@8{URpo`HSk 
zqg}7X^rY;I5$}xaW_W9Tn|Du>Q|<4a<+{+7R&%^fR9lu9_NjUZ0+)K#WbKk1T!D02 znu6t-V~efLIGyM1r2M0&^y7QEACPsGxWsxA)CCU&82f3Wr&dsrbsAc z0qtOZUg@6-2xK*wvl`jcvGn)690FdK{tPCcn&{M^ycuc9g#~Y~KxKt9W!*VJKW)uF zex%Wkjpa8)>aC9y7mS3^YMEGj{k>a9KkKQlVL-eLO2k}@}5jy9>N zEp14lSjpzvY}yB1Qo9#6*VN8W^YSCLM*p$Gz~s1Wgbe}+5U+9yvN{~}=EYKo#D|KzCVeL*yp zwQbm4^7`S9Jp-PU=5SjF{R<<%JuL5KeX#z%?B2**n-723l6Pxur^bhP`0J1BY&Nb3 zWL|+<#ZcxD5*t~2ZnZQ0JOe%@8=7p{EHLiCM60_(3pfV#d^dt>yytiFyu{OVPu z9s<`Ne(gNk-lHSR-~HLqU{QjJi6Btyj155#CBO@Ky zu6aGKA?CvPoQ938TPqVS^b|1sNH+8t5jOL3QJwJ{{&zTmnN-Sl;$q7H#{_5mp&@@^xbVRedfSHOK=I64-h~< z!;^^|9BWT?|fajCJVHFm!+z}D8@*u zzy9;MnC);YtbP9cyC=W2OoBhmX#xvjkq=;)w{MX{NIJX2a}t-zK=_6;UPQ|jzn{u0mS((#E%7+ zRs%sl-@DUr2#b2IEZV4$arf?Q1jv#BM$usIsnhU+qgF`ig>F{tTL_w;IJ96wUyvFr z(o#K^@xyjkOk$5K;&B##PPCHCoa-?0?yeu;kGy{Sa3MN?HJ}fGN1KRCUfLLSFek^; z!fG|wenyC`Ds8a<#`cplWImn}YmN3kTxq38-I8UtvoExT@mV-4^zIhOk+n)&e_B(E z4sIp7rSd_u8*-orzYuYvx&lobKLc!Kv<=~bP8(27_IwxU$V|Oygj?^B@ztpvLptmg zRAjD-!nB?*eGO?fP2k~0Po35+Ix%c4+>&Pbf?jAJ%NzV$W-zxfrxNyHWH6{38q150 ztQtP`f2KYz7R6%7T{+s6Rr~W3FR1>tK|lFAd7pyZZT9*eYA3Ed$O*#!xfs%vA@ z_PMp*b7Ph}=#v`|pOIB|O zcXF4$LrPaeizLRwkr0F&{6InTf-8#(x+4bI$zRw5B~j=HKtAXdC<wTQtkmyLH;V@4fl*97`XJSFvu_#1PoQFLmewWJ}}1{bcClmOn|hq$yO97IK@j) z!COC-C9(ykLL7a{z&>Y}pbN}d9=hACBCquriEI;UCnohB^&f#?>q{_jN=2Crqj=kVRk(vfY4T)<;xfi#uxVdOPZL7>NH8> zJ8ADyn?{**DZ_VtgDaK~Q{TMud{8ejzhi(&!&Tw{3vG3xg$daF8{j_^SU`_y)qt)Z zV}S@I6Hx?C+<2IHAC9Hj?h4>V2Vr=;9(M*jns9V4H9t~UGcoDCwHg53?@#)U~y=B;Ginc-|UgsE4WKkgX)nxNEh z8Y5FFiM&6ISge+1Pd<}qQa$K8`aQ7#mdX5<&}6&C;l<#6_Pd$@6ATx`&$jA@v105iiW`PJR@3y!AHhSr`ZCsw9 zWEs#wOQpb1^6}l%7#7|4nzr6{n3W{()=g<3 z#Y^dS;1gPc(UJcJ^Xadm1^eu$Au-Ai-QGZKfK3xk9e~YwLZ5Yd0zDxv!&c$8*bC^b z@Vn4w-8y*&DiIV0a=g12aUmOO%@5?izIdn)ViX;(;WCC~As`ii19J(CozgANcQfjBji+N1foLwZ;c?T!Pm)BfPXbL3&=5yq_0!A zzZa=8Z6c8jZ*17f2)&P?7rn(l^x1`)+EYltZGPpXG&EL{W9D`rkZxw9{X>*wbQ}?M8y)9gwkYXOoNQk;x4Y{SVf0^4kW-wYmHk)wPxH$V}cH z3GvL~G&Bd|v`jO zq9kwJE=i=^TAZ#<)6MkQRMW4_r_fM-Ph7lLO7fqX(VcH5@4mj#fUyxz)_2#%J$QId 
z37ekEL-T$n3zbfk9Ks#dbjB2AaMQ-Y9^?v^K1lGH$w;ErGG#h7lc#+_@SlhJspyuI znH+@}3$wQv88av%M&F~*WCA8i;L#~;q;mqKrrPn)^hdzv(vtrLFFbwv?x!;`XPVLR z6)3WW3r_4OaR`Y}Qn06xS~eI}m_~B7QHgB*lw&(KUizleY@-!=WGkq@ZaN8Gb3Cg8fBK5%2-u206NZ7={Zl zL~Jzhu3~ksKnaeh66i^_|5OkFXq6e*pA}w#zA9n<08qDEv+FYEN{bsXE?t!Y)J;vF)19ed#cLfTfb@=drxi# zlJWLU(U#T^E3ZV=~gCtMv@;&fJJh8&WSkZQ7O%O z4zbuq_reb#T9Y&JLfH`i5bnLEB0D6cgZG-b?EDo#oiguFWdPVz;njj4!T3p z^Ic$u=zzX=u+KK>Bj9$b*p$gC^{NG8xRw0Jwm~K*M>3ZvB7V}g>A@04KQ#In{1D``m_|} zkC@&&aDz=;ABx116)k8MWs%rLSULH>eUfQxo2{H{coQ?kXr4F3KYnaFXX3TP(XRj2 zxVdEimuMP7bY#=Y3%%kz(xLFEc}z1m%Ef+gKrA3%e$?B|(ixIZe(j3|=jZidKDP{xu0A z_9xfvF&+m65uzwYG+gn}BjWaFjmaJU8Zu?#$BT_IR?&xBB|L-y0Ok%}u|I1)XwC}0 z0@;Bh@a*N`PaGJ+_pKxEVZNoroP8*I%tO6M*&c}x!xL^-dI90E4OT^Jo%2|k$yeqY zN*Gaw7eQzuI_x@rBJ*MS;r=eqsU2+2dnt<+ zT>2hI0_Y!-Z%!W9r;%8o-HSDPR_wtw#wLR##N$ff;bG#Vs-+kN>>&U9S9C%9@#mK? zh3~cFeAasU4__MfRCX*G(A|6aq?L%(l_KB}KL`YbslX|`vfA_)Zb5Awj19jZ3WmS1 z6W-KMxwf!_vc>%o`195~ui+DoGw&R1q1Ui9+?}Xk@ypPhY}>(H!!dVm!!rK6A3}`7 zt)8oo+>mg0X>h&ynu%}_x|ocDh&hEKs;?Kc^Y$lRfo@K9P)};Nb3g>xRCs>vzJ4++ z8lpxKbnK)Cbe9={bPqgg*GID*5^9c*u+PHDx8^#6fnx1FeciZ8g0OnzD)`L1v3?9M z<0sNwbwjxn3(xMoWm?vYARxqF9}R?l6-3{+-eoJsUPse6!rlu6^e;QHn8+sDNXo_m*%8R8nQtm2{A;pTJJmNp`#9lG~<=(>PBjm%^f@cWP%obgP7O1;S@k?bGj># z{ZN<@CDIlnF}qE@=~<`M`|Z)^T!AlHHqY-oQ!+~=w#77LgMVpKBU&GzEY&UA6Kt6U zq6#;pBBD!-YHEu;rMI~xUPt~>c9^y<3ZLQ#RMf)a(cN-%tdfVz|Ha8 z%)v8|K5xGV8K1BNXE0j_pLmxiLGrT%3V*~Ehynlyt}6q9_YffTY>5wrje!ed4G#)- zgAUs}2UfBoGD#2hOH0M7AH80*5pKnVtJ1!#01BJcA1U}BTn8ca7p~*)*)A})(0Lgz zJ0{ba9a4U#Sh>qu;zm_#(<&X$TGU6f9f{}10>RfD8Z~j3BrgFiaUHppizjK$Mc1{X zDzn0xper(5=knvC+ut|j^RQqism?$9+S+oAg!Lh=kLk_Hjtx3=m$t_Y8?Pj?eOPZ zfj0fb(181zsQC(%*n3cb^RfdpW5s&`eTY^)#Ogp35MONO00?f?rEGAo6PjziokDy? 
zW~vJs%3PEa2S2czd%i!)8!zd_Iiyvor|mK*{CFxgpksli20HWktZtOwPs6lpbnDWI z1pQ7j;0d!y-(8rqmlj2zA_vU~q0bZ${;Du^!S`Dd@XZ7O0KE!@HwsG!Tg>%)gv(Lv zU=x_{{OW~Yz&!IQyxI7X=-%x5_lGkN0j1$10q!yE7$BnScIXjEoUG&~5RWWc=r;!j z%?g_kNfK+LNH`-E&vPRzSP@>4IA=5oMt>vV4ldH6MF#!llzqExmhbFx^Y!(?ynMxq zU?5kdqyaZy)U26yJ%AG5y#RJc%_7fQ#!eA*3RBK(+6a1d(4gTpdBOAwYOi)REjmqI zB9Cma5w2X86cPx8&qIC)-u&HmcT#ZAz0}%Z$^%NF7&Ba5aPz^&X8EDM7(lA zhbD%?03gS(5I1*{&r>0(7&K*AsuU7tlb)mbr1EC5V$}!^H0a0SY>^om~<*kp4g5OlMc;zOCVoHhMLlidCT|K#W5Eu+(zRf=<~ zx`z1NcyJX5l^jG2(aIL^$N@ozy*9DF1cX%Oj7$v5zM^O8nUYb~q6uNU*05)P&S5%v zvOr@5lmH$0Nd6@=oS+(@#C{}S7(C&%8_^7rPD#M0Pjz&f)aV!jq@Mhen0U%n%6 zD9-WkL}l#-(VB&oL~X+X>Dj6V*yktG08g8o#Ni$p4HKVeVi>MKy~Tba!0C`f28u6$ zU3!;}=v_xI;OlJwW^$}UZup)F0{o?q214FnF+xAXj@tpcl>mMOfzUSe17<~yDBfaoE%fXf4fsDJ~bYVt^UlZVVv$3b6 z)yBg4dxwepj~D~vxfC{80+k3kmjj_ZS0Kwc^gTU%jX)t1KjGtkmPk=j(V4%I>{%ZC z_9mCIb=t45R!@FBS5V;QOm~6H;YT3{s+xhyK*%yX2U^A+v4o_u@p13Uv1e+@Jexe2 z?XtA7QcX@8?#&6WCHVWdF_Mf{{C6rFCXv_H2`Xb4 z*YOSUX)8^N7$ZchinLDl){B>$Qfrx?Wxj5DoZ;!c^!R5VQL~Mq;KcO75F2o=as58Qp zE6_6){0lrWqWARy6c^3xiH}pkBccEG0T5nMniz@x-Y&~G=8hmRF)>01S=#%{8b4`V zRQHH+3a{r>YI}Ms?$g8T&OexO2WY@HA__;_62Ipc6O3anD=tK5UfvO#NKOSJNj)@g z4}FAKD2qm9rJfV;iN~bWw{-duy*5fgnF6+l9BDGh7?JB*VqI=MQjS<_jx);|zUzvsa!+jwt=l z_kA>{b$7FIdlu~}I54fWH}Tn_UiD>YDs8l8e34Vt5hnV8FfT&3}V0kP1jibkY^*_GkEChcMxk<4gQ8VVLk0$Rl;7r^c4P zIL}Z8V_DsBFd!2}+N8VW^(`?^bGFb?bsD>k-g$Tk)rIZ=xN~;ok<^)HT>K8Ypy)cL zMGv_CTCx?Og<>ny0F)7T{s6wDxGXvGc%=x470m)Ne!n@U>#A$BC$TV}pUz+0SJkf4 z71R-vo4NZ5Sr($qJN%b7Z)4&v_W|k8?~ToxY$j}l9xk~6$u?J@Gx;(T_`x8S_%MEE zM4oOn6a5#a`OhQt!+G9Gei|sIh*(U7x$I9MZ^JhWmOsx=rN`z+tdC8CHzxVpbf;utYX}Ht>PR0FTH!IPpk8aQli=a6^($?jYcw_5h`*eIGvnuF zIi;X&jBhP(cAJdv?#^*j7T(x?*C#SGKwY?d?9aPXg#Cs?VYLOM?U#@@bKLO*V|gu! 
zS@AJDpJ}3TYxKKwfxnJi4l>ym1FAY0-=0PNL7Z(hV&4QLE_Yd+$})6{o~FHkpB)3b zTd$+vqblp1;%nx-IcU|T(8Gm}EJ`m`s!BVl_xFYSent)}`1PhdAiHL+K*-j!>dspG zZDei>=5Y{H(di<5dpiVn8{7jsegAOS{WqJRGWU@~-h~L-JYPp&!e)8)+=`oSoAxu7 zv*l=cQ1dQI74gb*A;+sbGo|()^y9l~?CR9!{tjsA1k7YnvzvmCN@(HkWLgO0y#y`* z6~&F;q*`ms*vCg%_;`*Jt}h8*=3dJQ@`|aET_th?Kj`l$y3*1FP19&kz4!Av)bZj~ zKR`tg_QcPkV|z5$UVS(2VY{c5K>RaW(Fp&t-&v%~+#0D zIYsTAX&UF?f@rt_^FbyTtuqodwZ~}#rQp@cwC8FVPmnVxG*?%Gb;V&;B>X2!W?!3h z+^=#i&gdt5Pj0g^B2z}qzHl_ogiI$ks>Xi z(nRS^T2w?jh;#@Epj0UWB1#KLuR*Hx-cdRTC>?162{n-7d4GHFIkWeknQLawH~YKJ zxvu?RLIQbt-Y3sm>t6SLFXDZo!*UjjMTJnOri=CnAb$f{FQ#d2E3CxCKQt0+>SzDP zvvw?4d0Z@VX4!W_mDG}uMZ8i8VQ3W#a+$URQm>Dw9FYy^%Q$58BJ<2d_ciGG9j0I= z+tvW$F`zt{0NPhP{>swjp#W=n8_1t$<9ujh3qSKz^73(TeQadgm|y=eL&&wgr*bgI zR#i}(Q+m&d^-LzH+TTGqYKAeL`WXaaES= z+4UAz_EO(pWgPqJqq#IM*ujRtyA|7(vQ@95CBofQ$zYz7kZRGJCqARHv!^%d@9?@` zOC`HtXc~R61`&Q)TbWIdGsieAGXr}pj!4yvrs&QT7PHcBJ_~Cb61^&PT_8;TP(%3X zuFnS9OJM5I#{Oy~o>yUzZTj|oJS{(@s9&d8;f=GS+D zuQGeA#7r&Fs@UJQ1HU#OMN%S`fT7|e3HwvcE^ZD>DngQn;yyQ$ey+bgJ}7saeP4N$ z@{s~JNpb+xHN|M^ZYYoz#a(T;yz44z^t5%_;}XA^HO+-h1cLJ8Bo`rET;b1{z^4&e)O?ibo)j`3*pPKa8M(Jd+?tg|~ud_nR6n%@url zK?-%25yFjs1`CVp^`~A6Ieju5)8aL(Th%LrR61>UgT>f5Uk;RVX(*o)* zs*djE>$ac1P*Z4fZMU-#J0Q=Hb6v}RyX;G1GSIQwAa=+VcK_b;oFT%;M$u;S!q)W>5 z&_YPfBAx-c;vT&+w9iQVGDO?3Owe4>nYiQf{@1>D`n|AYou=>}{J-BK0P%l|FF0W1W;Jd5c`rd6rrb;WmBl0$e zwnrWjev4PzoEJe(b)!vlj*78xp4Jp~5gImiX8P14y62y?jsv%q|Hyt836;u^%LI^+ zutm+Ne%`d-AoHe(g-els5T(Iw$X42|BPQG~j+NNk%1B6t@p`dYS5&VMJfA2|mob(- z)ku1GG8mxEE2^hI{L4>H!4!m@AyTuWa}8&RTv+*)y$>|W65$f=H+S#yB=Ocvk=Z|k zc~dJ3Wi8ml2*9VmBjmzZ(WmAZNXI^x^P8<3D>}QV8mrg9WbWEY=dmG@#LA+B3uWn1 zH0q=oq&UI~ih7ev4!w}S3}&3wDB5Ccsd?vDSABWx3r&gX+l1!`Dq;}A(F4ne>Y=0F z@l%}ng}Z5c1C6g@j?`^x&~-EVqOT-BY{WViD6;?X(o!)*3Pg+%N4frT?fe9i~ZJ?`{HxhjGDX@1sXH;b;j(RQPYaNREJX|~_ zS>N2Ik!HRN5qiTRMOI->M!XOC^$_=bTT_DOyFYLDi}~&jyO^MqYf<_fN?+DO0hcYp zMZ#O4$V;A!PpoiiRVCzAxN*ALTSYbBc-Fbjmq=^ATTcb6%89sd=wZ%j{UVR_gm-B_ zp%xqirD;~e&;iXk^!EksD&6%7!3=91GUKN`jT(2!nuDNwvE-<;9Fp$V`8 
z9)W&;CE4S_pngHrvp0wS%0*LvwL;Pk@@B%LTo%y}!1Icy-E1kx^`Y1O-6h-w;@awt za^5DH*(=CDihVg6J4$h^;_@5BRS7gHWr??2fl4dv9bzB@ySionN4-1q!*_dt%<(&u z2F3P8SRu{>CW)!K7fJ1UNb+ z5Nijp8^1xeifzQi0caKS@7V}0%lqH7PB;k#tyIMERxmakBiQM=WqCQSAeZq}x5Vj` zpKE1yH8$Y6wa)dSD+DBwoYWA6)A)9$T)d zP4S@?={)4?8QiVQGJ~dOytxP;8X=Q6&~fo%8=~nS>yIZD;iX~XMyqoGMSYK4945Ip zjjj{S0J~cGnR;l+CS6N4JHx=n0>18#G|M zt#mUX)#HUDDdv<&t@9hiQ{Opq4#f6ZeO zPGUx3r1t2ScOJOAG3*<^JF`8R9vReqRZpMsL-&eXuNJI;gXoL0}v$ zkOrJ9&m;qH0{*H!T>`j9KVF%>41XQH?c$pzmlEU_#Je~FYLlf9cTg8sw>xGt9#b5mL*~yL1)z81VTNJU8Mt2X7!p!s_qrfajZ6yAJ z7Ipw$aS5K$3Ld+jAn>qtOwL43{|oOY?LM6F$F{Pzx%#CHXbp3_2NO)@?wz7V%Ybv^ z1}ff3x2Sisuen|h8?YxHBvArH17hzYT2CT=feA}k9$^Er$ZkcgNM*&5Vif+qs|O6X zsfJU2rsTBlN}>8}1$6Nu2tG<2hF%g=CBsvpBfFG*pb*`OF;t#mg53G)X}vV~G~ZVi z4{Wi&hgD{dqoE^(yL~unIFN6#BAk%Su)A2FWtQUyq^1uPEN$dU3+Gucx5Qo$%6fDG9n#Psr=b1lLL81o7w z5(>i8Eg_iaUW++sjz6hwsb}buj5ie&X%+BxZ!wKR0Zf_s^b=M>AzX$yp?U)wjo#=q z+)g9Ur%kJyc~IhX((|PyLrUk5D|qvGG~UF4mBWDI8x*ZK5(lPGzK-Mdmr%nvBq+;# z-1glsVJ)k@{N(HOPww;ZE^%yrijCh58fkKMvgaDX4FDp}iBkp+QUkODk&z8R$ib+e zxkS!5oHJtdB^{CW2uZvTJb(Sy&$H>aN;R+>X#MWA^1ATl#VFNlQ?ep`2fJmzYD-04 zM*4o7kqsU{MqvtXcc!) zP&$NU#3PA0)N&+{mq5VS`HoLc=Uman>FGNuQ#G^0h5^z7u+Q$cnrH86gp;9#(0ch_ z8U^sF^UD#{+0Z*5v8!tF+DEvda630Y_YKe5hS_(fi-&xn7C9qiQj8>PP_&K+~cuyMG1Lzjv9m*Zw9l%;2MgZWN>3RrNRYy_!) 
z0@LuC``FbqUA)LbdceNqhkxfm3M6(Ng^0}~)b`=YEP%!tVu8})(JjQ$O;jf|907bU z=Bc}&SCvIp#2nf&R$J%KKH2+u2~B&~&(cVjD!J*_J!_{3p_C|YA_sPHSp~Xs8set^^7`j26{lxYH|LAy#$B8~J{r?_ zaiGB8erA$5Q|HtMMl1oj1PAQuiWLy=Lmw+YUQYkO_wJI$q*-7}#?$pD9|RJw1!srT zU1h-Xee}+z`-E63g4QBKYert3A}+w_$}C+}xiQMo*Gfbk9*%eRQaog}_aC4ZEn^gR zxUS9z(nKi!0Z4;s_M80xn4ZXEk+IcES=~4-zD|1jwS8>94;Oq5D ztIJ3(w`7~P-f|ixz5DKDQwW;!0`}RCLyJX_lrL$xJ0utoRsSu6Yp!hJiKQnV|8vJm>v>YRZX?WzDa?5)qwN4_t@A|r0Uf(c2k-gwu+!y#1 z39viC0MJ~#jX3Td0d$C~C^nQm_BSBn8fMC$K_92L*!s|6aE+^a%Z9?_8jCS~D$1V147U27)l@7RVe zA+czH@=);`gyWf!lfb&}5B&zk#(xEv7~H3SFZLoU0N)EJ0Hd=nrIo5!IG6K-Oc2hg z`agX~P7y^#Pq|VAG7#hJKeP*Y-D8CEWX`^0dcJ3br&=vJLNw?46Ph5BgD(}BY+W3hAS3BX@BS5IL zTrh0l!zOsg(%oh^d_#|&c?~ldbyudwh;+AJ({AnbE;swB2U>Mj<0g!t@WP`{X?Pk!VY9WeIcB4mx&Poy z&b;q_H2)J{ghNsN^ISpao1dCOAT3#WGrtNWRrxLT(c&X3lP5|1RsIgR7rHjfYqLzB zJ|x3IqPJHSNM-SR-?z<~>yvGN3?5orYaa}WMtU_hhz@rj=m_$i%ATQK&AJw^N-3nh zw&15r_e1yzL(DKuH{)DsuTtu4$!#3m8^fI;zr#a_9bAkyUMs%ED<8sY0?CoJqy_tn zHg!|>?x87$E$AtXZ?$K=Mb@RzosSQguesYufPi4nu;vIIA=%}uhb zdiNP|!NaG421N99whqS+U|Z#~wQfYpXvD10^y+MZ9heS&wyk;{!bSUugii{YN$yhRxmHiA+p&Kv2_mY5)B<_aw+6On<-=|6F_meFb$j+ zk0XgONvQ6@)aC7=uaI*iDfCOap&pBX0R7(}?Zk5pC=YQ?bO4?9<4wxrQmZRkPnggy zJ0;kSlZE#k=&&D=!Is|;t5xPp-!B8*SG*v0j3Bo1)N0u}5PG<$)q>~1Xk${lz_k7$ zdxwIb;&1s?IsonALQyT>r()O{UYvnH2VR4Kgas@sj0ZR%wbHhyMIJXktSFQ6^I(YO zI7_}ArMC96$6xE$^;PgLl-y;3{%tQ?36{9_*jToBo=)8x zBhC|rCxZUx&@@PReEj8MGp{ax=VMu~!XXKh277D)g7akNGFiyW-T>6r)ssu;F@Uo9 zYc_kKDKa=PsN4V(_EGB~EbCVLYe*cN6mNxvEl^;;tRN$%M+aQ4Wum@so#@^YT^TFn znfjKT+)A!Bu=X;In2*1>jKbbgrqA}*724385EErxv5DIEVCenIT=&xm!A77121UvY z6&V#A6`i%}AWr(v;1>tayx2ZRmp^KN19e5a^3IBR)b#ucw2O}FqNdv&v$mFJKMu32 zD>E70ju6U+#F6Qi!PX0y)EH(nLPIk+M>vIzKYc#y1!qB3!)H+ncRnh3%IPlL6S-u@ zOc!QjuM0Gs<|opj;R%Jyuz^*nhPREk>v;ea%&7x{Puutc*m2Oph44>LFqhu|$m^xj zEvmE9rCd^sJ}|f&!%8+%x6Edk^rU*7=X`TQ?VSw`%I`zUxjAk)Q3U?0f?4EitE5w7L^V3K zcTP3gga60zX{4-+i;I86mOz?|j?qoSA0MJXfoVsULZoNPSP~o$5P`X{>*#sg891~5 zyFR@M<%ih|wl@0D%!I^t_p_58a%_a(H5v{gflhP&e~bqI-RSY3y&s4!B1CnwKhrva 
zzKG(V`yzi5$^m^5t3Uc8GQcz|WX|f2nNg{jpH-jp6Mf?9AI2#XQEh7uQ2c$smf{~9 zTL3Q`#>4wJNvKVogojKWvZ<1cMc<2!JG5&3yfJ>kE3LR`DDf#67^G4hwOy1)cIDD$ zKz^PKn(lToQRcnSE=GI}v95P-p3n@F0Y?Ci8|=Hc7jBq|1@_JuIKWZ@(2w6-Ke`~> z&q-OKtDX+7hXY$sL-9|)b6!ZK^7#0EBm{4O0$kHBt0aSu2MuQNmr$FItC<`)QrhIa z)pKIx;r*%}Xr$*sU1eE5AYLFAQZDbmZhJxqIY!X+it{u*2qMyd{uzJ&8nd1(`B%xi z&lX{PURjxxOlYRU4?%vH^Y{W%8ThpIAAG$XyPENI+8fS2byYi6UALG#Q>@dbrEdR6 z4W^W*U+Y1KMM&`FU@(r$U$z;6Q}cf^u;N!U{?pBJ7w|rr5`EkMnp>GHN$u0x>Er3v zq!g-Ks?5xS^k7>Ps=eq=r-tZCaxFQoAx@ zLH(*yuLJdm54!jsTccr_cslj*wsf~A!Hr*mt{Bu9;3xE=wlC4pm>+y88;NrPNWQ2c z0P;>`Yg0vM)oh#hnxs!1)b9jl!Ku-XZ8v}| zC`wR3MMFFuHYbFgjO>)nCE#D&!@0N80hw#W$Qq2c#>3{tBw{sbW*NpR;_%*qu!gvU z*;R#e^>j6}9!UIB!cY+D0r=Yx>$|c#REv?#t4k*f8cvlY73Jk-2q6*BQ`@9-CX}Go z7dzl`$PQcSF=ZRFq*BN8@Rrp8u{zCoLGrs~;ME61d$gAk`e{B#idjgvqWL#Z8p8!{ zU*dI*VR@mT%mx4wOlai~;mS{aGh7LFfUi~V3XHi&Hf6G12gugVZIgftMlQg$VUAdS zLMZ_EaG+&>c_Om0Y3vW+>-)yRHHVIlJ6;|v^I`B2H(Ha8%iMe+8t|5r3L{)cAZ_ld zm`I=B8%iIyYrQ9(gIZLbLbVj31@`^?)`U#+=e+Bp%@Gyys~fSrXmCdY3$_@KpWTwD zWVezVFtQ3=)7+kA5k4UAzs0hehK+ruT48@|ExaIQU|a8t7j4zrvVe&xw<^Ty&eqFs zxwQ3etdDf;vnFuD=gqSCV~2SreabDhjg}&k6g0|5?<$CrPk}&`AaB&1A`#YRN8KPm z$j3ANyyfeJiUyd&uHr9rR30GIYmXO%#^XfcT(NR-Liq&2E79a^aJ;X z5DSl<|3vInA1Q}A51QJS>Oh_ARKYp+fhL3!vMuA6z<7m?HUN)26AmmSyr>4cRQ@}? 
zrL%Ad5Q>ebAUi2YVsf9YEVKgPbX^uX(OR}vN9+op5}zRe+m?Uq!M=ARGGBS(0qFLW zO^>B>#Y%|K=Mp12=TT3SJj*-**R8{Vx+SjS17ugeZH1p+#vRY~`X9*&eYM^u?|IMW zNBJS>MG_#U$==%mOQiFb zEZw)RrSiTW75P=#RPx+B|A;=z;6N`Y|8c8DTjfq)nZ6eSb4>@n?N7i`Cw(n29dsq8 z9r*& z{ZT9pD!ga%SZ8&h(0l zzb&+`;O5Sf?54%yb1;YyWgql54kyv4cr7>Q`|d(%u$x%h2bL#cccq2*H0`< zS66Sy{EYulm9*=7yuQLot)0iS^qHnHxEns9c<1c?^hR$(F=A%Dj~MGrEZaGgnsKK% z*~@?V8zhkmK9?%pXcq9z5*!EPJa@58x>;Gw(+HfCr@I0RY6dXI6S$j>mt9?Sy8~qQ zV@5Npe1079{-|%jEBtF&4iUCHAs#@2 zR!ZO#Lfv=`wDL85rlG#$0`$cnY=#fLOf6Z455?9R;HRi%uO5Jixl@~Zl8u)3lW_*0 zdo-dp2iww+-!zx%cONdXL~o2tB97*LUCzx=h#q;^*Hl6f^xPyN6FnGNHMHG_+dt46 zY$LIPu&650>0jLfFl)Dn))Le^)eCbrJs6kO%h)d2FA{U^ zY_{D?!4&thuGOtcZboEKMvI!Xb068zZR#)Ax53lqD%pwIH{@Rb|Qku+vQZ zY)XdA2K>ToWzeZBs?`{Vjqa;9^Hz5Xw`PzI)&8bkgAE=ZhGZ`IhK|zKEJh<4WD}?W z;l3&}`naXGXQAOaZ|bh_?t1UnsIm*vr=ogZr;em^|K*s3x{0k`Rncv?y^QP*G?CQz zWE|~fSdxm3W~aFRP%rpa1*+4swzqBBjn}%#M5mUTJ?O(V$=f1d*czyl^hlJSrhzv9 z^jWAI*q+rE#2&xcJr8N zi!*cbHJ*^`C=9F$gvkBJ7yNl2jLpW%bj%e-8yc=xg{CnkD{&cjiadH5)=m0SjWdp^ z6EBCMtzH=+-y-tNJPup86tZvNsh_{rwR*XZ{j6Yd4@k z;Jo_31Ly8g+y4%n!Or8R0F&|0c-xUE3aEzEGKJw&+Hc`OFaUE;Dl9?#T|s30R|V0Y zK5_le1(5_H-}X0Xcm%MA`o~62l^b-2g~ma_LKmckDM|%$9;-2aH@$VOhQrj{@{xW2 zrCSs%74f~B)}53PMReCWQ8toiQu6*uHKlP@cVJ9V@~4?G!ye%WEJ-pcviv%lw$(YC zt-KTYV|ew)F2&#U>%Y`Chl_J3yO7(A^|M;hkg#&lr?t^Y&ff;0(N7u-ukNZbnVLKT zQ2@Q%3;7?BGWB9d%))K5wNyk?I6q7po3ix4!JrY78h-4?_G8aJ(V@8}NkL9m#8uT@ zxh=i&M>2HE)U2thdXn443d_?F{;BcS8|_F=>9R|x(q-ElS-M6qOwJ~EgWYOR78?#Z z^t&X#-sqG@ip1^apVps&`vbpxek#w^+vWgSO4);U0uH7LhJ@S#pg8&9vNaOjR@wIp zgSjDq#DV<1j8eZo4*n=V3;vS%@@B#na#kx9`5aQg=^BVVY8Fr!U%^)E)z8E?*rOKa zzgq^pTVc(P|G_CY*b&p;=li1MoDwEw)s7wN#aHx^{sXnILW;(79SA5_=Xz{G+*G=4W+5*`k1r(1`*ip8S$NwbSK{F_*4N! 
zFXbOAimr7k$V>B>vN&VoxEh%s70<)O=IMlbV;+Ooz|t=vqy0RBS1$Q|-M-Rbu;F|} zsuCV#%7lJzZR4s;$fr~oYE*5bkI1*>NU`&VB>*7Si5ONEz0sGZ)m=`--aS)eC!@9_ z+~?Zs|1`AzcJau7h~3Yx&s}z6gvzBFA_Q4wjjzfJq*UOphIbfrX>`6aG5*DrSYqP_ zt-gMiz8^$KY_V=`w6dt@vOwof&PY;guYX@lPI5`)&8gC!M)lT`xtP;C)lpvME)^D* zWs;6bOd`?WJuZkrAOK@WgDJ!sa{$o62iQHE0;s9W=0tTrig-|GbkS?zWT}#jwmysF5T;(T{VBMf)*NB00tF)EaC{~_cuIhqAvVaFZJ#W zo*psXz83p3w7m#n|3LMJHLo8os&ffpFe*5-7%9|^+>R({5(dIo1s_5N{ z%B-vCCYNGQU#{g>v03-u<1%iHFadnBE+;N%1e-|z3q-Yv!?zoxM@%b=Rc-i}_y<6^ z2WSwZj--C8BhfHBx8L=EIMeD4$rle}C@3W4nw&5_SBdAv?YIx(GmXVJ&3~x(|FpP8uj@1r_%O zduK%w9bfLk82Dr^J=)`b+)vWAnwZyI-x-E4gl6efJUd-E?P(x&Zqy19hg3y=%?d!M ztnPd+Nk~mlb9sk>r?{2FC+1Y*R!Y07SU*7D^e#}OXe#0RMSi7$4j~c9t2KydMc4Xk zo-O&iA1M8tZk(;!WzcWuy&S2d6$VE`8NEj$E3M`M^nd$w!!7d_1b$m@R0s4X53RtL z1(IFOVhe|<;O=jq`$RCz8&^B6#YlzIm|DG2gta5FOx_5#X6lulMfyzyy)yHKMW|yA zQ#q}?WZay_BbJaZQFT^|0-0~Xybxwp3CFrV3k-88tM}B|x@7-?t5ke`#KdUeO3Pv= zm*kkTTlSq5F=$^d<;BLKwfAz*CWU|fPzOHhRO(S2#%?4VB2kD=*(FXdbF*1Kvm{Zi!u_Z0SKy)=?v6x8^kU>*1gnuujdLcxc=eX(Y#r|n$X+RW|~dwd@NHIQ%Xn~B}> zW|`&rAhYX_#(zz*Mn0$%6g>I@+ri0O4V8?njBwh-4wY;R@Z0?`mX_k`9!Tg6Q3+as zbbp~+?oH>M{>b+8VDoKl^=mHIGDW04D=P#L7#w1PR;7CY#Q`W82Vsx-sW0zfz3Lu_ zsD8+XcUghVMLwADxx%&hpg!)S7OQp7RXs^M3UP$K^Z9v*^5ODKcP90U>f6&f+f8w| ze%(?X&!3%r1)fv-PZX9;nL7Mmuf}p+!^Zh6U$z}rINGr?e(v5K`^@hE))QwpA2$5W zlyxm_2?oA`-q@9@aAE_PZN!C#d~phDHb#&8-0~84lTD6Qgl8-WCD-elP5oEmMMBD? 
zsny+D{Pzz@)HqcDyA1Ga&*G_F7|HXrd+U7}&=D!0?;R$)tKViS@wc!EA7R>K@%j0C z5i8{MKP>yTK2T?U=rdJGfCs&_4To~Wl%o8DPGT`~(T~Hco5BL_8^-cvj`cVD&NoD^ zs&NfrL{@q~ERokw02$VlPtzjyr1E7dcLq7^f|_7xX!2JIrk@OWCv0K1&=kyS$?4Jf z3!19CP}seoj9A2?1xi6xqH#i11)I<g~{NHA+Rs2{8txE_z*YC>Wo=`UaXzBk_I~mAX zVznbb9|KpYr_LEw<`Gp_xqzjE)_*qW_r^tZY`neQb--=@9zHxiAlNzY?Ul&+V-)(x z`V&DXn>I+3kaNuWWYM&?27T5vUA|{VeOYx}E3puYgjM)nj!M_mSFU6dPsPl4H;T8o zXyTD0h(|8DpMp*@}X5-{t7|;4eR(%7E@e_%)McX$xvjz z3w&x!bAfWAP?#pscA5P%O(Z1iBFNFXjTPoqGISF_1(mORmO2?qnYO)^)Qu}7>&|}B z8hPuaVrW^vr^zEK+NTg|$thzwad`4tlMkQ0E7|};f0yLtp2@{N8(-+^YP8QRxwITH z&ZNN!x+L-wL819>v1TKNC&bmP{u#HQ1c&%yI>#tQj{nRm2b>af92s8AZV{2`C>pD@{JeqVKx*F%K-bEszTl8%C3V+-7ej zy^F{M0b8O^y~k%=BsYimh-4Cr9p_}^(Mm}-RT@ST&`B5;~s z=#NXtGi)vJ-qHs2(?(Y?Rn4v1chRoKcWPW-$|-+h3H5UyVd=939o&N7!uEB~$RIjD zq2HcYAgSNtyysI`BdtM_GJCTp6_M?WY$#a|22@zH*T;cn!js48&)3dgWH@*XlUxi@ z3Va#sAe+qGH$puhG0{x2oT+af!F;{7%h^aYYl){}seTcVGZSJByY3TS8_b^1Pi-_m{*n{pQCl*4tm;a5@lcBke)L18}_4=7h}t+Ym> z-nKc%sR|uEpzAhBmB-F`X9hQemx2ZK>30k<(zo?f=sDGeDfxqrKxjRec^{X;s_}h^C#*q10rrM7 zZ;iALP^oS9=`0nZ%33QAG5ot6+3n*JXEtijeyLFpTHt0JsIKsJy-Q*KP@t@`tnw#)hfw$Ly(6I+-<^cM?d%QncpWjJ9v*?(0ClcAsZ?V$CUW22Q@G z5W1_Tk3O#IwS<2}tacX;J)&EoJU5`I!A^8DV68eEQ=_uO-MKnRrMrhst;5JEL5VZt z%{*PJebF%8W!w!I=kmKL*$=Pcf@nl{J)Y*w+R+J&rTVakVM2m?eAtfh=Y_v;VuS&K zhu#)L@3yr8B*gkzI!^wf)jUk3@^kow0UwpgqTUcoV!CZgFQTHysUIRcp~D)5>r1JNouXDaX561 zaBV^9m6f=f3P{<>rL5KZ#ApUdgo8CNai5YmN1kL%uBkGQH;cIHq+3qf8NDpkqWjW* z@**V>Ckkuyi~h(Vye$&?4fdIsA{dKUsUUpBD`h8SEvnTs8^z~5{rQA$Qq&HUO_yII zCauD(Q8~`jJFn!J&9c8&R_plh*uDOH7O;Q(><|?VJ)i_m6co@r%T@^#3mEA} zosj{|#+lJSJgb{t?4Q2pxP`=(5$L$}cg&JX$^pR|(3mAq>-^D}<=2zCulq+|_Kzm) zUttcvkbkBDA^(L2#EzPI;Q+fVaTm_ieYttC2?2l*6aQEc;zDxU^t(5sqz^PZ^HpoQ z)R^9z`dNPN%(l;_IlETWUt6+c$|?LNjk~5f=$2BA!@k5Z)1-!|7k83O!-E$_f+|Aw zjcEg?T7$^umN*)nZ@(N%-py3nh#M?76>kgw(D`@yR;CpdA8Qj}ky%6E)5`sd@3>MG z?(-c1c03JzNmkJ`)f~y^K~%n1pC*0bpDcJtH|l#KKh7yJ-{x)sUt@wssbejh*h7tC zp}?3A8d4g7>ZaW&!BT%mFZ)9EvB%lP1;k8q+c{-9m5XrW+vltnk6!k3pWGxl*#A}H zwd{r 
zQ5YL7I-RiPS3h+29sM!iaPec7Y?7St3jpU(;N^|4A?d)2>dP;`d&%Y5+F=f{jyVIY zrn)cFEt?gtKxnZWUnkby9R{`~9b6trvA<3#`AfZnymhs%uC6*%%ymWkdX$^?szFrl zs8BoU*2A=f2)t71pdiqt5Sdj2sq{3zs3grEx}lIwYq#PS-WR3HzEcF##CD5_#a$EIjj|8=)KSB>pn=e{7*EYu&b17DQNc_M8HkfY`@AWz6e8AN7b`gjq z1i*VN+R$V^&T@=X!JbR$`-&8}%eA^UdSKj#`Sp!OUzMHs zDKYVpU9J2dm-r>AHOUR)D#2CvR8OR47Fr{k(GIVC*t?3Ln{~|0e0xszb#a*qr3`IS zfW73`HnO8#^Ilcu8MyfNG|K7XsH80lj6Z(z8If&TsEn0+EIULQl(g`&5R-FqY6 zEag@6An1~Bn&7RA`bsYp9?>nZlBS(}Ta6c*7JAFWO;wT$A{hlKQrY*^wya6G`7!9z zyrkoZ;baNC;(wFmuq~D-kLRAm%lJ7Tir36YR`uDRI8iGlhGuSGcyySg&e;G^DV{CT z?PPJG8IyJl2dEalv%mJY=3G(9cAt-pBdG1zwzw@J$RGX)uA8|t@iF7d-DnpUVJ*)C zH2i%=BMScp%}I34 zT{_#vL)^vlId2aQcY{wuV&Sq!>jkL=7P>c2V)#Qw+CTXULAidqb`DvqQRs&ar%BLQ}E+C8yO*OPAc$*I%V zamaq=*;MJ%&8<;X?1Vk-IKM!#Z0m>9>OoXUL@0CGF;)o_7QMU` z#~SuAMXftTt@j`Rl`keXK0eKE5kg7h!WKF~B`HL@j*LB*t6rw#assp5)HQ3auc5q_ z!{8u%mDjWPc}1+b3JK|SlK>%)$b4C5x76?X47=BfM76fSB4K#<;Bdc*>v;dKjGe5D z(1KwaBlF}h3!A-FuT1_Av8+_a@OfG!?rp`2ERotaZV}{E&1qdVs&y~_}8K7+8Sdw)J_@#%BD-eyQUR(0PLMO3=jJ)@gq(rSX0*( zzV2a0?bgt$j~XMF`&XGZ6_LAw=<%5cE}7EX!MA_qNLgxm2;YJ|XU#NK8!=AqyFb%1 zLdI;+(lYFH)97+Vl92tQpez174MX~5{&JmbwzH0bu&ZILPjwxq-o}S(L*Ejl)L0NS zc=eTiMt^@7xY$I_1lRhIWyh80y)-mR`%l#EGH7TegJz;&0uT%wA8_;;=n+Nj>?nSz z8>@M(G*HW~#O^4*C$pi?A(S+boZQk*Gpl@{f0HC`we5_-XCFuY1F>0v;vu|mz(Sb` zaBBePGdFQg+}x<9Ds zO@W?{QVb!h&W2m1fIz$n$(YK)nPmXhv&1%_r#^KAN9H=Q>8E*@vb#tqN2LBj=UMfC zrFJ9bdwP6a=&Y!hQg&PHI7lRD^X(p`7sFuodB{@iDCte;8L0&j$7>+KgoyVrVJ*BY zMRX)VF@ZpTG1xXLvFI*|sTy!*f;K^A*dsR{&f5ED2lAKLVo(sg0 zHA!*^i3MPmh!pgDfbRwnY_cCf@3|}mR?GBLU9+Sdwsnj_cfFpY zV#zm)f+s{->m+t{8DL5uV}K4yj%hIB?o{&~oT9~68qdvokB?j9qE4rI7US~Xqz(3D zb}d0uCz*xYzP%$xwGn&Y-`qFutj&S0~au>vUh~)-K+#Eunr+k&UW$$8#==;M(dJ9qX=sU;Fe<*Ku}0Wk4he zUYqOn(Qzhu{)f#s%MsZeV-LPJxy}4~n!Vg99jq+9mqGsWyJ~7!r{syrSIDDsMJkft|IKb-hwZ#-)JDue~hsV5*!rR5GU?M1Cj0 zkG`xpadh{QriSsP@~vfp<+O;H{)3vvu#59FFFNBtRe(A#sM>-!o`k4EFm2J0WdycK z3my3~=flx!*84N&6|pZ}9w{vy#TC`IwH_^%sm zc8!)^xCF#Y&L#b8bNIXHST=NZki}kC4<7^PH{SI}?4&QU`YyCSMrQ#kj{7c}xrlhzR7WUq1L?@hzoa7u&TP-pORdr$ 
z7)`VHQx3%YPv7m|k05&%qUwW5*rm()QV5*N_?u2X!M^A+vktMmS$260Jx|5zW;R#g zl6g=WR;%0gCe~OmvRPkRwZYGs&yT7o`Z?{fwK%Bnhv2qL<9IXwaxkr(XYQ7quNOyx zGz&eLqUbxh=Wg1jBk39>9LZ9Hys%liBoQ^Bni!NPbB7~=x=}e`FpTkE5G=;x?c-wnvTnjy||hSx-I@fm!uzLK7C#v(X4`cv}0+Xk1-5) zi89hO+++>qMVGSKzRulzlOD|0yU;6l%f3GP35}Ll4Gpp!6wudgBYpYm(q3tvRYs3} zei@ZMEB&rDvO?w|v>Js^dH^QiPy{f4kj9tt+TEr(+dCZSdveV$io73hnyk?u7yWV- z^&YaXo^Z>lslUG)LuRpRosl6NLasBg5~BJL+lg2?`NK%N0)yB*MwKq^$&XG4PbcI- zZ8{J^c5a?tu3j|boe6w*IqRq<@CUWD2MESynWZ ze0@8bCOudsx5h_58Z%ip&6hy=LOx-iwv7;2Ha6uOUzBM$-Qnl)9OAu%)QdbQW$I+) z#+W^slz5D*Vo+gEdqDU8S5bwN>_Fsse@g4C1WYNfO(b^3^N9P8@uIYw-#>mr)Qh!k zWFQN%lWrA#czUVxH;~@f7spyOyy(im*VFeF9}hsri-!FCAPoK8TwS4ORH7VxhMq;1 z^XQR=w#MeP&W6d2hQU5u%@L>mtfH7`Zx;IhmOT-=Oy| zrK*WxGAlIAGhN9y)V_Eji^5r5cOnL4ICHb+-wq+U{4KqHm%YXFi{f8Rbt@?0q2USV z>SEj$BFU$ZdXk+unmC^M7rzu^Ux8hm=DxL;-hZPe|Bq^Q;OF!y7H*F&sl2Wa?LYg_kc+~8Yn_A9f9QUhLs$Z>X@3JPvxO%t-y4nRhhzp zp^7Gi;%F895sw5caJT-JbMTja{I4wRz5C^%`I`xypFj?S}lH zo0J3mZ2&qX@wyE zMB9w}6Kzx8b^vHJ6YpgKb*8l|U;&n;h-oDF3HY^-9|25GwTa=xW5Y6pd3J|QV-!2?{0d;r;x;9##?mqlEMB&ADi{J2zL0h z1Qfy#_>d54VdxhuqC>etdvUyJ|6;Kfb?hk;2;!Cve24K#5W#Ef@>%E`%- z)25!*nDtL%k?pt}hlkb;z(~^T3;*FX(o998F}2I*#-93p`GT8{7o=~5J3qN`DVwUt zDcpH_?y)7P$mqRM1yjLpHdT<)xNDzfeO=u9Y^rn(!PM`jUux2YE>!9}```Oi_f`ya z`&u5Q(Z#~>>`R@Kp-%Ho-+(=nshiN5kPqTi98WM-LgY7Yd#3B~lwZ4#d+C^|j;p?^sKoL_vU~JoGE-@8^;*^ zmSndN{#qs?UyKK6fF5mUr@%?n&aixp2c-D;s|OUT>nUqUMj!cjO4j>wu#zOm>H_2* zX7Lve;1{2!60yKY>;<(x&IjW9#i)u7Y|^GdIJk5%#R}lM?&?PjD~rU7Tvfi^J^x|h zJCg7TP$eGzMU_}|p~<^s#LOz+ED%&EcwGKNiKucF*ma|Ek97zxetN$_33u^`BVIM; zqW^+{*nRr(GLXWkP4Xl9z6bCFp)2V8s6a)Hq!xEkj_{wD6Fl z>*gi#xqypC0v54(J47;A`D5IgzS;3Bkg7BO<^H)ubU z$QT?kMD*AC4T903OHKay(mz+@pL^qXz}O>1*Kg2mMv`#wacQ*4DiH33E(`)e&U7Zh>J3o)}h9fCJ(*B=4 zg8yF&UH{eV{XY$H6aQS_|B_na|8#vP|GB>Z(Dj}Ahp+FaW{pyT&}D+YO`@N8x`U^0 z*Q7f8EiiT$1argb!{>n$s^nDWzJ=@am^=J;# zvl&l7KYWo;c+6@)<-JiE)_YArduigD1usRm6Nikr%Dn6@^|T+6{F;U`i#ugBD*;^Ka0Q zS`P{Dbxm!;ZB(Z|RtVY-*u{xs>Rzu(kk1!nI;<@F&RmmvbcboZIJO@n1me-IDxOmS 
zI);}>kB}z<*vw#z)(K+Fqc$LI)o64r`EJvPd9VKI(EEDb2Nvy^Jwd)o!4?qYo^If-;9iI{R-so0`oS^*&**RxJ+=3`s z^hiPpfs}oJ&z*B;pR@OwIWud`z3a~G#mdT`(3J@50B18RGINI#6_lspy*^bF77`P6e_J2%ekRmJjv{2mRs z9BQ@)pTPiJM%-|EyrF*msO*el=MxPo=hD#GqerUxJjwpRP?=$nWLjIKTB%ze;^DAc za`z*aOiO+Ga>kXT?~$*=MRqHTDc|y@o~#r)%U^5Fb}QLT$?wlx)l)gA?JJn1ODQh; z<1}9k5ejrOy!7L43ueQoFU`4hnByiaqMu!sEEsn~zr3^_raK`zM+44h+%hT zqD)}!E9)a%^;6tdK057S>3PYQ+xF^lkbjR=AW(Fw{DVXWMcL{xZsBkkb#SWG%p7_6 z*W){XLr(oyqH0(=(Hh^fhziSM`hZQ3b0@wM_+mLzK!48z<;`=99!;{pla4X&M1RjFctF3pM6y zEGo={dhJcXX{xwtO|cb(Ng+lzVkvt04Ar@L*k@>U_D=tMa{tiwNzi(XDSY%Wtsz74 z?09+SIn57O>HGMU$3qk;3xgNnMl@d+>X6;L%tQ*jm5TRLDYwM+oh|u^V4wx#^>^1d zK88Q$xCMATV~7(#0w#xeA8>IjJD{mRc^6VKO@e6+AI`w5TzurGBU=~AW@CD?fzg_E zZJZb4JhL(7YCJ$BTAqtsq=FFLL!iK;r41+tbi*7o_#Gg6>e(I+W*{Nc(44((mp#2b!2C^drjLH@TW9_U%$8;LKK>@c*6T|B0nnX7 zY-uNix0S6hK}Fxrp>SS~G+vnUK#+MSw6gFh7M-$?fqz4$1&G&pkxbC-R-m?+qiHh? ze2x9F0}?mRE*&=hp1-!bdR$KFhvcRENnNaNA2VP56z$9a5$t+`6)}??NLpt#S*-W+ z-OYf9+V}F^3#;ETmi_oV1gZiB>yOfg04W0at-fw_AinCtCFby%U2q8xt&HyyV49l*{YDhYt5W z4xN%8>w`~v373QW8>M#k7qhEd`v%WB_wh7#VK}MB&Q@w|vfkrE`h8Eh4^qplXQ!CI z=wn>?t<^S`?K=LrK1`2N6)Z>?8~c6cn$7h%Vx1hI56qW3i;{ZemLbU-s7+`(*Zp0x z9F|427(2A)UW$&XbECb-H|8e$qPP6vvqv{x^FN5xX1pse{S)sjH4VX)0+PXMo81eh zDFj$&I!jvZaQR#R)@)t(LA*@bo{L4*)yolL8Mg>@rY5K@bT1$un&{HXQB0ZHD8 zpWVjwiH(&Goy?C{?+;;u5A9T&%;7uRgfsZkWvvs$ZPL3-Eny?r3AESLce|pk%d_c? 
ziJ6$(0Ee%(bEnSWo0j@h-@hRk;$a3=v082}D=jh{xm1Z&1^HY>y}gT!2VjyS(31;J zeeaT?6Xg{+$n0|TRp3qh?gg#!AJxq=5uKX{aMb19E#o9F; z?p$A}eWz~?goIy-t7Hte*h78u#F@3?Zx?Q5$>8&Iy4+UIxMxgv>I=f)kE&RHkz#p1 z$M)UF-tH)@K#~3_80(V)dA4Bt3uIA*#lmx&Io4vPLDee7`{EU*nP&913H z3oH&!3wM}0XV9u;mpXNwGaV_sKQdFZ?ovwcpQOe(`Dv9|o&O!F1OQGzdlsmgjj}G) zHOM$wxJ~5Rd2K(Jc<;6%Sr&gqd#_1%Lt~X!d=ElCGgY!x53dc1%zvTSFy{3JUtw(c z<-v`!#kQl*+0<2QcGV$;C_p6@vOG97*fD=$oG4o0H2t!%>!7~+eU6Y=OcEy;E;_U> z(CSw{IK{oP@Eql62-@PkjMdJzE3}&0l6sW%?F^MYn^`qt(laaQ30w-m4Cv1BB39z0%XgV%BkN{89eKN{{m^yVBaRpV5XRx=h2v2j(-Mc8Wvc5O|`jw<-$lbTzaz6h~9Q#|7{GW_z zC;p3lRvhMSDi4S2?TlPMtY0-{EEJO>sP4-qdz2s zomNsfA5ycVV*IEL7+eurkZK(?A5QHYR;?w+Sn)FaMBjt<+rYpEB9Bc$c!4Q zrC)#s8vLc`Pj4*uJp+qfjwV)PEqc9q9Hq&##(8gl1+}O{vH{-7?iOEM@)*3E3>jIk z$FN68n(Ebld_y~8y+D%^82;JcgnoxptfZ;ZqIDLSp>Ta=gd7%enJGDA>{fKuqY!rz z#bgHH_-gcokU0?Y7>7?4;JX%^K4`y_vHG+vN-+&?4W4S=vXRP1)kcu z517H7J@4%8nn#T7ug2^-#ewQVS0fYoH&4xrrvVa%!d>QkWdR$7^A%~t{ za!Fi5Dz0}x^alejCp?%wKmTTEA>_;(yWF4*=?AV~-XQrz&U{C&I9`y3zXXVhO%tN$ znMS=TXb~EtM_`p77($ZbO~x24!zu1&U~?bOHe z(p;?R%=UING!v5y-b~9LnAyJd0z}F-zW)JEgv|jUxN!s2QUGdGb>}*>zkT?3Htb)_ zA5s>2{t&IUfGfq)JR@rIRNwuEZ*iAgiKP^1bGmlkj3&0aC-kslGXRxQF>|%uH;b9{ z4&mk`CZkhi#c*@{SvUTDF2e`pjQY>cmx6o@3~fhnnVa>SpT{;UEEWr;%DxDEkzQ9z zP;?c!`+&4c^iFM~TKC|A4Wd}o=z;+}f3-ob!}8TY&w^=v`Mw~ycB7~32x(W+jHCNU zA3xe7fE=dA-5MzjB&QfqQ7XB4_34hT#904ez2`pY9#`0l;Hl{A@Tu$`2b(=2YG>m7 z{3GbfLG=r_k7OwYchwEbBvLHO$ufJIvJK{wC3JTMYZ!gY*!h&u)P=7mQG11ER2v#79cWZ+p9oFW2@9?i5JGC z$}XL$a0#uQ6Vm87V-KIdrMxVt^m(Jpk#esBH;-@g#iCZMcxf=Y&c$8@GFkZ-MI!M*VDH+6>7@tZgREH!(F?Sa5 z`*@(W$NCUTqTnL#8pgFsL#QQMsWxIJ8r54H#|}C9E|1JdsojtJ1i36tE!mOyBqgNo z|sgB~UjJmFgq_efUM-Qlv;x7%x1aodIuy|W@cluzvmRl@XUud?z`tBoQr`qhlB$RkRd^|*q zNzr5W8u=;886J9VVP_QkKzWd%8MC3VpQ+O^`PpEDOP=1X?^n>EE;om8+I1#jAE+Jb z;ovlHA5ewuDBAVl8L8Bzr?dl7+1TW!f5o1Ja(}?6zHU8^XNcxmY6Ie+}kxDX62DuYZ`wN_wE9_>90kMtoxtFP9CygUGB0?17eA19McnXvOa^_TT zCsePj_0S&kP<#ZPECYBlihuT2v<1a({G`#35IZymXz_rMh>3LC=oIVE>aS_S$#`cD zzyMh$0R!s5jugftX^CF>IZXd$b@|_BoQao6<;aI99-;(x?x+hIb>3GYn5(JDO%HBJ 
zKgp%qCy%2q-9H_kJp5VF7zn_rOEwlT#z30`o$m0t8?B=Uss<+_olZz!hikL-igWy0 zDo<(%k;Yqqp6sL$Gz3BCjqp~^W^|I7KWm9*vZK^b8QT?Y65dvagk>|Rok4hzs?-D< zVw;_WRs-x;e>sFQTWfxAuhI!!oi1SGY$p~ylh65Hd5sKs6~5{t2+^EPGUt%n#Hhgt z`dw{Z=rtXBF*Y3(WwO6;ip8)Nk966UcRgI7k`j;^We(^usBnm88HSD~T=ZLLx4G7g zZ^EQ?uMu>|+}{{~j5c7vl}jEy_DN!6dS&PPHFWZ&>pBmNatVbOfuRMKZVSn(-n<}w zFYR=vsCUV?eq3_}s7X^+3>}}JF1V++4sG=wgyVJE1Il}*4{x&@G-m_y3@`5HBTjzU z3j-b)dJBpffz}(w-*3zs*th%;>o4v4Y{n_T>vmk{xo68XpqjJy2Ef9h@Q=TOY6{yf zC@exc#357`UmMc9GPX^c&8CH$(z(B^XU2f;4BEb>(5-N(3jq$uPeh-3BlMNIw{cW!*zNoIcI{)trvBBm z`_Q-e9UX0gm+xM}f_oWq`1g@;zI9NDVX%-i?hV7@ZOh|2|oOiGL1_8Md3q8+C)2Xo1A$IbIHc zc<$>p6|&)V2+l-ZEKLtI{>&1bZfeeY(}p*jz^;GeIc|2ok3VmOGDOWuUENT<<*~~V zIR=FFm^xaL%*L25X-6;WtCxGaoql7KsZD=WgV`w*L`8$O@Ac+Y!A0+7k6SW#>DRDC|DbGGh0sIVrb9`vJ{P#@n@P4T< zuYNtGa{|-NiF7bcLK#QZb6hAXpL5_zY~bWUofgR7utyoi^$o#?^GWee4yDI3cLJ7C7XbJ*gq7%oBIWSa>7#83H$TQK5wAB?-@dFtQ9mWPuSWq_ z02|ILc?6K=OEA+GmMYbj9Xy809;rzb=^o~MPlNhF2v33!Q45Re$PVq^{$#4>f-HAV zbh>J#9y{9QqH?wO;Vv|hEaK#$hZZOpgzN|io`CG{djQte5!m`-aPX;JTqhuugv92yXE9*CJDGB~ z8p;{ISxa2JB+VMv8L9z)ts_T}1Fp70wi&s(zi(z+jI4{&i`hD&dN%ZtatA#LnzRH` zal`c=>hW^r`Cm=0_gK9Q<$tNTH(Eu3(k{wjlEn97vOADmtq|v6FjnS)c;|xKQ&!ou zbVsICmEa9S0fDD!rfqUWqMr?68MTv+ZP!cdUUUwIG`d)Bd+E3Y5)|6R6R!;Sb27$_ z*;|GwlG8+T7!{U|;%lXz+c1p+dVxKUs>T-YnLRW~zA?wE#CqW6T71Ow(9=5gKfvSs zueD{ifPb~DB9yeyZP?l39suxTM4bwtd`c^BR@Q<=HuZI{nGDhVUjbD8*0TPi<5K7e z!Q*6O;e--!9e;EDPZiPr(VPDD`QQ4YW`DD?ivCNmmjB88$Ug_WvTXDv1{4Aw`Y&9d zmVb|V`?s&#cwjA;kUtie8(ZG@FggkVI_2#8$wGA8o7v>8+`tY{vhL=c9gRJ zh(X%k_w0uqd~scdihO6U9+FFor#2d`*Q$z8=}xvD5?^=2W*$W9fBdw%q@l>Rn9Xjw zSlG2dW90|8z&?s(?KqlXFD|w`Q}!bfk=#B1NSn!cD4xL|=)`|!b9J1|j<}9@z;ey& zOcDih<|=S`_gm_7?N!TA4w?3x=8~m}8p@MdIYTs6_@)(xxT8;*F3Xc0mbH!XbfN`S zS$=mfCUdIP&vCmwprlZS-NV>*S)k(8&ilD^HDcrH7)*K{K3pkG=X|Pr3=^hIu)vRg zg6}1uHOXx!>EtxkAVUHW$n?@+V5IPK0PMpVw16MQo}XEQcUZf0n>1d=RdVc%A2az% z^_^X5Xnc09LU=IEw1GVa_qwwXrcC=(bu3D-!r-SgMfk3$0UlR-xj#%)~(c zsz!CcA)py^yHo{{U;Y+;O+0k!j{jFCdv!IO2<0T|5%M|j73@(*((>-J$@(*@Bp~X# 
zAJ11=SsS}U-R7D6kdcNmgel;Jo8X}PD=2&0gFwt)fxk=&9^hqoflG&n>lJNvXj*f? zcP*1^j`KNNF&uPBqihE~#48Q$6(@9l;lF|?699H^b;zSOPdBe9eJ2xp8tUBp9cr)H zQ~_3PWCWn91zkEODlPTB=zA0RqM*s4C4C@Kxuh6edF6v~S3u7!{f_yl+SU!-kVc zR1yRidp*$P{TIQr+M4dv3g8|qF5)_tEddQ)=|xy$DE9ARFr$d8c{8(cI(@b<>gP9} z_E2dt%T!tgFrLm~s%$+UJRHo5ZHbGF+zH5hpL;&OVLEbdEmJQ4nWLSi^5q}zRP&#U zhkxF*ImeDpQ5{cD_(AUSk>HC*s|fi!<E+_IXx+CNhJEvfgT!%{2UvIlGVr4oW4c&CZ~Lv1BYnphytWC+1noN0n`n#65gc=%r;D7 zYk{|Um&&GLsi`+V$Cx@=f8<-_nVeL+IF78WWze_WBLb*wr60}BE65aXwOQPNG(01l z(gWpaifP>PVDKyLXq1W~Lj3&;Zl0v(*m2a&$X3g~Vd^E!uI@?0gZ-lR37Sjtr2+1X zD5qxVQ?$2pTh8nLQg^>E7~^=|9%s-oyaQT?>@wus6n&3}Ey>}E3qS92S!wv>bLKuM zJ9URA?21bc(AFA>-~API0|sP*KIQ!iqFqKgsJbl6hZDq|d>7~rl#$Ha@EL_(obqdP z);+Fe>)nre&+K(=6nri0_I^?=awF9Vf`_pG3c3UX6U>~mfI2IMAJhmR zo!9ZsWdxrAsk=M#Stwqj_~|)%UUgqSnqA&n@QJh*W&Yo)Gyh8g{6FSD+8ATGS3EjA zB96}cv?`AyYw}P!$>7!AOW!rap2mD@Q{5ilUaCmI)lVm&r7=jukCBpgNk48Se*R@V zWeqBmb$aC>he;Fo|G$zlQ5%jBJRJZC;C`>!gQ|CEXIY%H!b3VY0A7))Y=WxA!~=Sw z8_iQ{V!vq<7Ses%eLZFd7hILWnlz!LHg#ViM^OBJy|#{Gp1?9qY%QF{k@qrQ$zjPw z=|Dm&cg#FcY0BN?tgU+0c|?sbWvmg+t1Wna^*thFNUApTY4>{16V<8-*Y~cgbeA6{ z$#i!(3y?+_b-Vor#D921q1X`Tur4}pEHSFs*82LF8cNd5kCbjkf71jfgV9|GI3~So zo(bcy6VR$WRvUM1-)uPTN4M<9Fa;;6$POKfj{=@dukS^9R&o(Mmm2$o!@S}EjRC#d zVW~rIMJcJvJEMK^qWS7wPbS3}8J}tZl(Q<6D@cF@nH2$P9G2_E%w{x1WX{PoZE$-{ z*v34Yvg1h_OZHQhVZ1s82iW#q3jJs&m)!d z=`DzCoaEZuDsFu*qYt#I)F9|mAv$$Q8O7;0vfV=#37gcg+?DD`TOR)!=UlelwW;%I?h;ZW@|IjkbybNPLzcelQT@X;Fuj zYVTLp7|E-9cDuO4-%T*_{XvJAK$_rNMw);_V*tm%WQjpW9~y%3Yoj-Y5~2(5Rf zzvpyI_g^4SZmRQE5u_0}u{vGQ=muT^tV^5FPC)thcW?Zf^Y(tZ&;Gs7Grmynz&h9~ zxFdt9(_`nO+-OAyz|oy-T+i3ZRJO5uTz;Ew=--}T3+g=3k z{|G>{SjZh{PQh+vCy*mtXm8Gq{t8+dheAlFho}LzGfB*zd;#4t0}QWvWe%e$u8@=!%t%zj3zz#9w)0-Nt$e30qmBQA z<(t>EWs}C*E3PTm-sZA=-t7vywGobcoYWrDzZh(wq&WJax8hB(FhZI1UhVScB%Fz; zJtyP$Qd&~Hhfx0EUbF}Uz@KL;H1xR(d>)a^pJMxpa5{lG|3evhU{R^g+3 zVd9%iQ-x$*Xc%gTBm3l>5ls&hCR_tAM^GoSV7g;(HJIuY^%pHDob;8ZoC{l5I)=TI zvi)HYT#T0>Kszvlomqld)H82^Wyo2Vh7`6jAuAOhS3;`NrodZH=^n5SUCJ7g3$=<4 
zrbAAu0$*LLCFr9%Bm>T>qOc;xMxJ8I!=LM<>98pE3ud>(WZANmsIB%8G-D1Scw=N8 z4e9(nD28M56~bU#jSn!l^|#Vd^apQ(Tr7AYk>(R?#`KT)699!rCCWm%t!zvs%60cd z7|?^H2YktRBbtd;mQn9aNc)L->hcBkt;Wp!yeo4HEVZ*=V<>y$XAQZ8T~-7rDV;UZN0ZoiyOd zd08!wg&3#z$JH$$MbCpwa3eZ-FG*-KMW_3u+3-1{h6-9^dnsDApVC~1DeKXLyYIvT z)wX31FnJ&m04o*7N;Ye}_gm@0A>AhG=H%xdJ(v{dFHCJUbJa>y;yW)4;S1OZUe^p#;GX(GI#q@eu*PYx{IOa zO#^tyHF1@fojw8$I{U2x)vWXG8{TtCt#pYbeRJBW74oqzM7q!a@pQGMA*Uiw)>`XB zyxE7b7!0JOpk&GLH0J8XK?AwZ4OW`B9_Q?akBiSn#QUu^RZMa6 z!l#^8p6fT&*O!#Y=*fHmfu8DB2$C{e4*?!beJsEpa0cR`-@_Vf&bWSQke*xvehF(D15Og(Ul2cUdub9(|9Q&uc@+5O;#BjrN% z&0}&>4PEJv_8BoZth@xlJr>(p$W!R5P5zMZl*Y@>8M4C|#^|VIlWc=yF?5Xv3Bm+VY7|PwHxUr11>@wZSnk{yQ*0nXCrRHVz zi;2W0-Np4{!9*qi-r(vH0ikae#~{tHuxNymoo%T;l1_D2?3K}?GC9L>LojZbBlncVB&(64{LIc4_p0g<#ew)b24GhWzJPzN;XhFO~+FXWqX!M(@WZG|8&) zzqS&ZkCf<`kAwiq1eJBx0cMdoXLN2_XpE$VKmn4!-!!q9Ih5Wu%gzUxCO3Ei+1o?I z9v~qi7cJ!O^jC+;kple(aEpJ~=#Btm*>B)mFe|v%D16W2!fE~GmaDK4{z_2~{^DDL zVvGA9@5+M0Yl}^{M|V#|D3_y2zn~l`^0Y886Gojc7ON~-Q^z#>h2%k-v-s>^oB@AB z8lgFKyWr!px#bIM2|>7wR$bheZra!K?#jlVDYFUiI*}Zv0RT|Oqjs67i#b)ZXekwL zBRKlQyNJ5zL+Zn1#c-K4!Jw=wepK|8oJ#1yZ%Ar{REeL|l1^l^RC!K<(dfZ9yN|KV z^HzE84?i!a3OfnJ#*2?`f;5{IcWvE1<1FAhYLb z@_P-{7c6MF&9FM;bm0r6GcI-rB8woSSUvqh?I^_JgoHjp;~J2~Zt_e;d>tuZ4zMxM!Dt zo81I7m5Z)w_|mnBn16Vhmlt}MLW$?8_*My!vN!BeyExV?yd+`R8DNm4*5Yf+^oBRn z^=AK*Fz}srrMWO&u%2R9us6aI+tVdAtu4jq$8;MHufY2YOz<^l>>EYdM^Z%Hs%d87 z7b~ri7EFl+XQ7H@|Im1Wf19}d?J->3k8*K07P3`|Z_1|WGnjDukqWOe6cL05XWJEd9;5!zx0f<2VRg;n2nly1=aMHMsOj72T zV6*!^i5mPBbHu0k!Jt5w5^KcimJD2{u7pFH`Yuxj$Sv0H%a2b3r-2Z<5m-hQQyoi3 zopgYeA}l9#wXk^=H`~}Xytw_cG@&MLP5m<``8sV5)Bc4*0V-HNf)y`ROytJ0$GjW$ z4poToc1is>Dw`b9@-{AP{i`qsk#R8JX-EeNhI@p!$4uI!v7MhtJ9&G4M9=ODQqtk=*Q!(p z*TnCyY->_NV~*f#2+c_{J)W+e%*snY(X8)(Df~ihT%P)o^vvu`d(z97-3cLgF@YgZ z;Ksm=lnDOJ%ZwEcCo^k#ESH|~>YLLvZf-rFPHW=0@|t{m%ilzOfQgS+T?IB)`+GF> zKiuAaZiOae(3SDOz#`11^NWx@rDVtna}K@8*0kB(t!b|uIy&?7ITo3}K}U>{4vH)a zEYsE^uP_tt&SAU56bgl8s^C%!ZzbFWBR5=o9btc z)e9GmXaFvoLse(_ZIe3fF|c*XDK@7__4W`^#ghKE&!z9hO3OhWQ?qo>G$GB3wm^1q 
zvec|kL)lWlCBREy45mlld(A^L8*Z$A7663*&I|pQ?A?FE=e1;eDLgzA3{BNpY^><~ z^qHFdjCZZJ_MSTJ4;;ztx@Uq8Sow#jK8uaSL{i@ahDXAn#aWI`Y={!jz>{+t0|PuU zN2+31KNmZzy#%wc(a~0Fi}5QPQR}c-M_SvOO*vA$kYKUWcb&*u9b+VlII$K1!Xy%0P>uWnFjcN|6k58iTMtQ+M(7nf>A3v zIP@fO-*A%XNLljkBOEW2mk+X6&~omiXXzjmbUcUopc7)WV?rk=M3(bBpW_~o=iT%p^v?C7~=sp{A%W1 z5BKa$+rB3feeQ{5Hf!@jtsH8zQZdnzx@+YiRqDIOw~S4}(;>Xnx{9nrR}Ht-nE+yh zcR*A*Nx~O2lmi4x&tk_OKFZ&`Ezkmj*&0=kzf&H2Xhbb<5t>#Z^ONjNBl(>u?8}&z zBSq*<+ncQbNmqyn$ELj5$V-5dD?ME&9p+==?*%sO%k3-8vgWl0neJ`uZLI?@sIw9q zRT@8jAO1c>4V6rgeDCk|=ZXKrh9`dtCU0CFB6fOnN$m4Yznh@4v-xLCBYsl1rHa{G zBud;YRcA0OPIWSKHWC3Dn}o)wE02Zv=1^0z9v&7P7XO8&jS>J@;epmE@acY;ygUI} zz;of`O;xz*1iuLiiHR-0Cou8KJVY$D=*xI@?MUkw_*TAbxuvLHb%^Lr-jw+l6IXyo zPPJ$XePUMr(hZkG+oX!RDIXY|>f%wy9aKc|e$ZniE^8s*Vg{|f;0!9QnK{UHXxKEb zCm1(OhluUqNFsl^wsq{!i~k(3|K5|N*vBmMWs?X+{g=lUm1i@nL&Sl_S{!EDPB5a< za8|Fy0Dqt%(Zzt#kb^?%!yFK;_XVfL2O%J7{lA1_l0ZiqKz(*1&?28#kfhG=zn86S ztm#J1>COBgF)Lbk2G9+i@=L=eBlnG{gq-9FcV--C)S2B_-#7;q#MOdNP0Npc!{zdh zff@fi`g08a&pgNnCJCCgDX7G{Al(gg<9ueVDQ;xZh!XU39u}G`4pEqdX82G`fcQA_ z|H2^omKp@|Ed*MukN^=B@#jR$4-);*rKbhb(SkbWO*?{XO%_LtzMr~H2bcZfcA@&PiY z9Q{zo{G3yfhd+PBpBXhIPz?F`L;g*%J(mLbWuwc5M7RHOVg29C5B^8WCI64lrM)(a Q5e+X_`$G&2^w+8X1u%?H6#xJL literal 0 HcmV?d00001 diff --git a/docs/api.rst b/docs/api.rst new file mode 100644 index 0000000..8ca9118 --- /dev/null +++ b/docs/api.rst @@ -0,0 +1,44 @@ +fileio +------- +.. automodule:: mmcv.fileio + :members: + +image +------ +.. automodule:: mmcv.image + :members: + +video +------ +.. automodule:: mmcv.video + :members: + +arraymisc +--------- +.. automodule:: mmcv.arraymisc + :members: + +visualization +-------------- +.. automodule:: mmcv.visualization + :members: + +utils +----- +.. automodule:: mmcv.utils + :members: + +cnn +---- +.. automodule:: mmcv.cnn + :members: + +runner +------ +.. automodule:: mmcv.runner + :members: + +ops +------ +.. 
automodule:: mmcv.ops + :members: diff --git a/docs/community/contributing.md b/docs/community/contributing.md new file mode 120000 index 0000000..f939e75 --- /dev/null +++ b/docs/community/contributing.md @@ -0,0 +1 @@ +../../CONTRIBUTING.md \ No newline at end of file diff --git a/docs/community/pr.md b/docs/community/pr.md new file mode 100644 index 0000000..77bdbf7 --- /dev/null +++ b/docs/community/pr.md @@ -0,0 +1,94 @@ +## Pull Request (PR) + +### What is PR + +`PR` is the abbreviation of `Pull Request`. Here's the definition of `PR` in the [official document](https://docs.github.com/en/github/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/about-pull-requests) of Github. + +> Pull requests let you tell others about changes you've pushed to a branch in a repository on GitHub. Once a pull request is opened, you can discuss and review the potential changes with collaborators and add follow-up commits before your changes are merged into the base branch. + +### Basic Workflow + +1. Get the most recent codebase +2. Checkout a new branch from the master branch +3. Commit your changes +4. Push your changes and create a PR +5. Discuss and review your code +6. Merge your branch to the master branch + +### Procedures in detail + +1. Get the most recent codebase + + When you work on your first PR + - Fork the OpenMMLab repository: click the **fork** button at the top right corner of Github page + ![avatar](../_static/community/1.png) + + - Clone forked repository to local + ```bash + git clone git@github.com:XXX/mmcv.git + ``` + + - Add source repository to upstream + ```bash + git remote add upstream git@github.com:open-mmlab/mmcv + ``` + + + After your first PR + - Checkout master branch of the local repository and pull the latest master branch of the source repository + ```bash + git checkout master + git pull upstream master + ``` + +2. 
Checkout a new branch from the master branch + ```bash + git checkout -b branchname + ``` + +```{tip} +To make commit history clear, we strongly recommend you checkout the master branch before create a new branch. +``` + +3. Commit your changes + ```bash + # coding + git add [files] + git commit -m 'messages' + ``` + +4. Push your changes to the forked repository and create a PR + + Push the branch to your forked remote repository + ```bash + git push origin branchname + ``` + + + Create a PR + ![avatar](../_static/community/2.png) + + + Revise PR message template to describe your motivation and modifications made in this PR. You can also link the related issue to the PR manually in the PR message (For more information, checkout the [official guidance](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue)). + +5. Discuss and review your code + + After creating a pull request, you can ask a specific person to review the changes you've proposed + ![avatar](../_static/community/3.png) + + + Modify your codes according to reviewers' suggestions and then push your changes + +6. Merge your branch to the master branch and delete the branch + ```bash + git branch -d branchname # delete local branch + git push origin --delete branchname # delete remote branch + ``` + +### PR Specs + +1. Use [pre-commit](https://pre-commit.com) hook to avoid issues of code style +2. One short-time branch should be matched with only one PR +3. Accomplish a detailed change in one PR. Avoid large PR + >- Bad: Support Faster R-CNN + >- Acceptable: Add a box head to Faster R-CNN + >- Good: Add a parameter to box head to support custom conv-layer number +4. Provide clear and significant commit message +5. Provide clear and meaningful PR description + >- Task name should be clarified in title. 
The general format is: [Prefix] Short description of the PR (Suffix) + >- Prefix: add new feature [Feature], fix bug [Fix], related to documents [Docs], in developing [WIP] (which will not be reviewed temporarily) + >- Introduce main changes, results and influences on other modules in short description + >- Associate related issues and pull requests with a milestone diff --git a/docs/en/compatibility.md b/docs/compatibility.md similarity index 100% rename from docs/en/compatibility.md rename to docs/compatibility.md diff --git a/docs/zh_cn/conf.py b/docs/conf.py similarity index 62% rename from docs/zh_cn/conf.py rename to docs/conf.py index 7bfb9c2..bea4706 100644 --- a/docs/zh_cn/conf.py +++ b/docs/conf.py @@ -15,19 +15,21 @@ import os import sys import pytorch_sphinx_theme +from m2r import MdInclude +from recommonmark.transform import AutoStructify from sphinx.builders.html import StandaloneHTMLBuilder -sys.path.insert(0, os.path.abspath('../..')) +sys.path.insert(0, os.path.abspath('..')) -version_file = '../../mmcv/version.py' -with open(version_file) as f: +version_file = '../mmcv/version.py' +with open(version_file, 'r') as f: exec(compile(f.read(), version_file, 'exec')) __version__ = locals()['__version__'] # -- Project information ----------------------------------------------------- project = 'mmcv' -copyright = '2018-2022, OpenMMLab' +copyright = '2018-2021, OpenMMLab' author = 'MMCV Authors' # The short X.Y version @@ -47,8 +49,6 @@ release = __version__ extensions = [ 'sphinx.ext.autodoc', - 'sphinx.ext.autosummary', - 'sphinx.ext.intersphinx', 'sphinx.ext.napoleon', 'sphinx.ext.viewcode', 'sphinx.ext.autosectionlabel', @@ -57,18 +57,6 @@ extensions = [ 'sphinx_copybutton', ] # yapf: disable -myst_heading_anchors = 4 - -myst_enable_extensions = ['colon_fence'] - -# Configuration for intersphinx -intersphinx_mapping = { - 'python': ('https://docs.python.org/3', None), - 'numpy': ('https://numpy.org/doc/stable', None), - 'torch': 
('https://pytorch.org/docs/stable/', None), - 'mmengine': ('https://mmengine.readthedocs.io/en/latest', None), -} - autodoc_mock_imports = ['mmcv._ext', 'mmcv.utils.ext_loader', 'torchvision'] autosectionlabel_prefix_document = True @@ -91,7 +79,7 @@ master_doc = 'index' # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = 'zh_CN' +language = None # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. @@ -120,9 +108,92 @@ html_theme_options = { 'name': 'GitHub', 'url': 'https://github.com/open-mmlab/mmcv' }, - ], - # Specify the language of shared menu - 'menu_lang': 'cn', + { + 'name': + 'Docs', + 'children': [ + { + 'name': 'MMCV', + 'url': 'https://mmcv.readthedocs.io/en/latest/', + }, + { + 'name': 'MIM', + 'url': 'https://openmim.readthedocs.io/en/latest/' + }, + { + 'name': 'MMAction2', + 'url': 'https://mmaction2.readthedocs.io/en/latest/', + }, + { + 'name': 'MMClassification', + 'url': + 'https://mmclassification.readthedocs.io/en/latest/', + }, + { + 'name': 'MMDetection', + 'url': 'https://mmdetection.readthedocs.io/en/latest/', + }, + { + 'name': 'MMDetection3D', + 'url': 'https://mmdetection3d.readthedocs.io/en/latest/', + }, + { + 'name': 'MMEditing', + 'url': 'https://mmediting.readthedocs.io/en/latest/', + }, + { + 'name': 'MMGeneration', + 'url': 'https://mmgeneration.readthedocs.io/en/latest/', + }, + { + 'name': 'MMOCR', + 'url': 'https://mmocr.readthedocs.io/en/latest/', + }, + { + 'name': 'MMPose', + 'url': 'https://mmpose.readthedocs.io/en/latest/', + }, + { + 'name': 'MMSegmentation', + 'url': 'https://mmsegmentation.readthedocs.io/en/latest/', + }, + { + 'name': 'MMTracking', + 'url': 'https://mmtracking.readthedocs.io/en/latest/', + }, + { + 'name': 'MMFlow', + 'url': 'https://mmflow.readthedocs.io/en/latest/', + }, + { + 'name': 'MMFewShot', + 'url': 
'https://mmfewshot.readthedocs.io/en/latest/', + }, + ] + }, + { + 'name': + 'OpenMMLab', + 'children': [ + { + 'name': 'Homepage', + 'url': 'https://openmmlab.com/' + }, + { + 'name': 'GitHub', + 'url': 'https://github.com/open-mmlab/' + }, + { + 'name': 'Twitter', + 'url': 'https://twitter.com/OpenMMLab' + }, + { + 'name': 'Zhihu', + 'url': 'https://zhihu.com/people/openmmlab' + }, + ] + }, + ] } # Add any paths that contain custom static files (such as style sheets) here, @@ -215,3 +286,16 @@ StandaloneHTMLBuilder.supported_image_types = [ # Ignore >>> when copying code copybutton_prompt_text = r'>>> |\.\.\. ' copybutton_prompt_is_regexp = True + + +def setup(app): + app.add_config_value('no_underscore_emphasis', False, 'env') + app.add_config_value('m2r_parse_relative_links', False, 'env') + app.add_config_value('m2r_anonymous_references', False, 'env') + app.add_config_value('m2r_disable_inline_math', False, 'env') + app.add_directive('mdinclude', MdInclude) + app.add_config_value('recommonmark_config', { + 'auto_toc_tree_section': 'Contents', + 'enable_eval_rst': True, + }, True) + app.add_transform(AutoStructify) diff --git a/docs/en/deployment/mmcv_ops_definition.md b/docs/deployment/mmcv_ops_definition.md similarity index 80% rename from docs/en/deployment/mmcv_ops_definition.md rename to docs/deployment/mmcv_ops_definition.md index d7eabb3..5696316 100644 --- a/docs/en/deployment/mmcv_ops_definition.md +++ b/docs/deployment/mmcv_ops_definition.md @@ -1,10 +1,7 @@ -# MMCV Operators - -To make custom operators in MMCV more standard, precise definitions of each operator are listed in this document. 
+# Definition of custom operators in MMCV - -- [MMCV Operators](#mmcv-operators) +- [Definition of custom operators in MMCV](#definition-of-custom-operators-in-mmcv) - [MMCVBorderAlign](#mmcvborderalign) - [Description](#description) - [Parameters](#parameters) @@ -83,26 +80,25 @@ To make custom operators in MMCV more standard, precise definitions of each oper - [Inputs](#inputs-12) - [Outputs](#outputs-12) - [Type Constraints](#type-constraints-12) - - [grid_sampler\*](#grid_sampler) +- [torch](#torch) + - [grid_sampler](#grid_sampler) - [Description](#description-13) - [Parameters](#parameters-13) - [Inputs](#inputs-13) - [Outputs](#outputs-13) - [Type Constraints](#type-constraints-13) - - [cummax\*](#cummax) + - [cummax](#cummax) - [Description](#description-14) - [Parameters](#parameters-14) - [Inputs](#inputs-14) - [Outputs](#outputs-14) - [Type Constraints](#type-constraints-14) - - [cummin\*](#cummin) + - [cummin](#cummin) - [Description](#description-15) - [Parameters](#parameters-15) - [Inputs](#inputs-15) - [Outputs](#outputs-15) - [Type Constraints](#type-constraints-15) - - [Reminders](#reminders) - ## MMCVBorderAlign @@ -122,9 +118,9 @@ Read [BorderDet: Border Feature for Dense Object Detection](ttps://arxiv.org/abs ### Parameters -| Type | Parameter | Description | -| ----- | ----------- | ----------------------------------------------------------------------------------- | -| `int` | `pool_size` | number of positions sampled over the boxes' borders(e.g. top, bottom, left, right). | +| Type | Parameter | Description | +| ------- | --------------- | -------------------------------------------------------------- | +| `int` | `pool_size` | number of positions sampled over the boxes' borders(e.g. top, bottom, left, right). 
| ### Inputs @@ -156,11 +152,11 @@ Read [CARAFE: Content-Aware ReAssembly of FEatures](https://arxiv.org/abs/1905.0 ### Parameters -| Type | Parameter | Description | -| ------- | -------------- | --------------------------------------------- | -| `int` | `kernel_size` | reassemble kernel size, should be odd integer | -| `int` | `group_size` | reassemble group size | -| `float` | `scale_factor` | upsample ratio(>=1) | +| Type | Parameter | Description | +| ------- | --------------- | -------------------------------------------------------------- | +| `int` | `kernel_size` | reassemble kernel size, should be odd integer| +| `int` | `group_size` | reassemble group size | +| `float` | `scale_factor` | upsample ratio(>=1) | ### Inputs @@ -191,7 +187,8 @@ Read [CCNet: Criss-Cross Attention for SemanticSegmentation](https://arxiv.org/p ### Parameters -None +| Type | Parameter | Description | +| ------- | --------------- | -------------------------------------------------------------- | ### Inputs @@ -222,7 +219,8 @@ Read [CCNet: Criss-Cross Attention for SemanticSegmentation](https://arxiv.org/p ### Parameters -None +| Type | Parameter | Description | +| ------- | --------------- | -------------------------------------------------------------- | ### Inputs @@ -244,6 +242,7 @@ None - T:tensor(float32) + ## MMCVCornerPool ### Description @@ -252,9 +251,9 @@ Perform CornerPool on `input` features. 
Read [CornerNet -- Detecting Objects as ### Parameters -| Type | Parameter | Description | -| ----- | --------- | ---------------------------------------------------------------- | -| `int` | `mode` | corner pool mode, (0: `top`, 1: `bottom`, 2: `left`, 3: `right`) | +| Type | Parameter | Description | +| ------- | --------------- | ---------------------------------------------------------------- | +| `int` | `mode` | corner pool mode, (0: `top`, 1: `bottom`, 2: `left`, 3: `right`) | ### Inputs @@ -284,15 +283,15 @@ Read [Deformable Convolutional Networks](https://arxiv.org/pdf/1703.06211.pdf) f ### Parameters -| Type | Parameter | Description | -| -------------- | ------------------- | ----------------------------------------------------------------------------------------------------------------- | -| `list of ints` | `stride` | The stride of the convolving kernel, (sH, sW). Defaults to `(1, 1)`. | -| `list of ints` | `padding` | Paddings on both sides of the input, (padH, padW). Defaults to `(0, 0)`. | -| `list of ints` | `dilation` | The spacing between kernel elements (dH, dW). Defaults to `(1, 1)`. | -| `int` | `groups` | Split input into groups. `input_channel` should be divisible by the number of groups. Defaults to `1`. | -| `int` | `deformable_groups` | Groups of deformable offset. Defaults to `1`. | -| `int` | `bias` | Whether to add a learnable bias to the output. `0` stands for `False` and `1` stands for `True`. Defaults to `0`. | -| `int` | `im2col_step` | Groups of deformable offset. Defaults to `32`. | +| Type | Parameter | Description | +| -------------- | ------------------ | ------------------------------------------------------------------------------------- | +| `list of ints` | `stride` | The stride of the convolving kernel, (sH, sW). Defaults to `(1, 1)`. | +| `list of ints` | `padding` | Paddings on both sides of the input, (padH, padW). Defaults to `(0, 0)`. | +| `list of ints` | `dilation` | The spacing between kernel elements (dH, dW). 
Defaults to `(1, 1)`. | +| `int` | `groups` | Split input into groups. `input_channel` should be divisible by the number of groups. Defaults to `1`.| +| `int` | `deformable_groups` | Groups of deformable offset. Defaults to `1`. | +| `int` | `bias` | Whether to add a learnable bias to the output. `0` stands for `False` and `1` stands for `True`. Defaults to `0`. | +| `int` | `im2col_step` | Groups of deformable offset. Defaults to `32`. | ### Inputs @@ -324,11 +323,11 @@ Perform Modulated Deformable Convolution on input feature, read [Deformable Conv ### Parameters -| Type | Parameter | Description | -| -------------- | ------------------- | ------------------------------------------------------------------------------------- | -| `list of ints` | `stride` | The stride of the convolving kernel. (sH, sW) | -| `list of ints` | `padding` | Paddings on both sides of the input. (padH, padW) | -| `list of ints` | `dilation` | The spacing between kernel elements. (dH, dW) | +| Type | Parameter | Description | +| -------------- | ------------------ | ------------------------------------------------------------------------------------- | +| `list of ints` | `stride` | The stride of the convolving kernel. (sH, sW) | +| `list of ints` | `padding` | Paddings on both sides of the input. (padH, padW) | +| `list of ints` | `dilation` | The spacing between kernel elements. (dH, dW) | | `int` | `deformable_groups` | Groups of deformable offset. | | `int` | `groups` | Split input into groups. `input_channel` should be divisible by the number of groups. 
| @@ -366,13 +365,13 @@ Deformable roi pooling layer ### Parameters -| Type | Parameter | Description | -| ------- | ---------------- | ------------------------------------------------------------------------------------------------------------- | +| Type | Parameter | Description | +| ------- | --------------- | -------------------------------------------------------------- | | `int` | `output_height` | height of output roi | | `int` | `output_width` | width of output roi | | `float` | `spatial_scale` | used to scale the input boxes | | `int` | `sampling_ratio` | number of input samples to take for each output sample. `0` means to take samples densely for current models. | -| `float` | `gamma` | gamma | +| `float` | `gamma` | gamma | ### Inputs @@ -405,10 +404,10 @@ Read [Pixel Recurrent Neural Networks](https://arxiv.org/abs/1601.06759) for mor ### Parameters -| Type | Parameter | Description | -| -------------- | --------- | -------------------------------------------------------------------------------- | -| `list of ints` | `stride` | The stride of the convolving kernel. (sH, sW). **Only support stride=1 in mmcv** | -| `list of ints` | `padding` | Paddings on both sides of the input. (padH, padW). Defaults to `(0, 0)`. | +| Type | Parameter | Description | +| ------- | --------------- | -------------------------------------------------------------- | +| `list of ints` | `stride` | The stride of the convolving kernel. (sH, sW). **Only support stride=1 in mmcv** | +| `list of ints` | `padding` | Paddings on both sides of the input. (padH, padW). Defaults to `(0, 0)`. 
| ### Inputs @@ -444,10 +443,10 @@ Read [PSANet: Point-wise Spatial Attention Network for Scene Parsing](https://hs ### Parameters -| Type | Parameter | Description | -| -------------- | ----------- | -------------------------------------------- | -| `int` | `psa_type` | `0` means collect and `1` means `distribute` | -| `list of ints` | `mask_size` | The size of mask | +| Type | Parameter | Description | +| ------- | --------------- | -------------------------------------------------------------- | +| `int` | `psa_type` | `0` means collect and `1` means `distribute` | +| `list of ints` | `mask_size` | The size of mask | ### Inputs @@ -479,9 +478,9 @@ Note this definition is slightly different with [onnx: NonMaxSuppression](https: | Type | Parameter | Description | | ------- | ---------------------------- | ------------------------------------------------------------------------------------------------------------------------------------ | -| `int` | `center_point_box` | 0 - the box data is supplied as \[y1, x1, y2, x2\], 1-the box data is supplied as \[x_center, y_center, width, height\]. | +| `int` | `center_point_box` | 0 - the box data is supplied as [y1, x1, y2, x2], 1-the box data is supplied as [x_center, y_center, width, height]. | | `int` | `max_output_boxes_per_class` | The maximum number of boxes to be selected per batch per class. Default to 0, number of output boxes equal to number of input boxes. | -| `float` | `iou_threshold` | The threshold for deciding whether boxes overlap too much with respect to IoU. Value range \[0, 1\]. Default to 0. | +| `float` | `iou_threshold` | The threshold for deciding whether boxes overlap too much with respect to IoU. Value range [0, 1]. Default to 0. | | `float` | `score_threshold` | The threshold for deciding when to remove boxes based on score. | | `int` | `offset` | 0 or 1, boxes' width or height is (x2 - x1 + offset). 
| @@ -544,6 +543,7 @@ Perform RoIAlign on output feature, used in bbox_head of most two-stage detector - T:tensor(float32) + ## MMCVRoIAlignRotated ### Description @@ -552,15 +552,15 @@ Perform RoI align pooling for rotated proposals ### Parameters -| Type | Parameter | Description | -| ------- | ---------------- | ------------------------------------------------------------------------------------------------------------- | +| Type | Parameter | Description | +| ------- | --------------- | -------------------------------------------------------------- | | `int` | `output_height` | height of output roi | | `int` | `output_width` | width of output roi | | `float` | `spatial_scale` | used to scale the input boxes | | `int` | `sampling_ratio` | number of input samples to take for each output sample. `0` means to take samples densely for current models. | | `str` | `mode` | pooling mode in each bin. `avg` or `max` | | `int` | `aligned` | If `aligned=0`, use the legacy implementation in MMDetection. Else, align the results more perfectly. | -| `int` | `clockwise` | If `aligned=0`, use the legacy implementation in MMDetection. Else, align the results more perfectly. | +| `int` | `clockwise` | If `aligned=0`, use the legacy implementation in MMDetection. Else, align the results more perfectly. 
| ### Inputs @@ -581,7 +581,9 @@ Perform RoI align pooling for rotated proposals - T:tensor(float32) -## grid_sampler\* +# torch + +## grid_sampler ### Description @@ -617,7 +619,7 @@ Check [torch.nn.functional.grid_sample](https://pytorch.org/docs/stable/generate - T:tensor(float32, Linear) -## cummax\* +## cummax ### Description @@ -625,9 +627,9 @@ Returns a tuple (`values`, `indices`) where `values` is the cumulative maximum e ### Parameters -| Type | Parameter | Description | -| ----- | --------- | -------------------------------------- | -| `int` | `dim` | the dimension to do the operation over | +| Type | Parameter | Description | +| ------- | --------------- | ---------------------------------------------------------------- | +| `int` | `dim` | the dimension to do the operation over | ### Inputs @@ -649,7 +651,7 @@ Returns a tuple (`values`, `indices`) where `values` is the cumulative maximum e - T:tensor(float32) -## cummin\* +## cummin ### Description @@ -657,9 +659,9 @@ Returns a tuple (`values`, `indices`) where `values` is the cumulative minimum e ### Parameters -| Type | Parameter | Description | -| ----- | --------- | -------------------------------------- | -| `int` | `dim` | the dimension to do the operation over | +| Type | Parameter | Description | +| ------- | --------------- | ---------------------------------------------------------------- | +| `int` | `dim` | the dimension to do the operation over | ### Inputs @@ -680,7 +682,3 @@ Returns a tuple (`values`, `indices`) where `values` is the cumulative minimum e ### Type Constraints - T:tensor(float32) - -## Reminders - -- Operators endwith `*` are defined in Torch and are included here for the conversion to ONNX. 
diff --git a/docs/deployment/onnx.md b/docs/deployment/onnx.md new file mode 100644 index 0000000..be6c59c --- /dev/null +++ b/docs/deployment/onnx.md @@ -0,0 +1,19 @@ +## Introduction of onnx module in MMCV (Experimental) + +### register_extra_symbolics + +Some extra symbolic functions need to be registered before exporting PyTorch model to ONNX. + +#### Example + +```python +import mmcv +from mmcv.onnx import register_extra_symbolics + +opset_version = 11 +register_extra_symbolics(opset_version) +``` + +#### FAQs + +- None diff --git a/docs/deployment/onnxruntime_custom_ops.md b/docs/deployment/onnxruntime_custom_ops.md new file mode 100644 index 0000000..baaa576 --- /dev/null +++ b/docs/deployment/onnxruntime_custom_ops.md @@ -0,0 +1,378 @@ +## Onnxruntime Custom Ops + + + +- [Onnxruntime Custom Ops](#onnxruntime-custom-ops) + - [SoftNMS](#softnms) + - [Description](#description) + - [Parameters](#parameters) + - [Inputs](#inputs) + - [Outputs](#outputs) + - [Type Constraints](#type-constraints) + - [RoIAlign](#roialign) + - [Description](#description-1) + - [Parameters](#parameters-1) + - [Inputs](#inputs-1) + - [Outputs](#outputs-1) + - [Type Constraints](#type-constraints-1) + - [NMS](#nms) + - [Description](#description-2) + - [Parameters](#parameters-2) + - [Inputs](#inputs-2) + - [Outputs](#outputs-2) + - [Type Constraints](#type-constraints-2) + - [grid_sampler](#grid_sampler) + - [Description](#description-3) + - [Parameters](#parameters-3) + - [Inputs](#inputs-3) + - [Outputs](#outputs-3) + - [Type Constraints](#type-constraints-3) + - [CornerPool](#cornerpool) + - [Description](#description-4) + - [Parameters](#parameters-4) + - [Inputs](#inputs-4) + - [Outputs](#outputs-4) + - [Type Constraints](#type-constraints-4) + - [cummax](#cummax) + - [Description](#description-5) + - [Parameters](#parameters-5) + - [Inputs](#inputs-5) + - [Outputs](#outputs-5) + - [Type Constraints](#type-constraints-5) + - [cummin](#cummin) + - [Description](#description-6) + 
- [Parameters](#parameters-6) + - [Inputs](#inputs-6) + - [Outputs](#outputs-6) + - [Type Constraints](#type-constraints-6) + - [MMCVModulatedDeformConv2d](#mmcvmodulateddeformconv2d) + - [Description](#description-7) + - [Parameters](#parameters-7) + - [Inputs](#inputs-7) + - [Outputs](#outputs-7) + - [Type Constraints](#type-constraints-7) + - [MMCVDeformConv2d](#mmcvdeformconv2d) + - [Description](#description-8) + - [Parameters](#parameters-8) + - [Inputs](#inputs-8) + - [Outputs](#outputs-8) + - [Type Constraints](#type-constraints-8) + + + +### SoftNMS + +#### Description + +Perform soft NMS on `boxes` with `scores`. Read [Soft-NMS -- Improving Object Detection With One Line of Code](https://arxiv.org/abs/1704.04503) for detail. + +#### Parameters + +| Type | Parameter | Description | +| ------- | --------------- | -------------------------------------------------------------- | +| `float` | `iou_threshold` | IoU threshold for NMS | +| `float` | `sigma` | hyperparameter for gaussian method | +| `float` | `min_score` | score filter threshold | +| `int` | `method` | method to do the nms, (0: `naive`, 1: `linear`, 2: `gaussian`) | +| `int` | `offset` | `boxes` width or height is (x2 - x1 + offset). (0 or 1) | + +#### Inputs + +

+
boxes: T
+
Input boxes. 2-D tensor of shape (N, 4). N is the number of boxes.
+
scores: T
+
Input scores. 1-D tensor of shape (N, ).
+
+ +#### Outputs + +
+
dets: T
+
Output boxes and scores. 2-D tensor of shape (num_valid_boxes, 5), [[x1, y1, x2, y2, score], ...]. num_valid_boxes is the number of valid boxes.
+
indices: tensor(int64)
+
Output indices. 1-D tensor of shape (num_valid_boxes, ).
+
+ +#### Type Constraints + +- T:tensor(float32) + +### RoIAlign + +#### Description + +Perform RoIAlign on output feature, used in bbox_head of most two-stage detectors. + +#### Parameters + +| Type | Parameter | Description | +| ------- | ---------------- | ------------------------------------------------------------------------------------------------------------- | +| `int` | `output_height` | height of output roi | +| `int` | `output_width` | width of output roi | +| `float` | `spatial_scale` | used to scale the input boxes | +| `int` | `sampling_ratio` | number of input samples to take for each output sample. `0` means to take samples densely for current models. | +| `str` | `mode` | pooling mode in each bin. `avg` or `max` | +| `int` | `aligned` | If `aligned=0`, use the legacy implementation in MMDetection. Else, align the results more perfectly. | + +#### Inputs + +
+
input: T
+
Input feature map; 4-D tensor of shape (N, C, H, W), where N is the batch size, C is the number of channels, H and W are the height and width of the data.
+
rois: T
+
RoIs (Regions of Interest) to pool over; 2-D tensor of shape (num_rois, 5) given as [[batch_index, x1, y1, x2, y2], ...]. The RoIs' coordinates are in the coordinate system of the input.
+
+ +#### Outputs + +
+
feat: T
+
RoI pooled output, 4-D tensor of shape (num_rois, C, output_height, output_width). The r-th batch element feat[r-1] is a pooled feature map corresponding to the r-th RoI RoIs[r-1].
+
+ +#### Type Constraints + +- T:tensor(float32) + +### NMS + +#### Description + +Filter out boxes has high IoU overlap with previously selected boxes. + +#### Parameters + +| Type | Parameter | Description | +| ------- | --------------- | ---------------------------------------------------------------------------------------------------------------- | +| `float` | `iou_threshold` | The threshold for deciding whether boxes overlap too much with respect to IoU. Value range [0, 1]. Default to 0. | +| `int` | `offset` | 0 or 1, boxes' width or height is (x2 - x1 + offset). | + +#### Inputs + +
+
bboxes: T
+
Input boxes. 2-D tensor of shape (num_boxes, 4). num_boxes is the number of input boxes.
+
scores: T
+
Input scores. 1-D tensor of shape (num_boxes, ).
+
+ +#### Outputs + +
+
indices: tensor(int32, Linear)
+
Selected indices. 1-D tensor of shape (num_valid_boxes, ). num_valid_boxes is the number of valid boxes.
+
+ +#### Type Constraints + +- T:tensor(float32) + +### grid_sampler + +#### Description + +Perform sample from `input` with pixel locations from `grid`. + +#### Parameters + +| Type | Parameter | Description | +| ----- | -------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `int` | `interpolation_mode` | Interpolation mode to calculate output values. (0: `bilinear` , 1: `nearest`) | +| `int` | `padding_mode` | Padding mode for outside grid values. (0: `zeros`, 1: `border`, 2: `reflection`) | +| `int` | `align_corners` | If `align_corners=1`, the extrema (`-1` and `1`) are considered as referring to the center points of the input's corner pixels. If `align_corners=0`, they are instead considered as referring to the corner points of the input's corner pixels, making the sampling more resolution agnostic. | + +#### Inputs + +
+
input: T
+
Input feature; 4-D tensor of shape (N, C, inH, inW), where N is the batch size, C is the number of channels, inH and inW are the height and width of the data.
+
grid: T
+
Input grid; 4-D tensor of shape (N, outH, outW, 2), where outH and outW are the height and width of the grid and the output.
+
+ +#### Outputs + +
+
output: T
+
Output feature; 4-D tensor of shape (N, C, outH, outW).
+
+ +#### Type Constraints + +- T:tensor(float32, Linear) + +### CornerPool + +#### Description + +Perform CornerPool on `input` features. Read [CornerNet -- Detecting Objects as Paired Keypoints](https://arxiv.org/abs/1808.01244) for more details. + +#### Parameters + +| Type | Parameter | Description | +| ----- | --------- | ---------------------------------------------------------------- | +| `int` | `mode` | corner pool mode, (0: `top`, 1: `bottom`, 2: `left`, 3: `right`) | + +#### Inputs + +
+
input: T
+
Input features. 4-D tensor of shape (N, C, H, W). N is the batch size.
+
+ +#### Outputs + +
+
output: T
+
Output the pooled features. 4-D tensor of shape (N, C, H, W).
+
+ +#### Type Constraints + +- T:tensor(float32) + +### cummax + +#### Description + +Returns a tuple (`values`, `indices`) where `values` is the cumulative maximum elements of `input` in the dimension `dim`. And `indices` is the index location of each maximum value found in the dimension `dim`. Read [torch.cummax](https://pytorch.org/docs/stable/generated/torch.cummax.html) for more details. + +#### Parameters + +| Type | Parameter | Description | +| ----- | --------- | -------------------------------------- | +| `int` | `dim` | the dimension to do the operation over | + +#### Inputs + +
+
input: T
+
The input tensor with various shapes. Tensor with empty element is also supported.
+
+ +#### Outputs + +
+
output: T
+
Output the cumulative maximum elements of `input` in the dimension `dim`, with the same shape and dtype as `input`.
+
indices: tensor(int64)
+
Output the index location of each cumulative maximum value found in the dimension `dim`, with the same shape as `input`.
+
+ +#### Type Constraints + +- T:tensor(float32) + +### cummin + +#### Description + +Returns a tuple (`values`, `indices`) where `values` is the cumulative minimum elements of `input` in the dimension `dim`. And `indices` is the index location of each minimum value found in the dimension `dim`. Read [torch.cummin](https://pytorch.org/docs/stable/generated/torch.cummin.html) for more details. + +#### Parameters + +| Type | Parameter | Description | +| ----- | --------- | -------------------------------------- | +| `int` | `dim` | the dimension to do the operation over | + +#### Inputs + +
+
input: T
+
The input tensor with various shapes. Tensor with empty element is also supported.
+
+ +#### Outputs + +
+
output: T
+
Output the cumulative minimum elements of `input` in the dimension `dim`, with the same shape and dtype as `input`.
+
indices: tensor(int64)
+
Output the index location of each cumulative minimum value found in the dimension `dim`, with the same shape as `input`.
+
+ +#### Type Constraints + +- T:tensor(float32) + +### MMCVModulatedDeformConv2d + +#### Description + +Perform Modulated Deformable Convolution on input feature, read [Deformable ConvNets v2: More Deformable, Better Results](https://arxiv.org/abs/1811.11168?from=timeline) for detail. + +#### Parameters + +| Type | Parameter | Description | +| -------------- | ------------------- | ------------------------------------------------------------------------------------- | +| `list of ints` | `stride` | The stride of the convolving kernel. (sH, sW) | +| `list of ints` | `padding` | Paddings on both sides of the input. (padH, padW) | +| `list of ints` | `dilation` | The spacing between kernel elements. (dH, dW) | +| `int` | `deformable_groups` | Groups of deformable offset. | +| `int` | `groups` | Split input into groups. `input_channel` should be divisible by the number of groups. | + +#### Inputs + +
+
inputs[0]: T
+
Input feature; 4-D tensor of shape (N, C, inH, inW), where N is the batch size, C is the number of channels, inH and inW are the height and width of the data.
+
inputs[1]: T
+
Input offset; 4-D tensor of shape (N, deformable_group* 2* kH* kW, outH, outW), where kH and kW are the height and width of weight, outH and outW are the height and width of offset and output.
+
inputs[2]: T
+
Input mask; 4-D tensor of shape (N, deformable_group* kH* kW, outH, outW), where kH and kW are the height and width of weight, outH and outW are the height and width of offset and output.
+
inputs[3]: T
+
Input weight; 4-D tensor of shape (output_channel, input_channel, kH, kW).
+
inputs[4]: T, optional
+
Input bias; 1-D tensor of shape (output_channel).
+
+ +#### Outputs + +
+
outputs[0]: T
+
Output feature; 4-D tensor of shape (N, output_channel, outH, outW).
+
+ +#### Type Constraints + +- T:tensor(float32, Linear) + +## MMCVDeformConv2d + +### Description + +Perform Deformable Convolution on input feature, read [Deformable Convolutional Network](https://arxiv.org/abs/1703.06211) for detail. + +### Parameters + +| Type | Parameter | Description | +| -------------- | ------------------ | --------------------------------------------------------------------------------------------------------------------------------- | +| `list of ints` | `stride` | The stride of the convolving kernel. (sH, sW) | +| `list of ints` | `padding` | Paddings on both sides of the input. (padH, padW) | +| `list of ints` | `dilation` | The spacing between kernel elements. (dH, dW) | +| `int` | `deformable_group` | Groups of deformable offset. | +| `int` | `group` | Split input into groups. `input_channel` should be divisible by the number of groups. | +| `int` | `im2col_step` | DeformableConv2d use im2col to compute convolution. im2col_step is used to split input and offset, reduce memory usage of column. | + +### Inputs + +
+
inputs[0]: T
+
Input feature; 4-D tensor of shape (N, C, inH, inW), where N is the batch size, C is the number of channels, inH and inW are the height and width of the data.
+
inputs[1]: T
+
Input offset; 4-D tensor of shape (N, deformable_group* 2* kH* kW, outH, outW), where kH and kW are the height and width of weight, outH and outW are the height and width of offset and output.
+
inputs[2]: T
+
Input weight; 4-D tensor of shape (output_channel, input_channel, kH, kW).
+
+ +### Outputs + +
+
outputs[0]: T
+
Output feature; 4-D tensor of shape (N, output_channel, outH, outW).
+
+ +### Type Constraints + +- T:tensor(float32, Linear) diff --git a/docs/deployment/onnxruntime_op.md b/docs/deployment/onnxruntime_op.md new file mode 100644 index 0000000..f17b32a --- /dev/null +++ b/docs/deployment/onnxruntime_op.md @@ -0,0 +1,126 @@ +## Custom operators for ONNX Runtime in MMCV + +### Introduction of ONNX Runtime + +**ONNX Runtime** is a cross-platform inferencing and training accelerator compatible with many popular ML/DNN frameworks. Check its [github](https://github.com/microsoft/onnxruntime) for more information. + +### Introduction of ONNX + +**ONNX** stands for **Open Neural Network Exchange**, which acts as *Intermediate Representation(IR)* for ML/DNN models from many frameworks. Check its [github](https://github.com/onnx/onnx) for more information. + +### Why include custom operators for ONNX Runtime in MMCV + +- To verify the correctness of exported ONNX models in ONNX Runtime. +- To ease the deployment of ONNX models with custom operators from `mmcv.ops` in ONNX Runtime. 
+ +### List of operators for ONNX Runtime supported in MMCV + +| Operator | CPU | GPU | MMCV Releases | +| :----------------------------------------------------: | :---: | :---: | :-----------: | +| [SoftNMS](onnxruntime_custom_ops.md#softnms) | Y | N | 1.2.3 | +| [RoIAlign](onnxruntime_custom_ops.md#roialign) | Y | N | 1.2.5 | +| [NMS](onnxruntime_custom_ops.md#nms) | Y | N | 1.2.7 | +| [grid_sampler](onnxruntime_custom_ops.md#grid_sampler) | Y | N | 1.3.1 | +| [CornerPool](onnxruntime_custom_ops.md#cornerpool) | Y | N | 1.3.4 | +| [cummax](onnxruntime_custom_ops.md#cummax) | Y | N | master | +| [cummin](onnxruntime_custom_ops.md#cummin) | Y | N | master | + +### How to build custom operators for ONNX Runtime + +*Please be noted that only **onnxruntime>=1.8.1** of CPU version on Linux platform is tested by now.* + +#### Prerequisite + +- Clone repository + +```bash +git clone https://github.com/open-mmlab/mmcv.git +``` + +- Download `onnxruntime-linux` from ONNX Runtime [releases](https://github.com/microsoft/onnxruntime/releases/tag/v1.8.1), extract it, expose `ONNXRUNTIME_DIR` and finally add the lib path to `LD_LIBRARY_PATH` as below: + +```bash +wget https://github.com/microsoft/onnxruntime/releases/download/v1.8.1/onnxruntime-linux-x64-1.8.1.tgz + +tar -zxvf onnxruntime-linux-x64-1.8.1.tgz +cd onnxruntime-linux-x64-1.8.1 +export ONNXRUNTIME_DIR=$(pwd) +export LD_LIBRARY_PATH=$ONNXRUNTIME_DIR/lib:$LD_LIBRARY_PATH +``` + +#### Build on Linux + +```bash +cd mmcv ## to MMCV root directory +MMCV_WITH_OPS=1 MMCV_WITH_ORT=1 python setup.py develop +``` + +### How to do inference using exported ONNX models with custom operators in ONNX Runtime in python + +Install ONNX Runtime with `pip` + +```bash +pip install onnxruntime==1.8.1 +``` + +Inference Demo + +```python +import os + +import numpy as np +import onnxruntime as ort + +from mmcv.ops import get_onnxruntime_op_path + +ort_custom_op_path = get_onnxruntime_op_path() +assert os.path.exists(ort_custom_op_path) 
+session_options = ort.SessionOptions() +session_options.register_custom_ops_library(ort_custom_op_path) +## exported ONNX model with custom operators +onnx_file = 'sample.onnx' +input_data = np.random.randn(1, 3, 224, 224).astype(np.float32) +sess = ort.InferenceSession(onnx_file, session_options) +onnx_results = sess.run(None, {'input' : input_data}) +``` + +### How to add a new custom operator for ONNX Runtime in MMCV + +#### Reminder + +- The custom operator is not included in [supported operator list](https://github.com/microsoft/onnxruntime/blob/master/docs/OperatorKernels.md) in ONNX Runtime. +- The custom operator should be able to be exported to ONNX. + +#### Main procedures + +Take custom operator `soft_nms` for example. + +1. Add header `soft_nms.h` to ONNX Runtime include directory `mmcv/ops/csrc/onnxruntime/` +2. Add source `soft_nms.cpp` to ONNX Runtime source directory `mmcv/ops/csrc/onnxruntime/cpu/` +3. Register `soft_nms` operator in [onnxruntime_register.cpp](../../mmcv/ops/csrc/onnxruntime/cpu/onnxruntime_register.cpp) + + ```c++ + #include "soft_nms.h" + + SoftNmsOp c_SoftNmsOp; + + if (auto status = ortApi->CustomOpDomain_Add(domain, &c_SoftNmsOp)) { + return status; + } + ``` + +4. Add unit test into `tests/test_ops/test_onnx.py` + Check [here](../../tests/test_ops/test_onnx.py) for examples. + +**Finally, welcome to send us PR of adding custom operators for ONNX Runtime in MMCV.** :nerd_face: + +### Known Issues + +- "RuntimeError: tuple appears in op that does not forward tuples, unsupported kind: `prim::PythonOp`." + 1. Note generally `cummax` or `cummin` is exportable to ONNX as long as the torch version >= 1.5.0, since `torch.cummax` is only supported with torch >= 1.5.0. But when `cummax` or `cummin` serves as an intermediate component whose outputs is used as inputs for another modules, it's expected that torch version must be >= 1.7.0. Otherwise the above error might arise, when running exported ONNX model with onnxruntime. + 2. 
Solution: update the torch version to 1.7.0 or higher. + +### References + +- [How to export Pytorch model with custom op to ONNX and run it in ONNX Runtime](https://github.com/onnx/tutorials/blob/master/PyTorchCustomOperator/README.md) +- [How to add a custom operator/kernel in ONNX Runtime](https://github.com/microsoft/onnxruntime/blob/master/docs/AddingCustomOp.md) diff --git a/docs/deployment/tensorrt_custom_ops.md b/docs/deployment/tensorrt_custom_ops.md new file mode 100644 index 0000000..be47e35 --- /dev/null +++ b/docs/deployment/tensorrt_custom_ops.md @@ -0,0 +1,395 @@ +## TensorRT Custom Ops + + + +- [TensorRT Custom Ops](#tensorrt-custom-ops) + - [MMCVRoIAlign](#mmcvroialign) + - [Description](#description) + - [Parameters](#parameters) + - [Inputs](#inputs) + - [Outputs](#outputs) + - [Type Constraints](#type-constraints) + - [ScatterND](#scatternd) + - [Description](#description-1) + - [Parameters](#parameters-1) + - [Inputs](#inputs-1) + - [Outputs](#outputs-1) + - [Type Constraints](#type-constraints-1) + - [NonMaxSuppression](#nonmaxsuppression) + - [Description](#description-2) + - [Parameters](#parameters-2) + - [Inputs](#inputs-2) + - [Outputs](#outputs-2) + - [Type Constraints](#type-constraints-2) + - [MMCVDeformConv2d](#mmcvdeformconv2d) + - [Description](#description-3) + - [Parameters](#parameters-3) + - [Inputs](#inputs-3) + - [Outputs](#outputs-3) + - [Type Constraints](#type-constraints-3) + - [grid_sampler](#grid_sampler) + - [Description](#description-4) + - [Parameters](#parameters-4) + - [Inputs](#inputs-4) + - [Outputs](#outputs-4) + - [Type Constraints](#type-constraints-4) + - [cummax](#cummax) + - [Description](#description-5) + - [Parameters](#parameters-5) + - [Inputs](#inputs-5) + - [Outputs](#outputs-5) + - [Type Constraints](#type-constraints-5) + - [cummin](#cummin) + - [Description](#description-6) + - [Parameters](#parameters-6) + - [Inputs](#inputs-6) + - [Outputs](#outputs-6) + - [Type Constraints](#type-constraints-6) + 
- [MMCVInstanceNormalization](#mmcvinstancenormalization) + - [Description](#description-7) + - [Parameters](#parameters-7) + - [Inputs](#inputs-7) + - [Outputs](#outputs-7) + - [Type Constraints](#type-constraints-7) + - [MMCVModulatedDeformConv2d](#mmcvmodulateddeformconv2d) + - [Description](#description-8) + - [Parameters](#parameters-8) + - [Inputs](#inputs-8) + - [Outputs](#outputs-8) + - [Type Constraints](#type-constraints-8) + + + +### MMCVRoIAlign + +#### Description + +Perform RoIAlign on output feature, used in bbox_head of most two stage +detectors. + +#### Parameters + +| Type | Parameter | Description | +| ------- | ---------------- | ------------------------------------------------------------------------------------------------------------- | +| `int` | `output_height` | height of output roi | +| `int` | `output_width` | width of output roi | +| `float` | `spatial_scale` | used to scale the input boxes | +| `int` | `sampling_ratio` | number of input samples to take for each output sample. `0` means to take samples densely for current models. | +| `str` | `mode` | pooling mode in each bin. `avg` or `max` | +| `int` | `aligned` | If `aligned=0`, use the legacy implementation in MMDetection. Else, align the results more perfectly. | + +#### Inputs + +
+
inputs[0]: T
+
Input feature map; 4-D tensor of shape (N, C, H, W), where N is the batch size, C is the number of channels, H and W are the height and width of the data.
+
inputs[1]: T
+
RoIs (Regions of Interest) to pool over; 2-D tensor of shape (num_rois, 5) given as [[batch_index, x1, y1, x2, y2], ...]. The RoIs' coordinates are in the coordinate system of inputs[0].
+
+ +#### Outputs + +
+
outputs[0]: T
+
RoI pooled output, 4-D tensor of shape (num_rois, C, output_height, output_width). The r-th batch element output[0][r-1] is a pooled feature map corresponding to the r-th RoI inputs[1][r-1].
+
+ +#### Type Constraints + +- T:tensor(float32, Linear) + +### ScatterND + +#### Description + +ScatterND takes three inputs `data` tensor of rank r >= 1, `indices` tensor of rank q >= 1, and `updates` tensor of rank q + r - indices.shape[-1] - 1. The output of the operation is produced by creating a copy of the input `data`, and then updating its value to values specified by updates at specific index positions specified by `indices`. Its output shape is the same as the shape of `data`. Note that `indices` should not have duplicate entries. That is, two or more updates for the same index-location is not supported. + +The `output` is calculated via the following equation: + +```python + output = np.copy(data) + update_indices = indices.shape[:-1] + for idx in np.ndindex(update_indices): + output[indices[idx]] = updates[idx] +``` + +#### Parameters + +None + +#### Inputs + +
+
inputs[0]: T
+
Tensor of rank r>=1.
+ +
inputs[1]: tensor(int32, Linear)
+
Tensor of rank q>=1.
+ +
inputs[2]: T
+
Tensor of rank q + r - indices_shape[-1] - 1.
+
+ +#### Outputs + +
+
outputs[0]: T
+
Tensor of rank r >= 1.
+
+ +#### Type Constraints + +- T:tensor(float32, Linear), tensor(int32, Linear) + +### NonMaxSuppression + +#### Description + +Filter out boxes has high IoU overlap with previously selected boxes or low score. Output the indices of valid boxes. Indices of invalid boxes will be filled with -1. + +#### Parameters + +| Type | Parameter | Description | +| ------- | ---------------------------- | ------------------------------------------------------------------------------------------------------------------------------------ | +| `int` | `center_point_box` | 0 - the box data is supplied as [y1, x1, y2, x2], 1-the box data is supplied as [x_center, y_center, width, height]. | +| `int` | `max_output_boxes_per_class` | The maximum number of boxes to be selected per batch per class. Default to 0, number of output boxes equal to number of input boxes. | +| `float` | `iou_threshold` | The threshold for deciding whether boxes overlap too much with respect to IoU. Value range [0, 1]. Default to 0. | +| `float` | `score_threshold` | The threshold for deciding when to remove boxes based on score. | +| `int` | `offset` | 0 or 1, boxes' width or height is (x2 - x1 + offset). | + +#### Inputs + +
+
inputs[0]: T
+
Input boxes. 3-D tensor of shape (num_batches, spatial_dimension, 4).
+
inputs[1]: T
+
Input scores. 3-D tensor of shape (num_batches, num_classes, spatial_dimension).
+
+ +#### Outputs + +
+
outputs[0]: tensor(int32, Linear)
+
Selected indices. 2-D tensor of shape (num_selected_indices, 3) as [[batch_index, class_index, box_index], ...].
+
num_selected_indices=num_batches* num_classes* min(max_output_boxes_per_class, spatial_dimension).
+
All invalid indices will be filled with -1.
+
+ +#### Type Constraints + +- T:tensor(float32, Linear) + +### MMCVDeformConv2d + +#### Description + +Perform Deformable Convolution on input feature, read [Deformable Convolutional Network](https://arxiv.org/abs/1703.06211) for detail. + +#### Parameters + +| Type | Parameter | Description | +| -------------- | ------------------ | --------------------------------------------------------------------------------------------------------------------------------- | +| `list of ints` | `stride` | The stride of the convolving kernel. (sH, sW) | +| `list of ints` | `padding` | Paddings on both sides of the input. (padH, padW) | +| `list of ints` | `dilation` | The spacing between kernel elements. (dH, dW) | +| `int` | `deformable_group` | Groups of deformable offset. | +| `int` | `group` | Split input into groups. `input_channel` should be divisible by the number of groups. | +| `int` | `im2col_step` | DeformableConv2d use im2col to compute convolution. im2col_step is used to split input and offset, reduce memory usage of column. | + +#### Inputs + +
+
inputs[0]: T
+
Input feature; 4-D tensor of shape (N, C, inH, inW), where N is the batch size, C is the number of channels, inH and inW are the height and width of the data.
+
inputs[1]: T
+
Input offset; 4-D tensor of shape (N, deformable_group * 2 * kH * kW, outH, outW), where kH and kW are the height and width of the weight, and outH and outW are the height and width of the offset and output.
+
inputs[2]: T
+
Input weight; 4-D tensor of shape (output_channel, input_channel, kH, kW).
+
+ +#### Outputs + +
+
outputs[0]: T
+
Output feature; 4-D tensor of shape (N, output_channel, outH, outW).
+
+ +#### Type Constraints + +- T:tensor(float32, Linear) + +### grid_sampler + +#### Description + +Perform sample from `input` with pixel locations from `grid`. + +#### Parameters + +| Type | Parameter | Description | +| ----- | -------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `int` | `interpolation_mode` | Interpolation mode to calculate output values. (0: `bilinear` , 1: `nearest`) | +| `int` | `padding_mode` | Padding mode for outside grid values. (0: `zeros`, 1: `border`, 2: `reflection`) | +| `int` | `align_corners` | If `align_corners=1`, the extrema (`-1` and `1`) are considered as referring to the center points of the input's corner pixels. If `align_corners=0`, they are instead considered as referring to the corner points of the input's corner pixels, making the sampling more resolution agnostic. | + +#### Inputs + +
+
inputs[0]: T
+
Input feature; 4-D tensor of shape (N, C, inH, inW), where N is the batch size, C is the number of channels, inH and inW are the height and width of the data.
+
inputs[1]: T
+
Input grid; 4-D tensor of shape (N, outH, outW, 2), where outH and outW are the height and width of the grid and output.
+
+ +#### Outputs + +
+
outputs[0]: T
+
Output feature; 4-D tensor of shape (N, C, outH, outW).
+
+ +#### Type Constraints + +- T:tensor(float32, Linear) + +### cummax + +#### Description + +Returns a namedtuple (`values`, `indices`) where `values` is the cumulative maximum of elements of `input` in the dimension `dim`. And `indices` is the index location of each maximum value found in the dimension `dim`. + +#### Parameters + +| Type | Parameter | Description | +| ----- | --------- | --------------------------------------- | +| `int` | `dim` | The dimension to do the operation over. | + +#### Inputs + +
+
inputs[0]: T
+
The input tensor.
+
+ +#### Outputs + +
+
outputs[0]: T
+
Output values.
+
outputs[1]: tensor(int32, Linear)
+
Output indices.
+
+ +#### Type Constraints + +- T:tensor(float32, Linear) + +### cummin + +#### Description + +Returns a namedtuple (`values`, `indices`) where `values` is the cumulative minimum of elements of `input` in the dimension `dim`. And `indices` is the index location of each minimum value found in the dimension `dim`. + +#### Parameters + +| Type | Parameter | Description | +| ----- | --------- | --------------------------------------- | +| `int` | `dim` | The dimension to do the operation over. | + +#### Inputs + +
+
inputs[0]: T
+
The input tensor.
+
+ +#### Outputs + +
+
outputs[0]: T
+
Output values.
+
outputs[1]: tensor(int32, Linear)
+
Output indices.
+
+ +#### Type Constraints + +- T:tensor(float32, Linear) + +### MMCVInstanceNormalization + +#### Description + +Carries out instance normalization as described in the paper https://arxiv.org/abs/1607.08022. + +y = scale * (x - mean) / sqrt(variance + epsilon) + B, where mean and variance are computed per instance per channel. + +#### Parameters + +| Type | Parameter | Description | +| ------- | --------- | -------------------------------------------------------------------- | +| `float` | `epsilon` | The epsilon value to use to avoid division by zero. Default is 1e-05 | + +#### Inputs + +
+
input: T
+
Input data tensor from the previous operator; dimensions for image case are (N x C x H x W), where N is the batch size, C is the number of channels, and H and W are the height and the width of the data. For non image case, the dimensions are in the form of (N x C x D1 x D2 ... Dn), where N is the batch size.
+
scale: T
+
The input 1-dimensional scale tensor of size C.
+
B: T
+
The input 1-dimensional bias tensor of size C.
+
+ +#### Outputs + +
+
output: T
+
The output tensor of the same shape as input.
+
+ +#### Type Constraints + +- T:tensor(float32, Linear) + +### MMCVModulatedDeformConv2d + +#### Description + +Perform Modulated Deformable Convolution on input feature, read [Deformable ConvNets v2: More Deformable, Better Results](https://arxiv.org/abs/1811.11168?from=timeline) for detail. + +#### Parameters + +| Type | Parameter | Description | +| -------------- | ------------------ | ------------------------------------------------------------------------------------- | +| `list of ints` | `stride` | The stride of the convolving kernel. (sH, sW) | +| `list of ints` | `padding` | Paddings on both sides of the input. (padH, padW) | +| `list of ints` | `dilation` | The spacing between kernel elements. (dH, dW) | +| `int` | `deformable_group` | Groups of deformable offset. | +| `int` | `group` | Split input into groups. `input_channel` should be divisible by the number of groups. | + +#### Inputs + +
+
inputs[0]: T
+
Input feature; 4-D tensor of shape (N, C, inH, inW), where N is the batch size, C is the number of channels, inH and inW are the height and width of the data.
+
inputs[1]: T
+
Input offset; 4-D tensor of shape (N, deformable_group * 2 * kH * kW, outH, outW), where kH and kW are the height and width of the weight, and outH and outW are the height and width of the offset and output.
+
inputs[2]: T
+
Input mask; 4-D tensor of shape (N, deformable_group * kH * kW, outH, outW), where kH and kW are the height and width of the weight, and outH and outW are the height and width of the offset and output.
+
inputs[3]: T
+
Input weight; 4-D tensor of shape (output_channel, input_channel, kH, kW).
+
inputs[4]: T, optional
+
Input bias; 1-D tensor of shape (output_channel).
+
+ +#### Outputs + +
+
outputs[0]: T
+
Output feature; 4-D tensor of shape (N, output_channel, outH, outW).
+
+ +#### Type Constraints + +- T:tensor(float32, Linear) diff --git a/docs/deployment/tensorrt_plugin.md b/docs/deployment/tensorrt_plugin.md new file mode 100644 index 0000000..cd8924e --- /dev/null +++ b/docs/deployment/tensorrt_plugin.md @@ -0,0 +1,178 @@ +## TensorRT Plugins for custom operators in MMCV (Experimental) + + + +- [TensorRT Plugins for custom operators in MMCV (Experimental)](#tensorrt-plugins-for-custom-operators-in-mmcv-experimental) + - [Introduction](#introduction) + - [List of TensorRT plugins supported in MMCV](#list-of-tensorrt-plugins-supported-in-mmcv) + - [How to build TensorRT plugins in MMCV](#how-to-build-tensorrt-plugins-in-mmcv) + - [Prerequisite](#prerequisite) + - [Build on Linux](#build-on-linux) + - [Create TensorRT engine and run inference in python](#create-tensorrt-engine-and-run-inference-in-python) + - [How to add a TensorRT plugin for custom op in MMCV](#how-to-add-a-tensorrt-plugin-for-custom-op-in-mmcv) + - [Main procedures](#main-procedures) + - [Reminders](#reminders) + - [Known Issues](#known-issues) + - [References](#references) + + + +### Introduction + +**NVIDIA TensorRT** is a software development kit(SDK) for high-performance inference of deep learning models. It includes a deep learning inference optimizer and runtime that delivers low latency and high-throughput for deep learning inference applications. Please check its [developer's website](https://developer.nvidia.com/tensorrt) for more information. +To ease the deployment of trained models with custom operators from `mmcv.ops` using TensorRT, a series of TensorRT plugins are included in MMCV. 
+ +### List of TensorRT plugins supported in MMCV + +| ONNX Operator | TensorRT Plugin | MMCV Releases | +| :-----------------------: | :-----------------------------------------------------------------------------: | :-----------: | +| MMCVRoiAlign | [MMCVRoiAlign](./tensorrt_custom_ops.md#mmcvroialign) | 1.2.6 | +| ScatterND | [ScatterND](./tensorrt_custom_ops.md#scatternd) | 1.2.6 | +| NonMaxSuppression | [NonMaxSuppression](./tensorrt_custom_ops.md#nonmaxsuppression) | 1.3.0 | +| MMCVDeformConv2d | [MMCVDeformConv2d](./tensorrt_custom_ops.md#mmcvdeformconv2d) | 1.3.0 | +| grid_sampler | [grid_sampler](./tensorrt_custom_ops.md#grid-sampler) | 1.3.1 | +| cummax | [cummax](./tensorrt_custom_ops.md#cummax) | 1.3.5 | +| cummin | [cummin](./tensorrt_custom_ops.md#cummin) | 1.3.5 | +| MMCVInstanceNormalization | [MMCVInstanceNormalization](./tensorrt_custom_ops.md#mmcvinstancenormalization) | 1.3.5 | +| MMCVModulatedDeformConv2d | [MMCVModulatedDeformConv2d](./tensorrt_custom_ops.md#mmcvmodulateddeformconv2d) | master | + +Notes + +- All plugins listed above are developed on TensorRT-7.2.1.6.Ubuntu-16.04.x86_64-gnu.cuda-10.2.cudnn8.0 + +### How to build TensorRT plugins in MMCV + +#### Prerequisite + +- Clone repository + +```bash +git clone https://github.com/open-mmlab/mmcv.git +``` + +- Install TensorRT + +Download the corresponding TensorRT build from [NVIDIA Developer Zone](https://developer.nvidia.com/nvidia-tensorrt-download). + +For example, for Ubuntu 16.04 on x86-64 with cuda-10.2, the downloaded file is `TensorRT-7.2.1.6.Ubuntu-16.04.x86_64-gnu.cuda-10.2.cudnn8.0.tar.gz`. 
+ +Then, install as below: + +```bash +cd ~/Downloads +tar -xvzf TensorRT-7.2.1.6.Ubuntu-16.04.x86_64-gnu.cuda-10.2.cudnn8.0.tar.gz +export TENSORRT_DIR=`pwd`/TensorRT-7.2.1.6 +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$TENSORRT_DIR/lib +``` + +Install python packages: tensorrt, graphsurgeon, onnx-graphsurgeon + +```bash +pip install $TENSORRT_DIR/python/tensorrt-7.2.1.6-cp37-none-linux_x86_64.whl +pip install $TENSORRT_DIR/onnx_graphsurgeon/onnx_graphsurgeon-0.2.6-py2.py3-none-any.whl +pip install $TENSORRT_DIR/graphsurgeon/graphsurgeon-0.4.5-py2.py3-none-any.whl +``` + +For more detailed information of installing TensorRT using tar, please refer to [Nvidia' website](https://docs.nvidia.com/deeplearning/tensorrt/archives/tensorrt-721/install-guide/index.html#installing-tar). + +#### Build on Linux + +```bash +cd mmcv ## to MMCV root directory +MMCV_WITH_OPS=1 MMCV_WITH_TRT=1 pip install -e . +``` + +### Create TensorRT engine and run inference in python + +Here is an example. + +```python +import torch +import onnx + +from mmcv.tensorrt import (TRTWrapper, onnx2trt, save_trt_engine, + is_tensorrt_plugin_loaded) + +assert is_tensorrt_plugin_loaded(), 'Requires to complie TensorRT plugins in mmcv' + +onnx_file = 'sample.onnx' +trt_file = 'sample.trt' +onnx_model = onnx.load(onnx_file) + +## Model input +inputs = torch.rand(1, 3, 224, 224).cuda() +## Model input shape info +opt_shape_dict = { + 'input': [list(inputs.shape), + list(inputs.shape), + list(inputs.shape)] +} + +## Create TensorRT engine +max_workspace_size = 1 << 30 +trt_engine = onnx2trt( + onnx_model, + opt_shape_dict, + max_workspace_size=max_workspace_size) + +## Save TensorRT engine +save_trt_engine(trt_engine, trt_file) + +## Run inference with TensorRT +trt_model = TRTWrapper(trt_file, ['input'], ['output']) + +with torch.no_grad(): + trt_outputs = trt_model({'input': inputs}) + output = trt_outputs['output'] + +``` + +### How to add a TensorRT plugin for custom op in MMCV + +#### Main procedures + 
+Below are the main steps: + +1. Add c++ header file +2. Add c++ source file +3. Add cuda kernel file +4. Register plugin in `trt_plugin.cpp` +5. Add unit test in `tests/test_ops/test_tensorrt.py` + +**Take RoIAlign plugin `roi_align` for example.** + +1. Add header `trt_roi_align.hpp` to TensorRT include directory `mmcv/ops/csrc/tensorrt/` +2. Add source `trt_roi_align.cpp` to TensorRT source directory `mmcv/ops/csrc/tensorrt/plugins/` +3. Add cuda kernel `trt_roi_align_kernel.cu` to TensorRT source directory `mmcv/ops/csrc/tensorrt/plugins/` +4. Register `roi_align` plugin in [trt_plugin.cpp](https://github.com/open-mmlab/mmcv/blob/master/mmcv/ops/csrc/tensorrt/plugins/trt_plugin.cpp) + + ```c++ + #include "trt_plugin.hpp" + + #include "trt_roi_align.hpp" + + REGISTER_TENSORRT_PLUGIN(RoIAlignPluginDynamicCreator); + + extern "C" { + bool initLibMMCVInferPlugins() { return true; } + } // extern "C" + ``` + +5. Add unit test into `tests/test_ops/test_tensorrt.py` + Check [here](https://github.com/open-mmlab/mmcv/blob/master/tests/test_ops/test_tensorrt.py) for examples. + +#### Reminders + +- Some of the [custom ops](https://mmcv.readthedocs.io/en/latest/ops.html) in `mmcv` have their cuda implementations, which could be referred. 
+ +### Known Issues + +- None + +### References + +- [Developer guide of Nvidia TensorRT](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html) +- [TensorRT Open Source Software](https://github.com/NVIDIA/TensorRT) +- [onnx-tensorrt](https://github.com/onnx/onnx-tensorrt) +- [TensorRT python API](https://docs.nvidia.com/deeplearning/tensorrt/api/python_api/index.html) +- [TensorRT c++ plugin API](https://docs.nvidia.com/deeplearning/tensorrt/api/c_api/classnvinfer1_1_1_i_plugin.html) diff --git a/docs/en/_static/version.json b/docs/en/_static/version.json deleted file mode 100644 index 7ee4965..0000000 --- a/docs/en/_static/version.json +++ /dev/null @@ -1,575 +0,0 @@ -{ - "Linux": [ - { - "cuda": "11.7", - "torch": "1.13.x", - "mmcv": [ - "2.0.0rc3" - ] - }, - { - "cuda": "11.6", - "torch": "1.13.x", - "mmcv": [ - "2.0.0rc3" - ] - }, - { - "cuda": "11.6", - "torch": "1.12.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "11.5", - "torch": "1.11.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "11.3", - "torch": "1.12.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "11.3", - "torch": "1.11.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "11.3", - "torch": "1.10.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "11.1", - "torch": "1.10.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "11.1", - "torch": "1.9.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "11.1", - "torch": "1.8.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "11.0", - "torch": "1.7.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "10.2", - "torch": "1.12.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "10.2", - "torch": "1.11.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - 
"2.0.0rc1" - ] - }, - { - "cuda": "10.2", - "torch": "1.10.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "10.2", - "torch": "1.9.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "10.2", - "torch": "1.8.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "10.2", - "torch": "1.7.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "10.2", - "torch": "1.6.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "10.1", - "torch": "1.8.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "10.1", - "torch": "1.7.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "10.1", - "torch": "1.6.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "9.2", - "torch": "1.7.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "9.2", - "torch": "1.6.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "cpu", - "torch": "1.13.x", - "mmcv": [ - "2.0.0rc3" - ] - }, - { - "cuda": "cpu", - "torch": "1.12.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "cpu", - "torch": "1.11.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "cpu", - "torch": "1.10.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "cpu", - "torch": "1.9.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "cpu", - "torch": "1.8.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "cpu", - "torch": "1.7.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "cpu", - "torch": "1.6.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - } - ], - "Windows": [ - { - "cuda": "11.7", - "torch": "1.13.x", - "mmcv": [ - "2.0.0rc3" - ] - }, - { - "cuda": "11.6", - "torch": "1.13.x", - 
"mmcv": [ - "2.0.0rc3" - ] - }, - { - "cuda": "11.6", - "torch": "1.12.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "11.5", - "torch": "1.11.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "11.3", - "torch": "1.12.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "11.3", - "torch": "1.11.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "11.3", - "torch": "1.10.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "11.1", - "torch": "1.10.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "11.1", - "torch": "1.9.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "11.1", - "torch": "1.8.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "10.2", - "torch": "1.10.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "10.2", - "torch": "1.9.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "10.2", - "torch": "1.8.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "10.2", - "torch": "1.7.x", - "mmcv": [ - "2.0.0rc3" - ] - }, - { - "cuda": "10.2", - "torch": "1.6.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "10.1", - "torch": "1.8.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "10.1", - "torch": "1.7.x", - "mmcv": [ - "2.0.0rc3" - ] - }, - { - "cuda": "10.1", - "torch": "1.6.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "cpu", - "torch": "1.13.x", - "mmcv": [ - "2.0.0rc3" - ] - }, - { - "cuda": "cpu", - "torch": "1.12.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "cpu", - "torch": "1.11.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "cpu", - "torch": "1.10.x", - "mmcv": [ - "2.0.0rc3", 
- "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "cpu", - "torch": "1.9.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "cpu", - "torch": "1.8.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "cpu", - "torch": "1.7.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "cpu", - "torch": "1.6.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - } - ], - "macOS": [ - { - "cuda": "cpu", - "torch": "1.13.x", - "mmcv": [ - "2.0.0rc3" - ] - }, - { - "cuda": "mps", - "torch": "1.13.x", - "mmcv": [ - "2.0.0rc3" - ] - }, - { - "cuda": "cpu", - "torch": "1.12.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2" - ] - }, - { - "cuda": "cpu", - "torch": "1.11.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2" - ] - }, - { - "cuda": "cpu", - "torch": "1.10.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2" - ] - }, - { - "cuda": "cpu", - "torch": "1.9.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2" - ] - }, - { - "cuda": "cpu", - "torch": "1.8.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2" - ] - }, - { - "cuda": "cpu", - "torch": "1.7.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2" - ] - }, - { - "cuda": "cpu", - "torch": "1.6.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2" - ] - } - ] -} diff --git a/docs/en/_templates/classtemplate.rst b/docs/en/_templates/classtemplate.rst deleted file mode 100644 index 4f74842..0000000 --- a/docs/en/_templates/classtemplate.rst +++ /dev/null @@ -1,14 +0,0 @@ -.. role:: hidden - :class: hidden-section -.. currentmodule:: {{ module }} - - -{{ name | underline}} - -.. autoclass:: {{ name }} - :members: - - -.. - autogenerated from source/_templates/classtemplate.rst - note it does not have :inherited-members: diff --git a/docs/en/api/arraymisc.rst b/docs/en/api/arraymisc.rst deleted file mode 100644 index 28975eb..0000000 --- a/docs/en/api/arraymisc.rst +++ /dev/null @@ -1,19 +0,0 @@ -.. role:: hidden - :class: hidden-section - -mmcv.arraymisc -=================================== - -.. 
contents:: mmcv.arraymisc - :depth: 2 - :local: - :backlinks: top - -.. currentmodule:: mmcv.arraymisc - -.. autosummary:: - :toctree: generated - :nosignatures: - - quantize - dequantize diff --git a/docs/en/api/cnn.rst b/docs/en/api/cnn.rst deleted file mode 100644 index 022191f..0000000 --- a/docs/en/api/cnn.rst +++ /dev/null @@ -1,71 +0,0 @@ -.. role:: hidden - :class: hidden-section - -mmcv.cnn -=================================== - -.. contents:: mmcv.cnn - :depth: 2 - :local: - :backlinks: top - -.. currentmodule:: mmcv.cnn - -Module ----------------- - -.. autosummary:: - :toctree: generated - :nosignatures: - :template: classtemplate.rst - - ContextBlock - Conv2d - Conv3d - ConvAWS2d - ConvModule - ConvTranspose2d - ConvTranspose3d - ConvWS2d - DepthwiseSeparableConvModule - GeneralizedAttention - HSigmoid - HSwish - LayerScale - Linear - MaxPool2d - MaxPool3d - NonLocal1d - NonLocal2d - NonLocal3d - Scale - Swish - Conv2dRFSearchOp - -Build Function ----------------- - -.. autosummary:: - :toctree: generated - :nosignatures: - - build_activation_layer - build_conv_layer - build_norm_layer - build_padding_layer - build_plugin_layer - build_upsample_layer - -Miscellaneous ----------------- - -.. autosummary:: - :toctree: generated - :nosignatures: - - fuse_conv_bn - conv_ws_2d - is_norm - make_res_layer - make_vgg_layer - get_model_complexity_info diff --git a/docs/en/api/image.rst b/docs/en/api/image.rst deleted file mode 100644 index 3b93484..0000000 --- a/docs/en/api/image.rst +++ /dev/null @@ -1,100 +0,0 @@ -.. role:: hidden - :class: hidden-section - -mmcv.image -=================================== - -.. contents:: mmcv.image - :depth: 2 - :local: - :backlinks: top - -.. currentmodule:: mmcv.image - -IO ----------------- - -.. autosummary:: - :toctree: generated - :nosignatures: - - imfrombytes - imread - imwrite - use_backend - -Color Space ----------------- - -.. 
autosummary:: - :toctree: generated - :nosignatures: - - bgr2gray - bgr2hls - bgr2hsv - bgr2rgb - bgr2ycbcr - gray2bgr - gray2rgb - hls2bgr - hsv2bgr - imconvert - rgb2bgr - rgb2gray - rgb2ycbcr - ycbcr2bgr - ycbcr2rgb - -Geometric ----------------- - -.. autosummary:: - :toctree: generated - :nosignatures: - - cutout - imcrop - imflip - impad - impad_to_multiple - imrescale - imresize - imresize_like - imresize_to_multiple - imrotate - imshear - imtranslate - rescale_size - -Photometric ----------------- - -.. autosummary:: - :toctree: generated - :nosignatures: - - adjust_brightness - adjust_color - adjust_contrast - adjust_hue - adjust_lighting - adjust_sharpness - auto_contrast - clahe - imdenormalize - imequalize - iminvert - imnormalize - lut_transform - posterize - solarize - -Miscellaneous ----------------- - -.. autosummary:: - :toctree: generated - :nosignatures: - - tensor2imgs diff --git a/docs/en/api/ops.rst b/docs/en/api/ops.rst deleted file mode 100644 index b029045..0000000 --- a/docs/en/api/ops.rst +++ /dev/null @@ -1,135 +0,0 @@ -.. role:: hidden - :class: hidden-section - -mmcv.ops -=================================== - -.. contents:: mmcv.ops - :depth: 2 - :local: - :backlinks: top - -.. currentmodule:: mmcv.ops - -.. 
autosummary:: - :toctree: generated - :nosignatures: - :template: classtemplate.rst - - BorderAlign - CARAFE - CARAFENaive - CARAFEPack - Conv2d - ConvTranspose2d - CornerPool - Correlation - CrissCrossAttention - DeformConv2d - DeformConv2dPack - DeformRoIPool - DeformRoIPoolPack - DynamicScatter - FusedBiasLeakyReLU - GroupAll - Linear - MaskedConv2d - MaxPool2d - ModulatedDeformConv2d - ModulatedDeformConv2dPack - ModulatedDeformRoIPoolPack - MultiScaleDeformableAttention - PSAMask - PointsSampler - PrRoIPool - QueryAndGroup - RiRoIAlignRotated - RoIAlign - RoIAlignRotated - RoIAwarePool3d - RoIPointPool3d - RoIPool - SAConv2d - SigmoidFocalLoss - SimpleRoIAlign - SoftmaxFocalLoss - SparseConv2d - SparseConv3d - SparseConvTensor - SparseConvTranspose2d - SparseConvTranspose3d - SparseInverseConv2d - SparseInverseConv3d - SparseMaxPool2d - SparseMaxPool3d - SparseModule - SparseSequential - SubMConv2d - SubMConv3d - SyncBatchNorm - TINShift - Voxelization - -.. autosummary:: - :toctree: generated - :nosignatures: - - active_rotated_filter - assign_score_withk - ball_query - batched_nms - bbox_overlaps - border_align - box_iou_rotated - boxes_iou3d - boxes_iou_bev - boxes_overlap_bev - carafe - carafe_naive - chamfer_distance - contour_expand - convex_giou - convex_iou - deform_conv2d - deform_roi_pool - diff_iou_rotated_2d - diff_iou_rotated_3d - dynamic_scatter - furthest_point_sample - furthest_point_sample_with_dist - fused_bias_leakyrelu - gather_points - grouping_operation - knn - masked_conv2d - min_area_polygons - modulated_deform_conv2d - nms - nms3d - nms3d_normal - nms_bev - nms_match - nms_normal_bev - nms_rotated - pixel_group - point_sample - points_in_boxes_all - points_in_boxes_cpu - points_in_boxes_part - points_in_polygons - prroi_pool - rel_roi_point_to_rel_img_point - riroi_align_rotated - roi_align - roi_align_rotated - roi_pool - rotated_feature_align - scatter_nd - sigmoid_focal_loss - soft_nms - softmax_focal_loss - three_interpolate - 
three_nn - tin_shift - upfirdn2d - voxelization diff --git a/docs/en/api/transforms.rst b/docs/en/api/transforms.rst deleted file mode 100644 index b080133..0000000 --- a/docs/en/api/transforms.rst +++ /dev/null @@ -1,60 +0,0 @@ -.. role:: hidden - :class: hidden-section - -mmcv.transforms -=================================== - -.. currentmodule:: mmcv.transforms - -.. autosummary:: - :toctree: generated - :nosignatures: - :template: classtemplate.rst - - BaseTransform - TestTimeAug - -Loading ----------------- - -.. autosummary:: - :toctree: generated - :nosignatures: - :template: classtemplate.rst - - LoadAnnotations - LoadImageFromFile - -Processing ----------------- - -.. autosummary:: - :toctree: generated - :nosignatures: - :template: classtemplate.rst - - CenterCrop - MultiScaleFlipAug - Normalize - Pad - RandomChoiceResize - RandomFlip - RandomGrayscale - RandomResize - Resize - ToTensor - ImageToTensor - -Wrapper ----------------- - -.. autosummary:: - :toctree: generated - :nosignatures: - :template: classtemplate.rst - - Compose - KeyMapper - RandomApply - RandomChoice - TransformBroadcaster diff --git a/docs/en/api/utils.rst b/docs/en/api/utils.rst deleted file mode 100644 index f2ff4c2..0000000 --- a/docs/en/api/utils.rst +++ /dev/null @@ -1,23 +0,0 @@ -.. role:: hidden - :class: hidden-section - -mmcv.utils -=================================== - -.. contents:: mmcv.utils - :depth: 2 - :local: - :backlinks: top - -.. currentmodule:: mmcv.utils - -.. autosummary:: - :toctree: generated - :nosignatures: - - IS_CUDA_AVAILABLE - IS_MLU_AVAILABLE - IS_MPS_AVAILABLE - collect_env - jit - skip_no_elena diff --git a/docs/en/api/video.rst b/docs/en/api/video.rst deleted file mode 100644 index a6ebca0..0000000 --- a/docs/en/api/video.rst +++ /dev/null @@ -1,56 +0,0 @@ -.. role:: hidden - :class: hidden-section - -mmcv.video -=================================== - -.. contents:: mmcv.video - :depth: 2 - :local: - :backlinks: top - -.. 
currentmodule:: mmcv.video - -IO ----------------- - -.. autosummary:: - :toctree: generated - :nosignatures: - :template: classtemplate.rst - - VideoReader - Cache - -.. autosummary:: - :toctree: generated - :nosignatures: - - frames2video - -Optical Flow ----------------- - -.. autosummary:: - :toctree: generated - :nosignatures: - - dequantize_flow - flow_from_bytes - flow_warp - flowread - flowwrite - quantize_flow - sparse_flow_from_bytes - -Video Processing ----------------- - -.. autosummary:: - :toctree: generated - :nosignatures: - - concat_video - convert_video - cut_video - resize_video diff --git a/docs/en/api/visualization.rst b/docs/en/api/visualization.rst deleted file mode 100644 index 8f43ef2..0000000 --- a/docs/en/api/visualization.rst +++ /dev/null @@ -1,50 +0,0 @@ -.. role:: hidden - :class: hidden-section - -mmcv.visualization -=================================== - -.. contents:: mmcv.visualization - :depth: 2 - :local: - :backlinks: top - -.. currentmodule:: mmcv.visualization - -Color ----------------- - -.. autosummary:: - :toctree: generated - :nosignatures: - :template: classtemplate.rst - - Color - -.. autosummary:: - :toctree: generated - :nosignatures: - - color_val - -Image ----------------- - -.. autosummary:: - :toctree: generated - :nosignatures: - - imshow - imshow_bboxes - imshow_det_bboxes - -Optical Flow ----------------- - -.. 
autosummary:: - :toctree: generated - :nosignatures: - - flow2rgb - flowshow - make_color_wheel diff --git a/docs/en/community/contributing.md b/docs/en/community/contributing.md deleted file mode 100644 index e339935..0000000 --- a/docs/en/community/contributing.md +++ /dev/null @@ -1,267 +0,0 @@ -## Contributing to OpenMMLab - -Welcome to the MMCV community, we are committed to building a cutting-edge computer vision foundational library and all kinds of contributions are welcomed, including but not limited to - -**Fix bug** - -You can directly post a Pull Request to fix typo in code or documents - -The steps to fix the bug of code implementation are as follows. - -1. If the modification involve significant changes, you should create an issue first and describe the error information and how to trigger the bug. Other developers will discuss with you and propose an proper solution. - -2. Posting a pull request after fixing the bug and adding corresponding unit test. - -**New Feature or Enhancement** - -1. If the modification involve significant changes, you should create an issue to discuss with our developers to propose an proper design. -2. Post a Pull Request after implementing the new feature or enhancement and add corresponding unit test. - -**Document** - -You can directly post a pull request to fix documents. If you want to add a document, you should first create an issue to check if it is reasonable. - -### Pull Request Workflow - -If you're not familiar with Pull Request, don't worry! The following guidance will tell you how to create a Pull Request step by step. If you want to dive into the develop mode of Pull Request, you can refer to the [official documents](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests) - -#### 1. 
Fork and clone - -If you are posting a pull request for the first time, you should fork the OpenMMLab repositories by clicking the **Fork** button in the top right corner of the GitHub page, and the forked repositories will appear under your GitHub profile. - - - -Then, you can clone the repositories to local: - -```shell -git clone git@github.com:{username}/mmcv.git -``` - -After that, you should ddd official repository as the upstream repository - -```bash -git remote add upstream git@github.com:open-mmlab/mmcv -``` - -Check whether remote repository has been added successfully by `git remote -v` - -```bash -origin git@github.com:{username}/mmcv.git (fetch) -origin git@github.com:{username}/mmcv.git (push) -upstream git@github.com:open-mmlab/mmcv (fetch) -upstream git@github.com:open-mmlab/mmcv (push) -``` - -```{note} -Here's a brief introduction to origin and upstream. When we use "git clone", we create an "origin" remote by default, which points to the repository cloned from. As for "upstream", we add it ourselves to point to the target repository. Of course, if you don't like the name "upstream", you could name it as you wish. Usually, we'll push the code to "origin". If the pushed code conflicts with the latest code in official("upstream"), we should pull the latest code from upstream to resolve the conflicts, and then push to "origin" again. The posted Pull Request will be updated automatically. -``` - -#### 2. Configure pre-commit - -You should configure [pre-commit](https://pre-commit.com/#intro) in the local development environment to make sure the code style matches that of OpenMMLab. **Note**: The following code should be executed under the MMCV directory. - -```shell -pip install -U pre-commit -pre-commit install -``` - -Check that pre-commit is configured successfully, and install the hooks defined in `.pre-commit-config.yaml`. 
- -```shell -pre-commit run --all-files -``` - - - - - -```{note} -Chinese users may fail to download the pre-commit hooks due to the network issue. In this case, you could download these hooks from gitee by setting the .pre-commit-config-zh-cn.yaml - -pre-commit install -c .pre-commit-config-zh-cn.yaml -pre-commit run --all-files -c .pre-commit-config-zh-cn.yaml -``` - -If the installation process is interrupted, you can repeatedly run `pre-commit run ... ` to continue the installation. - -If the code does not conform to the code style specification, pre-commit will raise a warning and fixes some of the errors automatically. - - - -If we want to commit our code bypassing the pre-commit hook, we can use the `--no-verify` option(**only for temporarily commit**. - -```shell -git commit -m "xxx" --no-verify -``` - -#### 3. Create a development branch - -After configuring the pre-commit, we should create a branch based on the master branch to develop the new feature or fix the bug. The proposed branch name is `username/pr_name` - -```shell -git checkout -b yhc/refactor_contributing_doc -``` - -In subsequent development, if the master branch of the local repository is behind the master branch of "upstream", we need to pull the upstream for synchronization, and then execute the above command: - -```shell -git pull upstream master -``` - -#### 4. Commit the code and pass the unit test - -- MMCV introduces mypy to do static type checking to increase the robustness of the code. Therefore, we need to add Type Hints to our code and pass the mypy check. If you are not familiar with Type Hints, you can refer to [this tutorial](https://docs.python.org/3/library/typing.html). 
- -- The committed code should pass through the unit test - - ```shell - # Pass all unit tests - pytest tests - - # Pass the unit test of runner - pytest tests/test_runner/test_runner.py - ``` - - If the unit test fails for lack of dependencies, you can install the dependencies referring to the [guidance](#unit-test) - -- If the documents are modified/added, we should check the rendering result referring to [guidance](#document-rendering) - -#### 5. Push the code to remote - -We could push the local commits to remote after passing through the check of unit test and pre-commit. You can associate the local branch with remote branch by adding `-u` option. - -```shell -git push -u origin {branch_name} -``` - -This will allow you to use the `git push` command to push code directly next time, without having to specify a branch or the remote repository. - -#### 6. Create a Pull Request - -(1) Create a pull request in GitHub's Pull request interface - - - -(2) Modify the PR description according to the guidelines so that other developers can better understand your changes - - - -Find more details about Pull Request description in [pull request guidelines](#pr-specs). - -**note** - -(a) The Pull Request description should contain the reason for the change, the content of the change, and the impact of the change, and be associated with the relevant Issue (see [documentation](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue) - -(b) If it is your first contribution, please sign the CLA - - - -(c) Check whether the Pull Request pass through the CI - - - -MMCV will run unit test for the posted Pull Request on different platforms (Linux, Window, Mac), based on different versions of Python, PyTorch, CUDA to make sure the code is correct. We can see the specific test information by clicking `Details` in the above image so that we can modify the code. 
- -(3) If the Pull Request passes the CI, then you can wait for the review from other developers. You'll modify the code based on the reviewer's comments, and repeat the steps [4](#4-commit-the-code-and-pass-the-unit-test)-[5](#5-push-the-code-to-remote) until all reviewers approve it. Then, we will merge it ASAP. - - - -#### 7. Resolve conflicts - -If your local branch conflicts with the latest master branch of "upstream", you'll need to resolove them. There are two ways to do this: - -```shell -git fetch --all --prune -git rebase upstream/master -``` - -or - -```shell -git fetch --all --prune -git merge upstream/master -``` - -If you are very good at handling conflicts, then you can use rebase to resolve conflicts, as this will keep your commit logs tidy. If you are not familiar with `rebase`, then you can use `merge` to resolve conflicts. - -### Guidance - -#### Unit test - -If you cannot run the unit test of some modules for lacking of some dependencies, such as [video](https://github.com/open-mmlab/mmcv/tree/master/mmcv/video) module, you can try to install the following dependencies: - -```shell -# Linux -sudo apt-get update -y -sudo apt-get install -y libturbojpeg -sudo apt-get install -y ffmpeg - -# Windows -conda install ffmpeg -``` - -We should also make sure the committed code will not decrease the coverage of unit test, we could run the following command to check the coverage of unit test: - -```shell -python -m coverage run -m pytest /path/to/test_file -python -m coverage html -# check file in htmlcov/index.html -``` - -#### Document rendering - -If the documents are modified/added, we should check the rendering result. 
We could install the dependencies and run the following command to render the documents and check the results: - -```shell -pip install -r requirements/docs.txt -cd docs/zh_cn/ -# or docs/en -make html -# check file in ./docs/zh_cn/_build/html/index.html -``` - -### Code style - -#### Python - -We adopt [PEP8](https://www.python.org/dev/peps/pep-0008/) as the preferred code style. - -We use the following tools for linting and formatting: - -- [flake8](https://github.com/PyCQA/flake8): A wrapper around some linter tools. -- [isort](https://github.com/timothycrosley/isort): A Python utility to sort imports. -- [yapf](https://github.com/google/yapf): A formatter for Python files. -- [codespell](https://github.com/codespell-project/codespell): A Python utility to fix common misspellings in text files. -- [mdformat](https://github.com/executablebooks/mdformat): Mdformat is an opinionated Markdown formatter that can be used to enforce a consistent style in Markdown files. -- [docformatter](https://github.com/myint/docformatter): A formatter to format docstring. - -Style configurations of yapf and isort can be found in [setup.cfg](./setup.cfg). - -We use [pre-commit hook](https://pre-commit.com/) that checks and formats for `flake8`, `yapf`, `isort`, `trailing whitespaces`, `markdown files`, -fixes `end-of-files`, `double-quoted-strings`, `python-encoding-pragma`, `mixed-line-ending`, sorts `requirments.txt` automatically on every commit. -The config for a pre-commit hook is stored in [.pre-commit-config](./.pre-commit-config.yaml). - -#### C++ and CUDA - -We follow the [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html). - -### PR Specs - -1. Use [pre-commit](https://pre-commit.com) hook to avoid issues of code style - -2. One short-time branch should be matched with only one PR - -3. Accomplish a detailed change in one PR. 
Avoid large PR - - - Bad: Support Faster R-CNN - - Acceptable: Add a box head to Faster R-CNN - - Good: Add a parameter to box head to support custom conv-layer number - -4. Provide clear and significant commit message - -5. Provide clear and meaningful PR description - - - Task name should be clarified in title. The general format is: \[Prefix\] Short description of the PR (Suffix) - - Prefix: add new feature \[Feature\], fix bug \[Fix\], related to documents \[Docs\], in developing \[WIP\] (which will not be reviewed temporarily) - - Introduce main changes, results and influences on other modules in short description - - Associate related issues and pull requests with a milestone diff --git a/docs/en/community/pr.md b/docs/en/community/pr.md deleted file mode 100644 index 1bdd90f..0000000 --- a/docs/en/community/pr.md +++ /dev/null @@ -1,3 +0,0 @@ -## Pull Request (PR) - -Content has been migrated to [contributing guidance](contributing.md). diff --git a/docs/en/docutils.conf b/docs/en/docutils.conf deleted file mode 100644 index 0c00c84..0000000 --- a/docs/en/docutils.conf +++ /dev/null @@ -1,2 +0,0 @@ -[html writers] -table_style: colwidths-auto diff --git a/docs/en/faq.md b/docs/en/faq.md deleted file mode 100644 index 02d31c2..0000000 --- a/docs/en/faq.md +++ /dev/null @@ -1,93 +0,0 @@ -## Frequently Asked Questions - -We list some common troubles faced by many users and their corresponding solutions here. -Feel free to enrich the list if you find any frequent issues and have ways to help others to solve them. - -### Installation - -- KeyError: "xxx: 'yyy is not in the zzz registry'" - - The registry mechanism will be triggered only when the file of the module is imported. - So you need to import that file somewhere. More details can be found at [KeyError: "MaskRCNN: 'RefineRoIHead is not in the models registry'"](https://github.com/open-mmlab/mmdetection/issues/5974). - -- "No module named 'mmcv.ops'"; "No module named 'mmcv.\_ext'" - - 1. 
Uninstall existing mmcv in the environment using `pip uninstall mmcv` - 2. Install mmcv-full following the [installation instruction](https://mmcv.readthedocs.io/en/latest/get_started/installation.html) or [Build MMCV from source](https://mmcv.readthedocs.io/en/latest/get_started/build.html) - -- "invalid device function" or "no kernel image is available for execution" - - 1. Check the CUDA compute capability of you GPU - 2. Run `python mmdet/utils/collect_env.py` to check whether PyTorch, torchvision, and MMCV are built for the correct GPU architecture. You may need to set `TORCH_CUDA_ARCH_LIST` to reinstall MMCV. The compatibility issue could happen when using old GPUS, e.g., Tesla K80 (3.7) on colab. - 3. Check whether the running environment is the same as that when mmcv/mmdet is compiled. For example, you may compile mmcv using CUDA 10.0 bug run it on CUDA9.0 environments - -- "undefined symbol" or "cannot open xxx.so" - - 1. If those symbols are CUDA/C++ symbols (e.g., libcudart.so or GLIBCXX), check - whether the CUDA/GCC runtimes are the same as those used for compiling mmcv - 2. If those symbols are Pytorch symbols (e.g., symbols containing caffe, aten, and TH), check whether the Pytorch version is the same as that used for compiling mmcv - 3. Run `python mmdet/utils/collect_env.py` to check whether PyTorch, torchvision, and MMCV are built by and running on the same environment - -- "RuntimeError: CUDA error: invalid configuration argument" - - This error may be caused by the poor performance of GPU. Try to decrease the value of [THREADS_PER_BLOCK](https://github.com/open-mmlab/mmcv/blob/cac22f8cf5a904477e3b5461b1cc36856c2793da/mmcv/ops/csrc/common_cuda_helper.hpp#L10) - and recompile mmcv. - -- "RuntimeError: nms is not compiled with GPU support" - - This error is because your CUDA environment is not installed correctly. - You may try to re-install your CUDA environment and then delete the build/ folder before re-compile mmcv. 
- -- "Segmentation fault" - - 1. Check your GCC version and use GCC >= 5.4. This usually caused by the incompatibility between PyTorch and the environment (e.g., GCC \< 4.9 for PyTorch). We also recommend the users to avoid using GCC 5.5 because many feedbacks report that GCC 5.5 will cause "segmentation fault" and simply changing it to GCC 5.4 could solve the problem - 2. Check whether PyTorch is correctly installed and could use CUDA op, e.g. type the following command in your terminal and see whether they could correctly output results - ```shell - python -c 'import torch; print(torch.cuda.is_available())' - ``` - 3. If PyTorch is correctly installed, check whether MMCV is correctly installed. If MMCV is correctly installed, then there will be no issue of the command - ```shell - python -c 'import mmcv; import mmcv.ops' - ``` - 4. If MMCV and PyTorch are correctly installed, you can use `ipdb` to set breakpoints or directly add `print` to debug and see which part leads the `segmentation fault` - -- "libtorch_cuda_cu.so: cannot open shared object file" - - `mmcv-full` depends on the share object but it can not be found. We can check whether the object exists in `~/miniconda3/envs/{environment-name}/lib/python3.7/site-packages/torch/lib` or try to re-install the PyTorch. - -- "fatal error C1189: #error: -- unsupported Microsoft Visual Studio version!" - - If you are building mmcv-full on Windows and the version of CUDA is 9.2, you will probably encounter the error `"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.2\include\crt/host_config.h(133): fatal error C1189: #error: -- unsupported Microsoft Visual Studio version! Only the versions 2012, 2013, 2015 and 2017 are supported!"`, in which case you can use a lower version of Microsoft Visual Studio like vs2017. 
- -- "error: member "torch::jit::detail::ModulePolicy::all_slots" may not be initialized" - - If your version of PyTorch is 1.5.0 and you are building mmcv-full on Windows, you will probably encounter the error `- torch/csrc/jit/api/module.h(474): error: member "torch::jit::detail::ModulePolicy::all_slots" may not be initialized`. The way to solve the error is to replace all the `static constexpr bool all_slots = false;` with `static bool all_slots = false;` at this file `https://github.com/pytorch/pytorch/blob/v1.5.0/torch/csrc/jit/api/module.h`. More details can be found at [member "torch::jit::detail::AttributePolicy::all_slots" may not be initialized](https://github.com/pytorch/pytorch/issues/39394). - -- "error: a member with an in-class initializer must be const" - - If your version of PyTorch is 1.6.0 and you are building mmcv-full on Windows, you will probably encounter the error `"- torch/include\torch/csrc/jit/api/module.h(483): error: a member with an in-class initializer must be const"`. The way to solve the error is to replace all the `CONSTEXPR_EXCEPT_WIN_CUDA ` with `const` at `torch/include\torch/csrc/jit/api/module.h`. More details can be found at [Ninja: build stopped: subcommand failed](https://github.com/open-mmlab/mmcv/issues/575). - -- "error: member "torch::jit::ProfileOptionalOp::Kind" may not be initialized" - - If your version of PyTorch is 1.7.0 and you are building mmcv-full on Windows, you will probably encounter the error `torch/include\torch/csrc/jit/ir/ir.h(1347): error: member "torch::jit::ProfileOptionalOp::Kind" may not be initialized`. 
The way to solve the error needs to modify several local files of PyTorch: - - - delete `static constexpr Symbol Kind = ::c10::prim::profile;` and `tatic constexpr Symbol Kind = ::c10::prim::profile_optional;` at `torch/include\torch/csrc/jit/ir/ir.h` - - replace `explicit operator type&() { return *(this->value); }` with `explicit operator type&() { return *((type*)this->value); }` at `torch\include\pybind11\cast.h` - - replace all the `CONSTEXPR_EXCEPT_WIN_CUDA` with `const` at `torch/include\torch/csrc/jit/api/module.h` - - More details can be found at [Ensure default extra_compile_args](https://github.com/pytorch/pytorch/pull/45956). - -- Compatibility issue between MMCV and MMDetection; "ConvWS is already registered in conv layer" - - Please install the correct version of MMCV for the version of your MMDetection following the [installation instruction](https://mmdetection.readthedocs.io/en/latest/get_started.html#installation). - -### Usage - -- "RuntimeError: Expected to have finished reduction in the prior iteration before starting a new one" - - 1. This error indicates that your module has parameters that were not used in producing loss. This phenomenon may be caused by running different branches in your code in DDP mode. More datails at [Expected to have finished reduction in the prior iteration before starting a new one](https://github.com/pytorch/pytorch/issues/55582). - 2. You can set ` find_unused_parameters = True` in the config to solve the above problems or find those unused parameters manually - -- "RuntimeError: Trying to backward through the graph a second time" - - `GradientCumulativeOptimizerHook` and `OptimizerHook` are both set which causes the `loss.backward()` to be called twice so `RuntimeError` was raised. We can only use one of these. More datails at [Trying to backward through the graph a second time](https://github.com/open-mmlab/mmcv/issues/1379). 
diff --git a/docs/en/get_started/build.md b/docs/en/get_started/build.md deleted file mode 100644 index e3d48ec..0000000 --- a/docs/en/get_started/build.md +++ /dev/null @@ -1,292 +0,0 @@ -## Build MMCV from source - -### Build mmcv - -Before installing mmcv, make sure that PyTorch has been successfully installed following the [PyTorch official installation guide](https://pytorch.org/get-started/locally/#start-locally). This can be verified using the following command - -```bash -python -c 'import torch;print(torch.__version__)' -``` - -If version information is output, then PyTorch is installed. - -```{note} -If you would like to use `opencv-python-headless` instead of `opencv-python`, -e.g., in a minimum container environment or servers without GUI, -you can first install it before installing MMCV to skip the installation of `opencv-python`. -``` - -#### Build on Linux - -1. Clone the repo - - ```bash - git clone https://github.com/open-mmlab/mmcv.git - cd mmcv - ``` - -2. Install `ninja` and `psutil` to speed up the compilation - - ```bash - pip install -r requirements/optional.txt - ``` - -3. Check the nvcc version (requires 9.2+. Skip if no GPU available.) - - ```bash - nvcc --version - ``` - - If the above command outputs the following message, it means that the nvcc setting is OK, otherwise you need to set CUDA_HOME. - - ``` - nvcc: NVIDIA (R) Cuda compiler driver - Copyright (c) 2005-2020 NVIDIA Corporation - Built on Mon_Nov_30_19:08:53_PST_2020 - Cuda compilation tools, release 11.2, V11.2.67 - Build cuda_11.2.r11.2/compiler.29373293_0 - ``` - - :::{note} - If you want to support ROCm, you can refer to [AMD ROCm](https://rocmdocs.amd.com/en/latest/Installation_Guide/Installation-Guide.html) to install ROCm. - ::: - -4. Check the gcc version (requires 5.4+) - - ```bash - gcc --version - ``` - -5. Start building (takes 10+ min) - - ```bash - pip install -e . -v - ``` - -6. 
Validate the installation - - ```bash - python .dev_scripts/check_installation.py - ``` - - If no error is reported by the above command, the installation is successful. If there is an error reported, please check [Frequently Asked Questions](../faq.md) to see if there is already a solution. - - If no solution is found, please feel free to open an [issue](https://github.com/open-mmlab/mmcv/issues). - -#### Build on macOS - -```{note} -If you are using a mac with apple silicon chip, install the PyTorch 1.13+, otherwise you will encounter the problem in [issues#2218](https://github.com/open-mmlab/mmcv/issues/2218). -``` - -1. Clone the repo - - ```bash - git clone https://github.com/open-mmlab/mmcv.git - cd mmcv - ``` - -2. Install `ninja` and `psutil` to speed up the compilation - - ```bash - pip install -r requirements/optional.txt - ``` - -3. Start building - - ```bash - MMCV_WITH_OPS=1 pip install -e . - ``` - -4. Validate the installation - - ```bash - python .dev_scripts/check_installation.py - ``` - - If no error is reported by the above command, the installation is successful. If there is an error reported, please check [Frequently Asked Questions](../faq.md) to see if there is already a solution. - - If no solution is found, please feel free to open an [issue](https://github.com/open-mmlab/mmcv/issues). - -#### Build on Windows - -Building MMCV on Windows is a bit more complicated than that on Linux. -The following instructions show how to get this accomplished. - -##### Prerequisite - -The following software is required for building MMCV on windows. -Install them first. - -- [Git](https://git-scm.com/download/win) - - During installation, tick **add git to Path**. -- [Visual Studio Community 2019](https://visualstudio.microsoft.com) - - A compiler for C++ and CUDA codes. -- [Miniconda](https://docs.conda.io/en/latest/miniconda.html) - - Official distributions of Python should work too. 
-- [CUDA 10.2](https://developer.nvidia.com/cuda-10.2-download-archive) - - Not required for building CPU version. - - Customize the installation if necessary. As a recommendation, skip the driver installation if a newer version is already installed. - -```{note} -You should know how to set up environment variables, especially `Path`, on Windows. The following instruction relies heavily on this skill. -``` - -##### Common steps - -1. Launch Anaconda prompt from Windows Start menu - - Do not use raw `cmd.exe` s instruction is based on PowerShell syntax. - -2. Create a new conda environment - - ```powershell - (base) PS C:\Users\xxx> conda create --name mmcv python=3.7 - (base) PS C:\Users\xxx> conda activate mmcv # make sure to activate environment before any operation - ``` - -3. Install PyTorch. Choose a version based on your need. - - ```powershell - # CUDA version - (mmcv) PS C:\Users\xxx> conda install pytorch torchvision cudatoolkit=10.2 -c pytorch - # CPU version - (mmcv) PS C:\Users\xxx> conda install install pytorch torchvision cpuonly -c pytorch - ``` - -4. Clone the repo - - ```powershell - (mmcv) PS C:\Users\xxx> git clone https://github.com/open-mmlab/mmcv.git - (mmcv) PS C:\Users\xxx\mmcv> cd mmcv - ``` - -5. Install `ninja` and `psutil` to speed up the compilation - - ```powershell - (mmcv) PS C:\Users\xxx\mmcv> pip install -r requirements/optional.txt - ``` - -6. Set up MSVC compiler - - Set Environment variable, add `C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.27.29110\bin\Hostx86\x64` to `PATH`, so that `cl.exe` will be available in prompt, as shown below. - - ```powershell - (mmcv) PS C:\Users\xxx\mmcv> cl - Microsoft (R) C/C++ Optimizing Compiler Version 19.27.29111 for x64 - Copyright (C) Microsoft Corporation. All rights reserved. - - usage: cl [ option... ] filename... [ / link linkoption... ] - ``` - - For compatibility, we use the x86-hosted and x64-targeted compiler. note `Hostx86\x64` in the path. 
- - You may want to change the system language to English because pytorch will parse text output from `cl.exe` to check its version. However only utf-8 is recognized. Navigate to Control Panel -> Region -> Administrative -> Language for Non-Unicode programs and change it to English. - -##### Build and install MMCV - -mmcv can be built in two ways: - -1. Full version (CPU ops) - - Module `ops` will be compiled as a pytorch extension, but only x86 code will be compiled. The compiled ops can be executed on CPU only. - -2. Full version (CUDA ops) - - Both x86 and CUDA codes of `ops` module will be compiled. The compiled version can be run on both CPU and CUDA-enabled GPU (if implemented). - -###### CPU version - -Build and install - -```powershell -(mmcv) PS C:\Users\xxx\mmcv> python setup.py build_ext -(mmcv) PS C:\Users\xxx\mmcv> python setup.py develop -``` - -###### GPU version - -1. Make sure `CUDA_PATH` or `CUDA_HOME` is already set in `envs` via `ls env:`, desired output is shown as below: - - ```powershell - (mmcv) PS C:\Users\xxx\mmcv> ls env: - - Name Value - ---- ----- - CUDA_PATH C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2 - CUDA_PATH_V10_1 C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1 - CUDA_PATH_V10_2 C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2 - ``` - - This should already be done by CUDA installer. If not, or you have multiple version of CUDA toolkit installed, set it with - - ```powershell - (mmcv) PS C:\Users\xxx\mmcv> $env:CUDA_HOME = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2" - # OR - (mmcv) PS C:\Users\xxx\mmcv> $env:CUDA_HOME = $env:CUDA_PATH_V10_2 # if CUDA_PATH_V10_2 is in envs: - ``` - -2. Set CUDA target arch - - ```shell - # Here you need to change to the target architecture corresponding to your GPU - (mmcv) PS C:\Users\xxx\mmcv> $env:TORCH_CUDA_ARCH_LIST="7.5" - ``` - - :::{note} - Check your the compute capability of your GPU from [here](https://developer.nvidia.com/cuda-gpus). 
- - ```powershell - (mmcv) PS C:\Users\xxx\mmcv> &"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2\extras\demo_suite\deviceQuery.exe" - Device 0: "NVIDIA GeForce GTX 1660 SUPER" - CUDA Driver Version / Runtime Version 11.7 / 11.1 - CUDA Capability Major/Minor version number: 7.5 - ``` - - The 7.5 above indicates the target architecture. Note: You need to replace v10.2 with your CUDA version in the above command. - ::: - -3. Build and install - - ```powershell - # build - python setup.py build_ext # if success, cl will be launched to compile ops - # install - python setup.py develop - ``` - - ```{note} - If you are compiling against PyTorch 1.6.0, you might meet some errors from PyTorch as described in [this issue](https://github.com/pytorch/pytorch/issues/42467). Follow [this pull request](https://github.com/pytorch/pytorch/pull/43380/files) to modify the source code in your local PyTorch installation. - ``` - -##### Validate installation - -```powershell -(mmcv) PS C:\Users\xxx\mmcv> python .dev_scripts/check_installation.py -``` - -If no error is reported by the above command, the installation is successful. If there is an error reported, please check [Frequently Asked Questions](../faq.md) to see if there is already a solution. -If no solution is found, please feel free to open an [issue](https://github.com/open-mmlab/mmcv/issues). - -### Build mmcv-lite - -If you need to use PyTorch-related modules, make sure PyTorch has been successfully installed in your environment by referring to the [PyTorch official installation guide](https://github.com/pytorch/pytorch#installation). - -1. Clone the repo - - ```bash - git clone https://github.com/open-mmlab/mmcv.git - cd mmcv - ``` - -2. Start building - - ```bash - MMCV_WITH_OPS=0 pip install -e . -v - ``` - -3. 
Validate installation - - ```bash - python -c 'import mmcv;print(mmcv.__version__)' - ``` diff --git a/docs/en/get_started/installation.md b/docs/en/get_started/installation.md deleted file mode 100644 index 12bad00..0000000 --- a/docs/en/get_started/installation.md +++ /dev/null @@ -1,348 +0,0 @@ -## Installation - -There are two versions of MMCV: - -- **mmcv**: comprehensive, with full features and various CUDA ops out of box. It takes longer time to build. -- **mmcv-lite**: lite, without CUDA ops but all other features, similar to mmcv\<1.0.0. It is useful when you do not need those CUDA ops. - -```{warning} -Do not install both versions in the same environment, otherwise you may encounter errors like `ModuleNotFound`. You need to uninstall one before installing the other. `Installing the full version is highly recommended if CUDA is avaliable`. -``` - -### Install mmcv - -Before installing mmcv, make sure that PyTorch has been successfully installed following the [PyTorch official installation guide](https://pytorch.org/get-started/locally/#start-locally). This can be verified using the following command - -```bash -python -c 'import torch;print(torch.__version__)' -``` - -If version information is output, then PyTorch is installed. - -#### Install with mim (recommended) - -[mim](https://github.com/open-mmlab/mim) is the package management tool for the OpenMMLab projects, which makes it easy to install mmcv - -```bash -pip install -U openmim -mim install "mmcv>=2.0.0rc1" -``` - -If you find that the above installation command does not use a pre-built package ending with `.whl` but a source package ending with `.tar.gz`, you may not have a pre-build package corresponding to the PyTorch or CUDA or mmcv version, in which case you can [build mmcv from source](build.md). - -
-Installation log using pre-built packages - -Looking in links: https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/index.html
-Collecting mmcv
-Downloading https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/mmcv-2.0.0rc3-cp38-cp38-manylinux1_x86_64.whl - -
- -
-Installation log using source packages - -Looking in links: https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/index.html
-Collecting mmcv==2.0.0rc3
-Downloading mmcv-2.0.0rc3.tar.gz - -
- -To install a specific version of mmcv, for example, mmcv version 2.0.0rc3, you can use the following command - -```bash -mim install mmcv==2.0.0rc3 -``` - -:::{note} -If you would like to use `opencv-python-headless` instead of `opencv-python`, -e.g., in a minimum container environment or servers without GUI, -you can first install it before installing MMCV to skip the installation of `opencv-python`. - -Alternatively, if it takes too long to install a dependency library, you can specify the pypi source - -```bash -mim install "mmcv>=2.0.0rc3" -i https://pypi.tuna.tsinghua.edu.cn/simple -``` - -::: - -You can run [check_installation.py](https://github.com/open-mmlab/mmcv/blob/2.x/.dev_scripts/check_installation.py) to check the installation of mmcv-full after running the installation commands. - -#### Install with pip - -Use the following command to check the version of CUDA and PyTorch - -```bash -python -c 'import torch;print(torch.__version__);print(torch.version.cuda)' -``` - -Select the appropriate installation command depending on the type of system, CUDA version, PyTorch version, and MMCV version - - - - -
- - - - -
-

-
-
-
-
-If you do not find a corresponding version in the dropdown box above, you probably do not have a pre-built package corresponding to the PyTorch or CUDA or mmcv version, at which point you can [build mmcv from source](build.md).
-
-:::{note}
-mmcv is only compiled on PyTorch 1.x.0 because the compatibility
-usually holds between 1.x.0 and 1.x.1. If your PyTorch version is 1.x.1, you
-can install mmcv compiled with PyTorch 1.x.0 and it usually works well.
-For example, if your PyTorch version is 1.8.1, you can feel free to choose 1.8.x.
-:::
-
-:::{note}
-If you would like to use `opencv-python-headless` instead of `opencv-python`,
-e.g., in a minimum container environment or servers without GUI,
-you can first install it before installing MMCV to skip the installation of `opencv-python`.
-
-Alternatively, if it takes too long to install a dependency library, you can specify the pypi source
-
-```bash
-mim install "mmcv>=2.0.0rc1" -i https://pypi.tuna.tsinghua.edu.cn/simple
-```
-
-:::
-
-You can run [check_installation.py](https://github.com/open-mmlab/mmcv/blob/2.x/.dev_scripts/check_installation.py) to check the installation of mmcv after running the installation commands.
-
-#### Using mmcv with Docker
-
-Build with local repository
-
-```bash
-git clone https://github.com/open-mmlab/mmcv.git && cd mmcv
-docker build -t mmcv -f docker/release/Dockerfile .
-```
-
-Or build with remote repository
-
-```bash
-docker build -t mmcv https://github.com/open-mmlab/mmcv.git#2.x:docker/release
-```
-
-The [Dockerfile](release/Dockerfile) installs the latest released version of mmcv by default, but you can specify an mmcv version to install the expected one.
-
-```bash
-docker image build -t mmcv -f docker/release/Dockerfile --build-arg MMCV=2.0.0rc1 .
-```
-
-If you want to use other versions of PyTorch and CUDA, you can also specify them when building docker images.
-
-An example to build an image with PyTorch 1.11 and CUDA 11.3.
-
-```bash
-docker build -t mmcv -f docker/release/Dockerfile \
-    --build-arg PYTORCH=1.11.0 \
-    --build-arg CUDA=11.3 \
-    --build-arg CUDNN=8 \
-    --build-arg MMCV=2.0.0rc1 .
-```
-
-More available versions of PyTorch and CUDA can be found at [dockerhub/pytorch](https://hub.docker.com/r/pytorch/pytorch/tags).
-
-### Install mmcv-lite
-
-If you need to use PyTorch-related modules, make sure PyTorch has been successfully installed in your environment by referring to the [PyTorch official installation guide](https://github.com/pytorch/pytorch#installation).
-
-```bash
-pip install mmcv-lite
-```
diff --git a/docs/en/get_started/introduction.md b/docs/en/get_started/introduction.md
deleted file mode 100644
index 461fcc7..0000000
--- a/docs/en/get_started/introduction.md
+++ /dev/null
@@ -1,36 +0,0 @@
-## Introduction
-
-MMCV is a foundational library for computer vision research and provides the following functionalities.
-
-- [Image/Video processing](../understand_mmcv/data_process.md)
-- [Image and annotation visualization](../understand_mmcv/visualization.md)
-- [Image transformation](../understand_mmcv/data_transform.md)
-- [Various CNN architectures](../understand_mmcv/cnn.md)
-- [High-quality implementation of common CUDA ops](../understand_mmcv/ops.md)
-
-It supports the following systems:
-
-- Linux
-- Windows
-- macOS
-
-It supports many research projects as below:
-
-- [MMClassification](https://github.com/open-mmlab/mmclassification): OpenMMLab image classification toolbox and benchmark.
-- [MMDetection](https://github.com/open-mmlab/mmdetection): OpenMMLab detection toolbox and benchmark.
-- [MMDetection3D](https://github.com/open-mmlab/mmdetection3d): OpenMMLab's next-generation platform for general 3D object detection.
-- [MMRotate](https://github.com/open-mmlab/mmrotate): OpenMMLab rotated object detection toolbox and benchmark.
-- [MMYOLO](https://github.com/open-mmlab/mmyolo): OpenMMLab YOLO series toolbox and benchmark.
-- [MMSegmentation](https://github.com/open-mmlab/mmsegmentation): OpenMMLab semantic segmentation toolbox and benchmark.
-- [MMOCR](https://github.com/open-mmlab/mmocr): OpenMMLab text detection, recognition, and understanding toolbox.
-- [MMPose](https://github.com/open-mmlab/mmpose): OpenMMLab pose estimation toolbox and benchmark.
-- [MMHuman3D](https://github.com/open-mmlab/mmhuman3d): OpenMMLab 3D human parametric model toolbox and benchmark.
-- [MMSelfSup](https://github.com/open-mmlab/mmselfsup): OpenMMLab self-supervised learning toolbox and benchmark.
-- [MMRazor](https://github.com/open-mmlab/mmrazor): OpenMMLab model compression toolbox and benchmark.
-- [MMFewShot](https://github.com/open-mmlab/mmfewshot): OpenMMLab fewshot learning toolbox and benchmark.
-- [MMAction2](https://github.com/open-mmlab/mmaction2): OpenMMLab's next-generation action understanding toolbox and benchmark.
-- [MMTracking](https://github.com/open-mmlab/mmtracking): OpenMMLab video perception toolbox and benchmark.
-- [MMFlow](https://github.com/open-mmlab/mmflow): OpenMMLab optical flow toolbox and benchmark.
-- [MMEditing](https://github.com/open-mmlab/mmediting): OpenMMLab image and video editing toolbox.
-- [MMGeneration](https://github.com/open-mmlab/mmgeneration): OpenMMLab image and video generative models toolbox.
-- [MMDeploy](https://github.com/open-mmlab/mmdeploy): OpenMMLab model deployment framework.
diff --git a/docs/en/switch_language.md b/docs/en/switch_language.md
deleted file mode 100644
index 9dc7b34..0000000
--- a/docs/en/switch_language.md
+++ /dev/null
@@ -1,3 +0,0 @@
-## English
-
-## 简体中文
diff --git a/docs/en/understand_mmcv/cnn.md b/docs/en/understand_mmcv/cnn.md
deleted file mode 100644
index 2c42f25..0000000
--- a/docs/en/understand_mmcv/cnn.md
+++ /dev/null
@@ -1,120 +0,0 @@
-## CNN
-
-We provide some building bricks for CNNs, including layer building, module bundles and weight initialization.
-
-### Layer building
-
-We may need to try different layers of the same type when running experiments,
-but do not want to modify the code from time to time.
-Here we provide some layer building methods to construct layers from a dict,
-which can be written in configs or specified via command line arguments.
-
-#### Usage
-
-A simplest example is
-
-```python
-from mmcv.cnn import build_conv_layer
-
-cfg = dict(type='Conv3d')
-layer = build_conv_layer(cfg, in_channels=3, out_channels=8, kernel_size=3)
-```
-
-- `build_conv_layer`: Supported types are Conv1d, Conv2d, Conv3d, Conv (alias for Conv2d).
-- `build_norm_layer`: Supported types are BN1d, BN2d, BN3d, BN (alias for BN2d), SyncBN, GN, LN, IN1d, IN2d, IN3d, IN (alias for IN2d).
-- `build_activation_layer`: Supported types are ReLU, LeakyReLU, PReLU, RReLU, ReLU6, ELU, Sigmoid, Tanh, GELU.
-- `build_upsample_layer`: Supported types are nearest, bilinear, deconv, pixel_shuffle.
-- `build_padding_layer`: Supported types are zero, reflect, replicate.
-
-#### Extension
-
-We also allow extending the building methods with custom layers and operators.
-
-1. Write and register your own module.
-
-   ```python
-   from mmengine.registry import MODELS
-
-   @MODELS.register_module()
-   class MyUpsample:
-
-       def __init__(self, scale_factor):
-           pass
-
-       def forward(self, x):
-           pass
-   ```
-
-2. Import `MyUpsample` somewhere (e.g., in `__init__.py`) and then use it.
-
-   ```python
-   from mmcv.cnn import build_upsample_layer
-
-   cfg = dict(type='MyUpsample', scale_factor=2)
-   layer = build_upsample_layer(cfg)
-   ```
-
-### Module bundles
-
-We also provide common module bundles to facilitate the network construction.
-`ConvModule` is a bundle of convolution, normalization and activation layers,
-please refer to the [api](api.html#mmcv.cnn.ConvModule) for details.
-
-```python
-from mmcv.cnn import ConvModule
-
-# conv + bn + relu
-conv = ConvModule(3, 8, 2, norm_cfg=dict(type='BN'))
-# conv + gn + relu
-conv = ConvModule(3, 8, 2, norm_cfg=dict(type='GN', num_groups=2))
-# conv + relu
-conv = ConvModule(3, 8, 2)
-# conv
-conv = ConvModule(3, 8, 2, act_cfg=None)
-# conv + leaky relu
-conv = ConvModule(3, 8, 3, padding=1, act_cfg=dict(type='LeakyReLU'))
-# bn + conv + relu
-conv = ConvModule(
-    3, 8, 2, norm_cfg=dict(type='BN'), order=('norm', 'conv', 'act'))
-```
-
-### Model Zoo
-
-Besides torchvision pre-trained models, we also provide pre-trained models of following CNN:
-
-- VGG Caffe
-- ResNet Caffe
-- ResNeXt
-- ResNet with Group Normalization
-- ResNet with Group Normalization and Weight Standardization
-- HRNetV2
-- Res2Net
-- RegNet
-
-#### Model URLs in JSON
-
-The model zoo links in MMCV are managed by JSON files.
-The json file consists of key-value pair of model name and its url or path.
-An example json file could be like:
-
-```json
-{
-    "model_a": "https://example.com/models/model_a_9e5bac.pth",
-    "model_b": "pretrain/model_b_ab3ef2c.pth"
-}
-```
-
-The default links of the pre-trained models hosted on OpenMMLab AWS could be found [here](https://github.com/open-mmlab/mmcv/blob/master/mmcv/model_zoo/open_mmlab.json).
-
-You may override default links by putting `open-mmlab.json` under `MMCV_HOME`. If `MMCV_HOME` is not found in your environment, `~/.cache/mmcv` will be used by default. You may use your own path with `export MMCV_HOME=/your/path`.
-
-The external json files will be merged into default one. If the same key presents in both external json and default json, the external one will be used.
-
-#### Load Checkpoint
-
-The following types are supported for `filename` of `mmcv.load_checkpoint()`.
-
-- filepath: The filepath of the checkpoint.
-- `http://xxx` and `https://xxx`: The link to download the checkpoint. The `SHA256` postfix should be contained in the filename.
-- `torchvision://xxx`: The model links in `torchvision.models`. Please refer to [torchvision](https://pytorch.org/docs/stable/torchvision/models.html) for details.
-- `open-mmlab://xxx`: The model links or filepath provided in default and additional json files.
diff --git a/docs/en/understand_mmcv/data_transform.md b/docs/en/understand_mmcv/data_transform.md
deleted file mode 100644
index 64c3af9..0000000
--- a/docs/en/understand_mmcv/data_transform.md
+++ /dev/null
@@ -1,341 +0,0 @@
-# Data Transformation
-
-In the OpenMMLab algorithm library, dataset construction and data preparation are decoupled. Usually, the construction of the dataset only parses the dataset and records the basic information of each sample, while the data preparation is a series of data transformations including data loading, preprocessing, formatting, and other operations performed according to the basic information of the sample.
-
-## Design of data transformation
-
-In MMCV, we use various callable data transformation classes to manipulate data. These data transformation classes can accept several configuration parameters for the instantiation and then process the input data dictionary by `__call__` method. All data transformation methods accept a dictionary as the input and produce the output as a dictionary as well. A simple example is as follows:
-
-```python
->>> import numpy as np
->>> from mmcv.transforms import Resize
->>>
->>> transform = Resize(scale=(224, 224))
->>> data_dict = {'img': np.random.rand(256, 256, 3)}
->>> data_dict = transform(data_dict)
->>> print(data_dict['img'].shape)
-(224, 224, 3)
-```
-
-The data transformation class reads some fields of the input dictionary and may add or update some fields. The keys of these fields are mostly fixed. For example, `Resize` will always read fields such as `"img"` in the input dictionary. More information about the conventions for input and output fields could be found in the documentation of the corresponding class.
-
-```{note}
-By convention, the order of image shape which is used as **initialization parameters** in data transformation (such as Resize, Pad) is (width, height). In the dictionary returned by the data transformation, the image related shape, such as `img_shape`, `ori_shape`, `pad_shape`, etc., is (height, width).
-```
-
-MMCV provides a unified base class called `BaseTransform` for all data transformation classes:
-
-```python
-class BaseTransform(metaclass=ABCMeta):
-
-    def __call__(self, results: dict) -> dict:
-
-        return self.transform(results)
-
-    @abstractmethod
-    def transform(self, results: dict) -> dict:
-        pass
-```
-
-All data transformation classes must inherit `BaseTransform` and implement the `transform` method. Both the input and output of the `transform` method are a dictionary. In the **Custom data transformation class** section, we will describe how to implement a data transformation class in more detail.
-
-## Data pipeline
-
-As mentioned above, the inputs and outputs of all data transformations are dictionaries. Moreover, according to the \[Convention on Datasets\] (TODO) in OpenMMLab, the basic information of each sample in the dataset is also a dictionary. This way, we can connect all data transformation operations end to end and combine them into a data pipeline. This pipeline inputs the information dictionary of the samples in the dataset and outputs the information dictionary after a series of processing.
-
-Taking the classification task as an example, we show a typical data pipeline in the figure below. For each sample, the information stored in the dataset is a dictionary, as shown on the far left in the figure. After each data transformation operation represented by the blue block, a new field (marked in green) will be added to the data dictionary or an existing field (marked in orange) will be updated.
-
-
- -
- -The data pipeline is a list of several data transformation configuration dictionaries in the configuration file. Each dataset needs to set the parameter `pipeline` to define the data preparation operations the dataset needs to perform. The configuration of the above data pipeline in the configuration file is as follows: - -```python -pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='Resize', size=256, keep_ratio=True), - dict(type='CenterCrop', crop_size=224), - dict(type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375]), - dict(type='ClsFormatBundle') -] - -dataset = dict( - ... - pipeline=pipeline, - ... -) -``` - -## Common data transformation classes - -The commonly used data transformation classes can be roughly divided into data loading, data preprocessing and augmentation, and data formatting. In MMCV, we provide some commonly used classes as follows: - -### Data loading - -To support the loading of large-scale datasets, data is usually not loaded when `Dataset` is initialized. Only the corresponding path is loaded. Therefore, it is necessary to load specific data in the data pipeline. - -| Class | Feature | -| :-------------------------: | :--------------------------------------------: | -| [`LoadImageFromFile`](TODO) | Load from file path | -| [`LoadAnnotations`](TODO) | Load and organize the annotations (bbox, etc.) | - -### Data preprocessing and enhancement - -Data preprocessing and augmentation usually involve transforming the image itself, such as cropping, padding, scaling, etc. 
- -| Class | Feature | -| :------------------------------: | :----------------------------------------------------: | -| [`Pad`](TODO) | Padding | -| [`CenterCrop`](TODO) | Center crop | -| [`Normalize`](TODO) | Image normalization | -| [`Resize`](TODO) | Resize to the specified size or ratio | -| [`RandomResize`](TODO) | Scale the image randomly within the specified range | -| [`RandomMultiscaleResize`](TODO) | Scale the image to a random size from multiple options | -| [`RandomGrayscale`](TODO) | Random grayscale | -| [`RandomFlip`](TODO) | Random flip | -| [`MultiScaleFlipAug`](TODO) | Support scaling and flipping during the testing | - -### Data formatting - -Data formatting operations are type conversions performed on the data. - -| Class | Feature | -| :---------------------: | :------------------------------------------: | -| [`ToTensor`](TODO) | Convert the specified data to `torch.Tensor` | -| [`ImageToTensor`](TODO) | Convert the image to `torch.Tensor` | - -## Customize data transformation classes - -To implement a new data transformation class, you must inherit `BaseTransform` and implement the `transform` method. Here, we use a simple flip transform (`MyFlip`) as an example: - -```python -import random -import mmcv -from mmcv.transforms import BaseTransform, TRANSFORMS - -@TRANSFORMS.register_module() -class MyFlip(BaseTransform): - def __init__(self, direction: str): - super().__init__() - self.direction = direction - - def transform(self, results: dict) -> dict: - img = results['img'] - results['img'] = mmcv.imflip(img, direction=self.direction) - return results -``` - -Now, we can instantiate `MyFlip` as a callable object to handle our data dictionary. - -```python -import numpy as np - -transform = MyFlip(direction='horizontal') -data_dict = {'img': np.random.rand(224, 224, 3)} -data_dict = transform(data_dict) -processed_img = data_dict['img'] -``` - -Alternatively, use `MyFlip` transform in the `pipeline` of the config file. 
- -```python -pipeline = [ - ... - dict(type='MyFlip', direction='horizontal'), - ... -] -``` - -It should be noted that if you want to use it in the configuration file, you must ensure that the file where the `MyFlip` class is located can be imported at the runtime. - -## Transform wrapper - -Transform wrappers are a special class of data transformations. They do not operate on images, labels or other information in the data dictionary by themselves. Instead, they enhance the behavior of data transformations defined in them. - -### KeyMapper - -`KeyMapper` is used to map fields in the data dictionary. For example, image processing transforms usually get their values from the `"img"` field in the data dictionary. But sometimes we want these transforms to handle images in other fields in the data dictionary, such as the `"gt_img"` field. - -When used with registry and configuration file, the field map wrapper should be used as follows: - -```python -pipeline = [ - ... - dict(type='KeyMapper', - mapping={ - 'img': 'gt_img', # map "gt_img" to "img" - 'mask': ..., # The "mask" field in the raw data is not used. That is, for wrapped data transformations, the "mask" field is not included in the data - }, - auto_remap=True, # remap "img" back to "gt_img" after the transformation - transforms=[ - # only need to specify "img" in `RandomFlip` - dict(type='RandomFlip'), - ]) - ... -] -``` - -With `KeyMapper`, we don't need to consider various possible input field names in the `transform` method when we implement the data transformation class. We only need to deal with the default fields. - -### RandomChoice and RandomApply - -`RandomChoice` is used to randomly select a data transformation pipeline from the given choices. With this wrapper, we can easily implement some data augmentation functions, such as AutoAugment. - -In configuration file, you can use `RandomChoice` as follows: - -```python -pipeline = [ - ... 
- dict(type='RandomChoice', - transforms=[ - [ - dict(type='Posterize', bits=4), - dict(type='Rotate', angle=30.) - ], # the first combo option - [ - dict(type='Equalize'), - dict(type='Rotate', angle=30) - ], # the second combo option - ], - prob=[0.4, 0.6] # the prob of each combo - ) - ... -] -``` - -`RandomApply` is used to randomly perform a combination of data transformations with a specified probability. For example: - -```python -pipeline = [ - ... - dict(type='RandomApply', - transforms=[dict(type='Rotate', angle=30.)], - prob=0.3) # perform the transformation with prob as 0.3 - ... -] -``` - -### TransformBroadcaster - -Usually, a data transformation class only reads the target of an operation from one field. While we can also use `KeyMapper` to change the fields read, there is no way to apply transformations to the data of multiple fields at once. To achieve this, we need to use the multi-target extension wrapper `TransformBroadcaster`. - -`TransformBroadcaster` has two uses, one is to apply data transformation to multiple specified fields, and the other is to apply data transformation to a group of targets under a field. - -1. Apply to multiple fields - - Suppose we need to apply a data transformation to images in two fields `"lq"` (low-quality) and `"gt"` (ground-truth). 
- - ```python - pipeline = [ - dict(type='TransformBroadcaster', - # apply to the "lq" and "gt" fields respectively, and set the "img" field to both - mapping={'img': ['lq', 'gt']}, - # remap the "img" field back to the original field after the transformation - auto_remap=True, - # whether to share random variables in the transformation of each target - # more introduction will be referred in the following chapters (random variable sharing) - share_random_params=True, - transforms=[ - # only need to manipulate the "img" field in the `RandomFlip` class - dict(type='RandomFlip'), - ]) - ] - ``` - - In the `mapping` setting of the multi-target extension, we can also use `...` to ignore the specified original field. As shown in the following example, the wrapped `RandomCrop` will crop the image in the field `"img"` and update the size of the cropped image if the field `"img_shape"` exists. If we want to do the same random cropping for both image fields `"lq"` and `"gt"` at the same time but update the `"img_shape"` field only once, we can do it as in the example: - - ```python - pipeline = [ - dict(type='TransformBroadcaster', - mapping={ - 'img': ['lq', 'gt'], - 'img_shape': ['img_shape', ...], - }, - # remap the "img" and "img_shape" fields back to their original fields after the transformation - auto_remap=True, - # whether to share random variables in the transformation of each target - # more introduction will be referred in the following chapters (random variable sharing) - share_random_params=True, - transforms=[ - # "img" and "img_shape" fields are manipulated in the `RandomCrop` class - # if "img_shape" is missing, only operate on "img" - dict(type='RandomCrop'), - ]) - ] - ``` - -2. A set of targets applied to a field - - Suppose we need to apply a data transformation to the `"images"` field, which is a list of images. 
- - ```python - pipeline = [ - dict(type='TransformBroadcaster', - # map each image under the "images" field to the "img" field - mapping={'img': 'images'}, - # remap the images under the "img" field back to the list in the "images" field after the transformation - auto_remap=True, - # whether to share random variables in the transformation of each target - share_random_params=True, - transforms=[ - # in the `RandomFlip` transformation class, we only need to manipulate the "img" field - dict(type='RandomFlip'), - ]) - ] - ``` - -#### Decorator `cache_randomness` - -In `TransformBroadcaster`, we provide the `share_random_params` option to support sharing random states across multiple data transformations. For example, in a super-resolution task, we want to apply **the same** random transformations **simultaneously** to the low-resolution image and the original image. If we use this function in a custom data transformation class, we need to mark which random variables support sharing in the class. This can be achieved with the decorator `cache_randomness`. - -Taking `MyFlip` from the above example, we want to perform flipping randomly with a certain probability: - -```python -from mmcv.transforms.utils import cache_randomness - -@TRANSFORMS.register_module() -class MyRandomFlip(BaseTransform): - def __init__(self, prob: float, direction: str): - super().__init__() - self.prob = prob - self.direction = direction - - @cache_randomness # label the output of the method as a shareable random variable - def do_flip(self): - flip = True if random.random() > self.prob else False - return flip - - def transform(self, results: dict) -> dict: - img = results['img'] - if self.do_flip(): - results['img'] = mmcv.imflip(img, direction=self.direction) - return results -``` - -In the above example, we decorate the `do_flip` method with `cache_randomness`, marking the method return value `flip` as a random variable that supports sharing. 
Therefore, in the transformation of `TransformBroadcaster` to multiple targets, the value of this variable will remain the same. - -#### Decorator `avoid_cache_randomness` - -In some cases, we cannot separate the process of generating random variables in data transformation into a class method. For example, modules from third-party libraries used in data transformation encapsulate the relevant parts of random variables inside, making them impossible to be extracted as class methods for data transformation. Such data transformations cannot support shared random variables through the decorator `cache_randomness` annotation, and thus cannot share random variables during multi-objective expansion. - -To avoid misuse of such data transformations in multi-object extensions, we provide another decorator, `avoid_cache_randomness`, to mark such data transformations: - -```python -from mmcv.transforms.utils import avoid_cache_randomness - -@TRANSFORMS.register_module() -@avoid_cache_randomness -class MyRandomTransform(BaseTransform): - - def transform(self, results: dict) -> dict: - ... -``` - -Data transformation classes marked with `avoid_cache_randomness` will throw an exception when their instance is wrapped by `TransformBroadcaster` and the parameter `share_random_params` is set to True. This reminds the user not to use it in this way. - -There are a few things to keep in mind when using `avoid_cache_randomness`: - -1. `avoid_cache_randomness` is only used to decorate data transformation classes (subclasses of `BaseTransfrom`) and cannot be used to decorate other general classes, class methods, or functions -2. When a data transformation decorated with `avoid_cache_randomness` is used as a base class, its subclasses **will not inherit** its feature. If the subclass is still unable to share random variables, `avoid_cache_randomness` should be used again. -3. 
A data transformation needs to be modified with `avoid_cache_randomness` only when a data transformation is random and cannot share its random parameters. Data transformations without randomness require no decoration diff --git a/docs/en/understand_mmcv/ops.md b/docs/en/understand_mmcv/ops.md deleted file mode 100644 index e60f77c..0000000 --- a/docs/en/understand_mmcv/ops.md +++ /dev/null @@ -1,66 +0,0 @@ -## ops - -We implement common ops used in detection, segmentation, etc. - -| Device | CPU | CUDA | MLU | MPS | Ascend | -| ---------------------------- | --- | ---- | --- | --- | ------ | -| ActiveRotatedFilter | √ | √ | | | | -| AssignScoreWithK | | √ | | | | -| BallQuery | | √ | | | | -| BBoxOverlaps | | √ | √ | √ | √ | -| BorderAlign | | √ | | | | -| BoxIouRotated | √ | √ | | | | -| BoxIouQuadri | √ | √ | | | | -| CARAFE | | √ | √ | | | -| ChamferDistance | | √ | | | | -| CrissCrossAttention | | √ | | | | -| ContourExpand | √ | | | | | -| ConvexIoU | | √ | | | | -| CornerPool | | √ | | | | -| Correlation | | √ | | | | -| Deformable Convolution v1/v2 | √ | √ | | | √ | -| Deformable RoIPool | | √ | √ | | √ | -| DiffIoURotated | | √ | | | | -| DynamicScatter | | √ | | | | -| FurthestPointSample | | √ | | | | -| FurthestPointSampleWithDist | | √ | | | | -| FusedBiasLeakyrelu | | √ | | | √ | -| GatherPoints | | √ | | | √ | -| GroupPoints | | √ | | | | -| Iou3d | | √ | √ | | | -| KNN | | √ | | | | -| MaskedConv | | √ | √ | | √ | -| MergeCells | | √ | | | | -| MinAreaPolygon | | √ | | | | -| ModulatedDeformConv2d | √ | √ | | | √ | -| MultiScaleDeformableAttn | | √ | √ | | | -| NMS | √ | √ | √ | | √ | -| NMSRotated | √ | √ | | | √ | -| NMSQuadri | √ | √ | | | | -| PixelGroup | √ | | | | | -| PointsInBoxes | √ | √ | | | | -| PointsInPolygons | | √ | | | | -| PSAMask | √ | √ | √ | | √ | -| RotatedFeatureAlign | √ | √ | | | | -| RoIPointPool3d | | √ | √ | | | -| RoIPool | | √ | √ | | √ | -| RoIAlignRotated | √ | √ | √ | | | -| RiRoIAlignRotated | | √ | | | | -| RoIAlign 
| √ | √ | √ | | |
-| RoIAwarePool3d | | √ | √ | | |
-| SAConv2d | | √ | | | |
-| SigmoidFocalLoss | | √ | √ | | √ |
-| SoftmaxFocalLoss | | √ | | | √ |
-| SoftNMS | | √ | | | |
-| Sparse Convolution | | √ | | | |
-| Synchronized BatchNorm | | √ | | | |
-| ThreeInterpolate | | √ | | | |
-| ThreeNN | | √ | √ | | |
-| TINShift | | √ | √ | | |
-| UpFirDn2d | | √ | | | |
-| Voxelization | √ | √ | | | √ |
-| PrRoIPool | | √ | | | |
-| BezierAlign | √ | √ | | | |
-| BiasAct | | √ | | | |
-| FilteredLrelu | | √ | | | |
-| Conv2dGradfix | | √ | | | |
diff --git a/docs/faq.md b/docs/faq.md
new file mode 100644
index 0000000..ab0dd13
--- /dev/null
+++ b/docs/faq.md
@@ -0,0 +1,42 @@
+## Frequently Asked Questions
+
+We list some common troubles faced by many users and their corresponding solutions here.
+Feel free to enrich the list if you find any frequent issues and have ways to help others to solve them.
+
+- Compatibility issue between MMCV and MMDetection; "ConvWS is already registered in conv layer"
+
+  Please install the correct version of MMCV for the version of your MMDetection following the instruction above.
+
+- "No module named 'mmcv.ops'"; "No module named 'mmcv._ext'".
+
+  1. Uninstall existing mmcv in the environment using `pip uninstall mmcv`.
+  2. Install mmcv-full following the instruction above.
+
+- "invalid device function" or "no kernel image is available for execution".
+
+  1. Check the CUDA compute capability of your GPU.
+  2. Run `python mmdet/utils/collect_env.py` to check whether PyTorch, torchvision,
+     and MMCV are built for the correct GPU architecture.
+     You may need to set `TORCH_CUDA_ARCH_LIST` to reinstall MMCV.
+     The compatibility issue could happen when using old GPUs, e.g., Tesla K80 (3.7) on Colab.
+  3. Check whether the running environment is the same as that when mmcv/mmdet is compiled.
+     For example, you may compile mmcv using CUDA 10.0 but run it on CUDA 9.0 environments.
+
+- "undefined symbol" or "cannot open xxx.so".
+
+  1.
If those symbols are CUDA/C++ symbols (e.g., libcudart.so or GLIBCXX), check + whether the CUDA/GCC runtimes are the same as those used for compiling mmcv. + 2. If those symbols are Pytorch symbols (e.g., symbols containing caffe, aten, and TH), check whether + the Pytorch version is the same as that used for compiling mmcv. + 3. Run `python mmdet/utils/collect_env.py` to check whether PyTorch, torchvision, + and MMCV are built by and running on the same environment. + +- "RuntimeError: CUDA error: invalid configuration argument". + + This error may be due to your poor GPU. Try to decrease the value of [THREADS_PER_BLOCK](https://github.com/open-mmlab/mmcv/blob/cac22f8cf5a904477e3b5461b1cc36856c2793da/mmcv/ops/csrc/common_cuda_helper.hpp#L10) + and recompile mmcv. + +- "RuntimeError: nms is not compiled with GPU support". + + This error is because your CUDA environment is not installed correctly. + You may try to re-install your CUDA environment and then delete the build/ folder before re-compile mmcv. diff --git a/docs/get_started/build.md b/docs/get_started/build.md new file mode 100644 index 0000000..758a83a --- /dev/null +++ b/docs/get_started/build.md @@ -0,0 +1,234 @@ +## Build MMCV from source + +### Build on Linux or macOS + +After cloning the repo with + +```bash +git clone https://github.com/open-mmlab/mmcv.git +cd mmcv +``` + +You can either + +- install the lite version + + ```bash + pip install -e . + ``` + +- install the full version + + ```bash + MMCV_WITH_OPS=1 pip install -e . + ``` + +If you are on macOS, add the following environment variables before the installing command. + +```bash +CC=clang CXX=clang++ CFLAGS='-stdlib=libc++' +``` + +e.g., + +```bash +CC=clang CXX=clang++ CFLAGS='-stdlib=libc++' MMCV_WITH_OPS=1 pip install -e . 
+```
+
+```{note}
+If you would like to use `opencv-python-headless` instead of `opencv-python`,
+e.g., in a minimum container environment or servers without GUI,
+you can first install it before installing MMCV to skip the installation of `opencv-python`.
+```
+### Build on Windows
+
+Building MMCV on Windows is a bit more complicated than that on Linux.
+The following instructions show how to get this accomplished.
+
+#### Prerequisite
+
+The following software is required for building MMCV on Windows.
+Install them first.
+
+- [Git](https://git-scm.com/download/win)
+  - During installation, tick **add git to Path**.
+- [Visual Studio Community 2019](https://visualstudio.microsoft.com)
+  - A compiler for C++ and CUDA codes.
+- [Miniconda](https://docs.conda.io/en/latest/miniconda.html)
+  - Official distributions of Python should work too.
+- [CUDA 10.2](https://developer.nvidia.com/cuda-10.2-download-archive)
+  - Not required for building CPU version.
+  - Customize the installation if necessary. As a recommendation, skip the driver installation if a newer version is already installed.
+
+```{note}
+You should know how to set up environment variables, especially `Path`, on Windows. The following instruction relies heavily on this skill.
+```
+
+#### Setup Python Environment
+
+1. Launch Anaconda prompt from Windows Start menu
+
+   Do not use raw `cmd.exe`. This instruction is based on PowerShell syntax.
+
+1. Create a new conda environment
+
+   ```shell
+   conda create --name mmcv python=3.7 # 3.6, 3.7, 3.8 should work too as tested
+   conda activate mmcv # make sure to activate environment before any operation
+   ```
+
+1. Install PyTorch. Choose a version based on your need.
+
+   ```shell
+   conda install pytorch torchvision cudatoolkit=10.2 -c pytorch
+   ```
+
+   We only tested PyTorch version >= 1.6.0.
+
+1. Prepare MMCV source code
+
+   ```shell
+   git clone https://github.com/open-mmlab/mmcv.git
+   cd mmcv
+   ```
+
+1.
Install required Python packages + + ```shell + pip3 install -r requirements.txt + ``` + +#### Build and install MMCV + +MMCV can be built in three ways: + +1. Lite version (without ops) + + In this way, no custom ops are compiled and mmcv is a pure python package. + +1. Full version (CPU ops) + + Module `ops` will be compiled as a pytorch extension, but only x86 code will be compiled. The compiled ops can be executed on CPU only. + +1. Full version (CUDA ops) + + Both x86 and CUDA codes of `ops` module will be compiled. The compiled version can be run on both CPU and CUDA-enabled GPU (if implemented). + +##### Common steps + +1. Set up MSVC compiler + + Set Environment variable, add `C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.27.29110\bin\Hostx86\x64` to `PATH`, so that `cl.exe` will be available in prompt, as shown below. + + ```none + (base) PS C:\Users\xxx> cl + Microsoft (R) C/C++ Optimizing Compiler Version 19.27.29111 for x64 + Copyright (C) Microsoft Corporation. All rights reserved. + + usage: cl [ option... ] filename... [ / link linkoption... ] + ``` + + For compatibility, we use the x86-hosted and x64-targeted compiler. note `Hostx86\x64` in the path. + + You may want to change the system language to English because pytorch will parse text output from `cl.exe` to check its version. However only utf-8 is recognized. Navigate to Control Panel -> Region -> Administrative -> Language for Non-Unicode programs and change it to English. + +##### Option 1: Build MMCV (lite version) + +After finishing above common steps, launch Anaconda shell from Start menu and issue the following commands: + +```shell +# activate environment +conda activate mmcv +# change directory +cd mmcv +# install +python setup.py develop +# check +pip list +``` + +##### Option 2: Build MMCV (full version with CPU) + +1. Finish above common steps +1. 
Set up environment variables + + ```shell + $env:MMCV_WITH_OPS = 1 + $env:MAX_JOBS = 8 # based on your available number of CPU cores and amount of memory + ``` + +1. Following build steps of the lite version + + ```shell + # activate environment + conda activate mmcv + # change directory + cd mmcv + # build + python setup.py build_ext # if success, cl will be launched to compile ops + # install + python setup.py develop + # check + pip list + ``` + +##### Option 3: Build MMCV (full version with CUDA) + +1. Finish above common steps +1. Make sure `CUDA_PATH` or `CUDA_HOME` is already set in `envs` via `ls env:`, desired output is shown as below: + + ```none + (base) PS C:\Users\WRH> ls env: + + Name Value + ---- ----- + <... omit some lines ...> + CUDA_PATH C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2 + CUDA_PATH_V10_1 C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1 + CUDA_PATH_V10_2 C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2 + <... omit some lines ...> + ``` + + This should already be done by the CUDA installer. If not, or if you have multiple versions of CUDA toolkit installed, set it with + + ```shell + $env:CUDA_HOME = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2" + # OR + $env:CUDA_HOME = $env:CUDA_PATH_V10_2 # if CUDA_PATH_V10_2 is in envs: + ``` + +1. Set CUDA target arch + + ```shell + # Suppose you are using GTX 1080, which is of capability 6.1 + $env:TORCH_CUDA_ARCH_LIST="6.1" + # OR build all supported arch, will be slow + $env:TORCH_CUDA_ARCH_LIST="3.5 3.7 5.0 5.2 6.0 6.1 7.0 7.5" + ``` + +```{note} +Check the compute capability of your GPU from [here](https://developer.nvidia.com/cuda-gpus). +``` + +1. 
Launch compiling the same way as CPU + + ```shell + $env:MMCV_WITH_OPS = 1 + $env:MAX_JOBS = 8 # based on available number of CPU cores and amount of memory + # activate environment + conda activate mmcv + # change directory + cd mmcv + # build + python setup.py build_ext # if success, cl will be launched to compile ops + # install + python setup.py develop + # check + pip list + ``` + +```{note} +If you are compiling against PyTorch 1.6.0, you might meet some errors from PyTorch as described in [this issue](https://github.com/pytorch/pytorch/issues/42467). Follow [this pull request](https://github.com/pytorch/pytorch/pull/43380/files) to modify the source code in your local PyTorch installation. +``` + +If you meet issues when running or compiling mmcv, we list some common issues in [Frequently Asked Questions](../faq.html). diff --git a/docs/get_started/installation.md b/docs/get_started/installation.md new file mode 100644 index 0000000..0c64ea8 --- /dev/null +++ b/docs/get_started/installation.md @@ -0,0 +1,162 @@ +## Installation + +There are two versions of MMCV: + +- **mmcv-full**: comprehensive, with full features and various CUDA ops out of the box. It takes longer time to build. +- **mmcv**: lite, without CUDA ops but all other features, similar to mmcv<1.0.0. It is useful when you do not need those CUDA ops. + +```{warning} +Do not install both versions in the same environment, otherwise you may encounter errors like `ModuleNotFound`. You need to uninstall one before installing the other. `Installing the full version is highly recommended if CUDA is available`. +``` + +a. Install the full version. + +Before installing mmcv-full, make sure that PyTorch has been successfully installed following the [official guide](https://pytorch.org/). + +We provide pre-built mmcv packages (recommended) with different PyTorch and CUDA versions to simplify the building. 
In addition, you can run [check_installation.py](.dev_scripts/check_installation.py) to check the installation of mmcv-full after running the installation commands. + +i. Install the latest version. + +The rule for installing the latest ``mmcv-full`` is as follows: + +```shell +pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/{cu_version}/{torch_version}/index.html +``` + +Please replace ``{cu_version}`` and ``{torch_version}`` in the url to your desired one. For example, +to install the latest ``mmcv-full`` with ``CUDA 11.1`` and ``PyTorch 1.9.0``, use the following command: + +```shell +pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html +``` + +For more details, please refer to the following tables and delete ``=={mmcv_version}``. + +ii. Install a specified version. + +The rule for installing a specified ``mmcv-full`` is as follows: + +```shell +pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/{cu_version}/{torch_version}/index.html +``` + +First of all, please refer to the Releases and replace ``{mmcv_version}`` with a specified one, e.g. ``1.3.9``. +Then replace ``{cu_version}`` and ``{torch_version}`` in the url to your desired versions. For example, +to install ``mmcv-full==1.3.9`` with ``CUDA 11.1`` and ``PyTorch 1.9.0``, use the following command: + +```shell +pip install mmcv-full==1.3.9 -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html +``` + +```{note} +mmcv-full is only compiled on PyTorch 1.x.0 because the compatibility +usually holds between 1.x.0 and 1.x.1. If your PyTorch version is 1.x.1, you +can install mmcv-full compiled with PyTorch 1.x.0 and it usually works well. +For example, if your PyTorch version is 1.8.1 and CUDA version is 11.1, you +can use the following command to install mmcv-full. 
+ +`pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.8.0/index.html` +``` + +For more details, please refer the the following tables. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CUDA torch 1.10torch 1.9torch 1.8torch 1.7torch 1.6torch 1.5
11.3
install
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu113/torch1.10.0/index.html
11.1
install
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.10.0/index.html
install
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html
install
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.8.0/index.html
11.0
install
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu110/torch1.7.0/index.html
10.2
install
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.10.0/index.html
install
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.9.0/index.html
install
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/index.html
install
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.7.0/index.html
install
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.6.0/index.html
install
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.5.0/index.html
10.1
install
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.8.0/index.html
install
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.7.0/index.html
install
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.6.0/index.html
install
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.5.0/index.html
9.2
install
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu92/torch1.7.0/index.html
install
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu92/torch1.6.0/index.html
install
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu92/torch1.5.0/index.html
cpu
install
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.10.0/index.html
install
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.9.0/index.html
install
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.8.0/index.html
install
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.7.0/index.html
install
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.6.0/index.html
install
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.5.0/index.html
+ +```{note} +The pre-built packages provided above do not include all versions of mmcv-full, you can click on the corresponding links to see the supported versions. For example, if you click [cu102-torch1.8.0](https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/index.html), you can see that `cu102-torch1.8.0` only provides 1.3.0 and above versions of mmcv-full. In addition, We no longer provide `mmcv-full` pre-built packages compiled with `PyTorch 1.3 & 1.4` since v1.3.17. You can find previous versions that compiled with PyTorch 1.3 & 1.4 [here](./docs/get_started/previous_versions.md). The compatibility is still ensured in our CI, but we will discard the support of PyTorch 1.3 & 1.4 next year. +``` + +Another way is to compile locally by running + +```python +pip install mmcv-full +``` + +Note that the local compiling may take up to 10 mins. + +b. Install the lite version. + +```python +pip install mmcv +``` + +c. Install full version with custom operators for onnxruntime + +- Check [here](https://mmcv.readthedocs.io/en/latest/deployment/onnxruntime_custom_ops.html) for detailed instruction. + +If you would like to build MMCV from source, please refer to the [guide](https://mmcv.readthedocs.io/en/latest/get_started/build.html). diff --git a/docs/get_started/introduction.md b/docs/get_started/introduction.md new file mode 100644 index 0000000..4ffb59d --- /dev/null +++ b/docs/get_started/introduction.md @@ -0,0 +1,29 @@ +## Introduction + +MMCV is a foundational library for computer vision research and supports many +research projects as below: + +- [MMClassification](https://github.com/open-mmlab/mmclassification): OpenMMLab image classification toolbox and benchmark. +- [MMDetection](https://github.com/open-mmlab/mmdetection): OpenMMLab detection toolbox and benchmark. +- [MMDetection3D](https://github.com/open-mmlab/mmdetection3d): OpenMMLab's next-generation platform for general 3D object detection. 
+- [MMSegmentation](https://github.com/open-mmlab/mmsegmentation): OpenMMLab semantic segmentation toolbox and benchmark. +- [MMAction2](https://github.com/open-mmlab/mmaction2): OpenMMLab's next-generation action understanding toolbox and benchmark. +- [MMTracking](https://github.com/open-mmlab/mmtracking): OpenMMLab video perception toolbox and benchmark. +- [MMPose](https://github.com/open-mmlab/mmpose): OpenMMLab pose estimation toolbox and benchmark. +- [MMEditing](https://github.com/open-mmlab/mmediting): OpenMMLab image and video editing toolbox. +- [MMOCR](https://github.com/open-mmlab/mmocr): OpenMMLab text detection, recognition and understanding toolbox. +- [MMGeneration](https://github.com/open-mmlab/mmgeneration): OpenMMLab image and video generative models toolbox. + +It provides the following functionalities. + +- Universal IO APIs +- Image/Video processing +- Image and annotation visualization +- Useful utilities (progress bar, timer, ...) +- PyTorch runner with hooking mechanism +- Various CNN architectures +- High-quality implementation of common CUDA ops + +```{note} +MMCV requires Python 3.6+. 
+``` diff --git a/docs/en/get_started/previous_versions.md b/docs/get_started/previous_versions.md similarity index 93% rename from docs/en/get_started/previous_versions.md rename to docs/get_started/previous_versions.md index a9c3717..c91180d 100644 --- a/docs/en/get_started/previous_versions.md +++ b/docs/get_started/previous_versions.md @@ -4,7 +4,7 @@ We no longer provide `mmcv-full` packages compiled under lower versions of `PyTo ### PyTorch 1.4 -| 1.0.0 \<= mmcv_version \<= 1.2.1 +| 1.0.0 <= mmcv_version <= 1.2.1 #### CUDA 10.1 @@ -26,7 +26,7 @@ pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dis ### PyTorch v1.3 -| 1.0.0 \<= mmcv_version \<= 1.3.16 +| 1.0.0 <= mmcv_version <= 1.3.16 #### CUDA 10.1 diff --git a/docs/en/index.rst b/docs/index.rst similarity index 71% rename from docs/en/index.rst rename to docs/index.rst index dee2c37..6019f10 100644 --- a/docs/en/index.rst +++ b/docs/index.rst @@ -15,23 +15,27 @@ You can switch between Chinese and English documents in the lower-left corner of :maxdepth: 2 :caption: Understand MMCV + understand_mmcv/config.md + understand_mmcv/registry.md + understand_mmcv/runner.md + understand_mmcv/io.md understand_mmcv/data_process.md - understand_mmcv/data_transform.md understand_mmcv/visualization.md understand_mmcv/cnn.md understand_mmcv/ops.md + understand_mmcv/utils.md .. toctree:: :maxdepth: 2 :caption: Deployment + deployment/onnx.md + deployment/onnxruntime_op.md + deployment/onnxruntime_custom_ops.md + deployment/tensorrt_plugin.md + deployment/tensorrt_custom_ops.md deployment/mmcv_ops_definition.md -.. toctree:: - :caption: Switch Language - - switch_language.md - .. toctree:: :maxdepth: 2 :caption: Compatibility @@ -39,6 +43,8 @@ You can switch between Chinese and English documents in the lower-left corner of compatibility.md .. 
toctree:: + :maxdepth: 2 + :caption: FAQ faq.md @@ -50,17 +56,10 @@ You can switch between Chinese and English documents in the lower-left corner of community/pr.md .. toctree:: - :maxdepth: 1 + :maxdepth: 2 :caption: API Reference - mmcv.image - mmcv.video - mmcv.visualization - mmcv.cnn - mmcv.ops - mmcv.transforms - mmcv.arraymisc - mmcv.utils + api.rst Indices and tables ================== diff --git a/docs/en/make.bat b/docs/make.bat similarity index 100% rename from docs/en/make.bat rename to docs/make.bat diff --git a/docs/en/mmcv-logo.png b/docs/mmcv-logo.png similarity index 100% rename from docs/en/mmcv-logo.png rename to docs/mmcv-logo.png diff --git a/docs/understand_mmcv/cnn.md b/docs/understand_mmcv/cnn.md new file mode 100644 index 0000000..749cb95 --- /dev/null +++ b/docs/understand_mmcv/cnn.md @@ -0,0 +1,538 @@ +## CNN + +We provide some building bricks for CNNs, including layer building, module bundles and weight initialization. + +### Layer building + +We may need to try different layers of the same type when running experiments, +but do not want to modify the code from time to time. +Here we provide some layer building methods to construct layers from a dict, +which can be written in configs or specified via command line arguments. + +#### Usage + +A simplest example is + +```python +cfg = dict(type='Conv3d') +layer = build_conv_layer(cfg, in_channels=3, out_channels=8, kernel_size=3) +``` + +- `build_conv_layer`: Supported types are Conv1d, Conv2d, Conv3d, Conv (alias for Conv2d). +- `build_norm_layer`: Supported types are BN1d, BN2d, BN3d, BN (alias for BN2d), SyncBN, GN, LN, IN1d, IN2d, IN3d, IN (alias for IN2d). +- `build_activation_layer`: Supported types are ReLU, LeakyReLU, PReLU, RReLU, ReLU6, ELU, Sigmoid, Tanh, GELU. +- `build_upsample_layer`: Supported types are nearest, bilinear, deconv, pixel_shuffle. +- `build_padding_layer`: Supported types are zero, reflect, replicate. 
+ +#### Extension + +We also allow extending the building methods with custom layers and operators. + +1. Write and register your own module. + + ```python + from mmcv.cnn import UPSAMPLE_LAYERS + + @UPSAMPLE_LAYERS.register_module() + class MyUpsample: + + def __init__(self, scale_factor): + pass + + def forward(self, x): + pass + ``` + +2. Import `MyUpsample` somewhere (e.g., in `__init__.py`) and then use it. + + ```python + cfg = dict(type='MyUpsample', scale_factor=2) + layer = build_upsample_layer(cfg) + ``` + +### Module bundles + +We also provide common module bundles to facilitate the network construction. +`ConvModule` is a bundle of convolution, normalization and activation layers, +please refer to the [api](api.html#mmcv.cnn.ConvModule) for details. + +```python +# conv + bn + relu +conv = ConvModule(3, 8, 2, norm_cfg=dict(type='BN')) +# conv + gn + relu +conv = ConvModule(3, 8, 2, norm_cfg=dict(type='GN', num_groups=2)) +# conv + relu +conv = ConvModule(3, 8, 2) +# conv +conv = ConvModule(3, 8, 2, act_cfg=None) +# conv + leaky relu +conv = ConvModule(3, 8, 3, padding=1, act_cfg=dict(type='LeakyReLU')) +# bn + conv + relu +conv = ConvModule( + 3, 8, 2, norm_cfg=dict(type='BN'), order=('norm', 'conv', 'act')) +``` + +### Weight initialization + +> Implementation details are available at [mmcv/cnn/utils/weight_init.py](../../mmcv/cnn/utils/weight_init.py) + +During training, a proper initialization strategy is beneficial to speed up the +training or obtain a higher performance. In MMCV, we provide some commonly used +methods for initializing modules like `nn.Conv2d`. Of course, we also provide +high-level APIs for initializing models containing one or more +modules. + +#### Initialization functions + +Initialize a `nn.Module` such as `nn.Conv2d`, `nn.Linear` in a functional way. + +We provide the following initialization methods. + +- constant_init + + Initialize module parameters with constant values. 
+ + ```python + >>> import torch.nn as nn + >>> from mmcv.cnn import constant_init + >>> conv1 = nn.Conv2d(3, 3, 1) + >>> # constant_init(module, val, bias=0) + >>> constant_init(conv1, 1, 0) + >>> conv1.weight + ``` + +- xavier_init + + Initialize module parameters with values according to the method + described in [Understanding the difficulty of training deep feedforward neural networks - Glorot, X. & Bengio, Y. (2010)](http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf) + + ```python + >>> import torch.nn as nn + >>> from mmcv.cnn import xavier_init + >>> conv1 = nn.Conv2d(3, 3, 1) + >>> # xavier_init(module, gain=1, bias=0, distribution='normal') + >>> xavier_init(conv1, distribution='normal') + ``` + +- normal_init + + Initialize module parameters with the values drawn from a normal distribution. + + ```python + >>> import torch.nn as nn + >>> from mmcv.cnn import normal_init + >>> conv1 = nn.Conv2d(3, 3, 1) + >>> # normal_init(module, mean=0, std=1, bias=0) + >>> normal_init(conv1, std=0.01, bias=0) + ``` + +- uniform_init + + Initialize module parameters with values drawn from a uniform distribution. + + ```python + >>> import torch.nn as nn + >>> from mmcv.cnn import uniform_init + >>> conv1 = nn.Conv2d(3, 3, 1) + >>> # uniform_init(module, a=0, b=1, bias=0) + >>> uniform_init(conv1, a=0, b=1) + ``` + +- kaiming_init + + Initialize module parameters with the values according to the method + described in [Delving deep into rectifiers: Surpassing human-level + performance on ImageNet classification - He, K. et al. 
(2015)](https://www.cv-foundation.org/openaccess/content_iccv_2015/papers/He_Delving_Deep_into_ICCV_2015_paper.pdf) + + ```python + >>> import torch.nn as nn + >>> from mmcv.cnn import kaiming_init + >>> conv1 = nn.Conv2d(3, 3, 1) + >>> # kaiming_init(module, a=0, mode='fan_out', nonlinearity='relu', bias=0, distribution='normal') + >>> kaiming_init(conv1) + ``` + +- caffe2_xavier_init + + The xavier initialization is implemented in caffe2, which corresponds to `kaiming_uniform_` in PyTorch. + + ```python + >>> import torch.nn as nn + >>> from mmcv.cnn import caffe2_xavier_init + >>> conv1 = nn.Conv2d(3, 3, 1) + >>> # caffe2_xavier_init(module, bias=0) + >>> caffe2_xavier_init(conv1) + ``` + +- bias_init_with_prob + + Initialize conv/fc bias value according to a given probability, as proposed in [Focal Loss for Dense Object Detection](https://arxiv.org/pdf/1708.02002.pdf). + + ```python + >>> from mmcv.cnn import bias_init_with_prob + >>> # bias_init_with_prob is proposed in Focal Loss + >>> bias = bias_init_with_prob(0.01) + >>> bias + -4.59511985013459 + ``` + +#### Initializers and configs + +On the basis of the initialization methods, we define the corresponding initialization classes and register them to `INITIALIZERS`, so we can +use the configuration to initialize the model. + +We provide the following initialization classes. + +- ConstantInit +- XavierInit +- NormalInit +- UniformInit +- KaimingInit +- Caffe2XavierInit +- PretrainedInit + +Let us introduce the usage of `initialize` in detail. + +1. Initialize model by `layer` key + + If we only define `layer`, it just initialize the layer in `layer` key. + + NOTE: Value of `layer` key is the class name with attributes weights and bias of Pytorch, so `MultiheadAttention layer` is not supported. + +- Define `layer` key for initializing module with same configuration. 
+ + ```python + import torch.nn as nn + from mmcv.cnn import initialize + + class FooNet(nn.Module): + def __init__(self): + super().__init__() + self.feat = nn.Conv1d(3, 1, 3) + self.reg = nn.Conv2d(3, 3, 3) + self.cls = nn.Linear(1, 2) + + model = FooNet() + init_cfg = dict(type='Constant', layer=['Conv1d', 'Conv2d', 'Linear'], val=1) + # initialize whole module with same configuration + initialize(model, init_cfg) + # model.feat.weight + # Parameter containing: + # tensor([[[1., 1., 1.], + # [1., 1., 1.], + # [1., 1., 1.]]], requires_grad=True) + ``` + +- Define `layer` key for initializing layer with different configurations. + + ```python + import torch.nn as nn + from mmcv.cnn.utils import initialize + + class FooNet(nn.Module): + def __init__(self): + super().__init__() + self.feat = nn.Conv1d(3, 1, 3) + self.reg = nn.Conv2d(3, 3, 3) + self.cls = nn.Linear(1,2) + + model = FooNet() + init_cfg = [dict(type='Constant', layer='Conv1d', val=1), + dict(type='Constant', layer='Conv2d', val=2), + dict(type='Constant', layer='Linear', val=3)] + # nn.Conv1d will be initialized with dict(type='Constant', val=1) + # nn.Conv2d will be initialized with dict(type='Constant', val=2) + # nn.Linear will be initialized with dict(type='Constant', val=3) + initialize(model, init_cfg) + # model.reg.weight + # Parameter containing: + # tensor([[[[2., 2., 2.], + # [2., 2., 2.], + # [2., 2., 2.]], + # ..., + # [[2., 2., 2.], + # [2., 2., 2.], + # [2., 2., 2.]]]], requires_grad=True) + ``` + +2. Initialize model by `override` key + +- When initializing some specific part with its attribute name, we can use `override` key, and the value in `override` will ignore the value in init_cfg. 
+ + ```python + import torch.nn as nn + from mmcv.cnn import initialize + + class FooNet(nn.Module): + def __init__(self): + super().__init__() + self.feat = nn.Conv1d(3, 1, 3) + self.reg = nn.Conv2d(3, 3, 3) + self.cls = nn.Sequential(nn.Conv1d(3, 1, 3), nn.Linear(1,2)) + + # if we would like to initialize model's weights as 1 and bias as 2 + # but weight in `cls` as 3 and bias 4, we can use override key + model = FooNet() + init_cfg = dict(type='Constant', layer=['Conv1d','Conv2d'], val=1, bias=2, + override=dict(type='Constant', name='reg', val=3, bias=4)) + # self.feat and self.cls will be initialized with dict(type='Constant', val=1, bias=2) + # The module called 'reg' will be initialized with dict(type='Constant', val=3, bias=4) + initialize(model, init_cfg) + # model.reg.weight + # Parameter containing: + # tensor([[[[3., 3., 3.], + # [3., 3., 3.], + # [3., 3., 3.]], + # ..., + # [[3., 3., 3.], + # [3., 3., 3.], + # [3., 3., 3.]]]], requires_grad=True) + ``` + +- If `layer` is None in init_cfg, only sub-module with the name in override will be initialized, and type and other args in override can be omitted. + + ```python + model = FooNet() + init_cfg = dict(type='Constant', val=1, bias=2, override=dict(name='reg')) + # self.feat and self.cls will be initialized by Pytorch + # The module called 'reg' will be initialized with dict(type='Constant', val=1, bias=2) + initialize(model, init_cfg) + # model.reg.weight + # Parameter containing: + # tensor([[[[1., 1., 1.], + # [1., 1., 1.], + # [1., 1., 1.]], + # ..., + # [[1., 1., 1.], + # [1., 1., 1.], + # [1., 1., 1.]]]], requires_grad=True) + ``` + +- If we don't define `layer` key or `override` key, it will not initialize anything. 
+ +- Invalid usage + + ```python + # It is invalid that override don't have name key + init_cfg = dict(type='Constant', layer=['Conv1d','Conv2d'], + val=1, bias=2, + override=dict(type='Constant', val=3, bias=4)) + + # It is also invalid that override has name and other args except type + init_cfg = dict(type='Constant', layer=['Conv1d','Conv2d'], + val=1, bias=2, + override=dict(name='reg', val=3, bias=4)) + ``` + +3. Initialize model with the pretrained model + + ```python + import torch.nn as nn + import torchvision.models as models + from mmcv.cnn import initialize + + # initialize model with pretrained model + model = models.resnet50() + # model.conv1.weight + # Parameter containing: + # tensor([[[[-6.7435e-03, -2.3531e-02, -9.0143e-03, ..., -2.1245e-03, + # -1.8077e-03, 3.0338e-03], + # [-1.2603e-02, -2.7831e-02, 2.3187e-02, ..., -1.5793e-02, + # 1.1655e-02, 4.5889e-03], + # [-3.7916e-02, 1.2014e-02, 1.3815e-02, ..., -4.2651e-03, + # 1.7314e-02, -9.9998e-03], + # ..., + + init_cfg = dict(type='Pretrained', + checkpoint='torchvision://resnet50') + initialize(model, init_cfg) + # model.conv1.weight + # Parameter containing: + # tensor([[[[ 1.3335e-02, 1.4664e-02, -1.5351e-02, ..., -4.0896e-02, + # -4.3034e-02, -7.0755e-02], + # [ 4.1205e-03, 5.8477e-03, 1.4948e-02, ..., 2.2060e-03, + # -2.0912e-02, -3.8517e-02], + # [ 2.2331e-02, 2.3595e-02, 1.6120e-02, ..., 1.0281e-01, + # 6.2641e-02, 5.1977e-02], + # ..., + + # initialize weights of a sub-module with the specific part of a pretrained model by using 'prefix' + model = models.resnet50() + url = 'http://download.openmmlab.com/mmdetection/v2.0/retinanet/'\ + 'retinanet_r50_fpn_1x_coco/'\ + 'retinanet_r50_fpn_1x_coco_20200130-c2398f9e.pth' + init_cfg = dict(type='Pretrained', + checkpoint=url, prefix='backbone.') + initialize(model, init_cfg) + ``` + +4. 
Initialize model inherited from BaseModule, Sequential, ModuleList + + `BaseModule` is inherited from `torch.nn.Module`, and the only different between them is that `BaseModule` implements `init_weight`. + + `Sequential` is inherited from `BaseModule` and `torch.nn.Sequential`. + + `ModuleList` is inherited from `BaseModule` and `torch.nn.ModuleList`. + + `````python + import torch.nn as nn + from mmcv.runner import BaseModule, Sequential, ModuleList + + class FooConv1d(BaseModule): + + def __init__(self, init_cfg=None): + super().__init__(init_cfg) + self.conv1d = nn.Conv1d(4, 1, 4) + + def forward(self, x): + return self.conv1d(x) + + class FooConv2d(BaseModule): + + def __init__(self, init_cfg=None): + super().__init__(init_cfg) + self.conv2d = nn.Conv2d(3, 1, 3) + + def forward(self, x): + return self.conv2d(x) + + # BaseModule + init_cfg = dict(type='Constant', layer='Conv1d', val=0., bias=1.) + model = FooConv1d(init_cfg) + model.init_weights() + # model.conv1d.weight + # Parameter containing: + # tensor([[[0., 0., 0., 0.], + # [0., 0., 0., 0.], + # [0., 0., 0., 0.], + # [0., 0., 0., 0.]]], requires_grad=True) + + # Sequential + init_cfg1 = dict(type='Constant', layer='Conv1d', val=0., bias=1.) + init_cfg2 = dict(type='Constant', layer='Conv2d', val=2., bias=3.) + model1 = FooConv1d(init_cfg1) + model2 = FooConv2d(init_cfg2) + seq_model = Sequential(model1, model2) + seq_model.init_weights() + # seq_model[0].conv1d.weight + # Parameter containing: + # tensor([[[0., 0., 0., 0.], + # [0., 0., 0., 0.], + # [0., 0., 0., 0.], + # [0., 0., 0., 0.]]], requires_grad=True) + # seq_model[1].conv2d.weight + # Parameter containing: + # tensor([[[[2., 2., 2.], + # [2., 2., 2.], + # [2., 2., 2.]], + # ..., + # [[2., 2., 2.], + # [2., 2., 2.], + # [2., 2., 2.]]]], requires_grad=True) + + # inner init_cfg has higher priority + model1 = FooConv1d(init_cfg1) + model2 = FooConv2d(init_cfg2) + init_cfg = dict(type='Constant', layer=['Conv1d', 'Conv2d'], val=4., bias=5.) 
+ seq_model = Sequential(model1, model2, init_cfg=init_cfg) + seq_model.init_weights() + # seq_model[0].conv1d.weight + # Parameter containing: + # tensor([[[0., 0., 0., 0.], + # [0., 0., 0., 0.], + # [0., 0., 0., 0.], + # [0., 0., 0., 0.]]], requires_grad=True) + # seq_model[1].conv2d.weight + # Parameter containing: + # tensor([[[[2., 2., 2.], + # [2., 2., 2.], + # [2., 2., 2.]], + # ..., + # [[2., 2., 2.], + # [2., 2., 2.], + # [2., 2., 2.]]]], requires_grad=True) + + # ModuleList + model1 = FooConv1d(init_cfg1) + model2 = FooConv2d(init_cfg2) + modellist = ModuleList([model1, model2]) + modellist.init_weights() + # modellist[0].conv1d.weight + # Parameter containing: + # tensor([[[0., 0., 0., 0.], + # [0., 0., 0., 0.], + # [0., 0., 0., 0.], + # [0., 0., 0., 0.]]], requires_grad=True) + # modellist[1].conv2d.weight + # Parameter containing: + # tensor([[[[2., 2., 2.], + # [2., 2., 2.], + # [2., 2., 2.]], + # ..., + # [[2., 2., 2.], + # [2., 2., 2.], + # [2., 2., 2.]]]], requires_grad=True) + + # inner init_cfg has higher priority + model1 = FooConv1d(init_cfg1) + model2 = FooConv2d(init_cfg2) + init_cfg = dict(type='Constant', layer=['Conv1d', 'Conv2d'], val=4., bias=5.) 
+ modellist = ModuleList([model1, model2], init_cfg=init_cfg) + modellist.init_weights() + # modellist[0].conv1d.weight + # Parameter containing: + # tensor([[[0., 0., 0., 0.], + # [0., 0., 0., 0.], + # [0., 0., 0., 0.], + # [0., 0., 0., 0.]]], requires_grad=True) + # modellist[1].conv2d.weight + # Parameter containing: + # tensor([[[[2., 2., 2.], + # [2., 2., 2.], + # [2., 2., 2.]], + # ..., + # [[2., 2., 2.], + # [2., 2., 2.], + # [2., 2., 2.]]]], requires_grad=True) + ````` + +### Model Zoo + +Besides torchvision pre-trained models, we also provide pre-trained models of following CNN: + +- VGG Caffe +- ResNet Caffe +- ResNeXt +- ResNet with Group Normalization +- ResNet with Group Normalization and Weight Standardization +- HRNetV2 +- Res2Net +- RegNet + +#### Model URLs in JSON + +The model zoo links in MMCV are managed by JSON files. +The json file consists of key-value pair of model name and its url or path. +An example json file could be like: + +```json +{ + "model_a": "https://example.com/models/model_a_9e5bac.pth", + "model_b": "pretrain/model_b_ab3ef2c.pth" +} +``` + +The default links of the pre-trained models hosted on OpenMMLab AWS could be found [here](https://github.com/open-mmlab/mmcv/blob/master/mmcv/model_zoo/open_mmlab.json). + +You may override default links by putting `open-mmlab.json` under `MMCV_HOME`. If `MMCV_HOME` is not find in the environment, `~/.cache/mmcv` will be used by default. You may `export MMCV_HOME=/your/path` to use your own path. + +The external json files will be merged into default one. If the same key presents in both external json and default json, the external one will be used. + +#### Load Checkpoint + +The following types are supported for `filename` argument of `mmcv.load_checkpoint()`. + +- filepath: The filepath of the checkpoint. +- `http://xxx` and `https://xxx`: The link to download the checkpoint. The `SHA256` postfix should be contained in the filename. 
+- `torchvision://xxx`: The model links in `torchvision.models`.Please refer to [torchvision](https://pytorch.org/docs/stable/torchvision/models.html) for details. +- `open-mmlab://xxx`: The model links or filepath provided in default and additional json files. diff --git a/docs/understand_mmcv/config.md b/docs/understand_mmcv/config.md new file mode 100644 index 0000000..d0b669b --- /dev/null +++ b/docs/understand_mmcv/config.md @@ -0,0 +1,200 @@ +## Config + +`Config` class is used for manipulating config and config files. It supports +loading configs from multiple file formats including **python**, **json** and **yaml**. +It provides dict-like apis to get and set values. + +Here is an example of the config file `test.py`. + +```python +a = 1 +b = dict(b1=[0, 1, 2], b2=None) +c = (1, 2) +d = 'string' +``` + +To load and use configs + +```python +>>> cfg = Config.fromfile('test.py') +>>> print(cfg) +>>> dict(a=1, +... b=dict(b1=[0, 1, 2], b2=None), +... c=(1, 2), +... d='string') +``` + +For all format configs, some predefined variables are supported. It will convert the variable in `{{ var }}` with its real value. + +Currently, it supports four predefined variables: + +`{{ fileDirname }}` - the current opened file's dirname, e.g. /home/your-username/your-project/folder + +`{{ fileBasename }}` - the current opened file's basename, e.g. file.ext + +`{{ fileBasenameNoExtension }}` - the current opened file's basename with no file extension, e.g. file + +`{{ fileExtname }}` - the current opened file's extension, e.g. .ext + +These variable names are referred from [VS Code](https://code.visualstudio.com/docs/editor/variables-reference). + +Here is one examples of config with predefined variables. + +`config_a.py` + +```python +a = 1 +b = './work_dir/{{ fileBasenameNoExtension }}' +c = '{{ fileExtname }}' +``` + +```python +>>> cfg = Config.fromfile('./config_a.py') +>>> print(cfg) +>>> dict(a=1, +... b='./work_dir/config_a', +... 
c='.py') +``` + +For all format configs, inheritance is supported. To reuse fields in other config files, +specify `_base_='./config_a.py'` or a list of configs `_base_=['./config_a.py', './config_b.py']`. +Here are 4 examples of config inheritance. + +`config_a.py` + +```python +a = 1 +b = dict(b1=[0, 1, 2], b2=None) +``` + +### Inherit from base config without overlapped keys + +`config_b.py` + +```python +_base_ = './config_a.py' +c = (1, 2) +d = 'string' +``` + +```python +>>> cfg = Config.fromfile('./config_b.py') +>>> print(cfg) +>>> dict(a=1, +... b=dict(b1=[0, 1, 2], b2=None), +... c=(1, 2), +... d='string') +``` + +New fields in `config_b.py` are combined with old fields in `config_a.py` + +### Inherit from base config with overlapped keys + +`config_c.py` + +```python +_base_ = './config_a.py' +b = dict(b2=1) +c = (1, 2) +``` + +```python +>>> cfg = Config.fromfile('./config_c.py') +>>> print(cfg) +>>> dict(a=1, +... b=dict(b1=[0, 1, 2], b2=1), +... c=(1, 2)) +``` + +`b.b2=None` in `config_a` is replaced with `b.b2=1` in `config_c.py`. + +### Inherit from base config with ignored fields + +`config_d.py` + +```python +_base_ = './config_a.py' +b = dict(_delete_=True, b2=None, b3=0.1) +c = (1, 2) +``` + +```python +>>> cfg = Config.fromfile('./config_d.py') +>>> print(cfg) +>>> dict(a=1, +... b=dict(b2=None, b3=0.1), +... c=(1, 2)) +``` + +You may also set `_delete_=True` to ignore some fields in base configs. All old keys `b1, b2, b3` in `b` are replaced with new keys `b2, b3`. + +### Inherit from multiple base configs (the base configs should not contain the same keys) + +`config_e.py` + +```python +c = (1, 2) +d = 'string' +``` + +`config_f.py` + +```python +_base_ = ['./config_a.py', './config_e.py'] +``` + +```python +>>> cfg = Config.fromfile('./config_f.py') +>>> print(cfg) +>>> dict(a=1, +... b=dict(b1=[0, 1, 2], b2=None), +... c=(1, 2), +... 
d='string') +``` + +### Reference variables from base + +You can reference variables defined in base using the following grammar. + +`base.py` + +```python +item1 = 'a' +item2 = dict(item3 = 'b') +``` + +`config_g.py` + +```python +_base_ = ['./base.py'] +item = dict(a = {{ _base_.item1 }}, b = {{ _base_.item2.item3 }}) +``` + +```python +>>> cfg = Config.fromfile('./config_g.py') +>>> print(cfg.pretty_text) +item1 = 'a' +item2 = dict(item3='b') +item = dict(a='a', b='b') +``` + +### Add deprecation information in configs + +Deprecation information can be added in a config file, which will trigger a `UserWarning` when this config file is loaded. + +`deprecated_cfg.py` + +```python +_base_ = 'expected_cfg.py' + +_deprecation_ = dict( + expected = 'expected_cfg.py', # optional to show expected config path in the warning information + reference = 'url to related PR' # optional to show reference link in the warning information +) +``` + +```python +>>> cfg = Config.fromfile('./deprecated_cfg.py') + +UserWarning: The config file deprecated.py will be deprecated in the future. Please use expected_cfg.py instead. More information can be found at https://github.com/open-mmlab/mmcv/pull/1275 +``` diff --git a/docs/en/understand_mmcv/data_process.md b/docs/understand_mmcv/data_process.md similarity index 90% rename from docs/en/understand_mmcv/data_process.md rename to docs/understand_mmcv/data_process.md index 167928f..79e9281 100644 --- a/docs/en/understand_mmcv/data_process.md +++ b/docs/understand_mmcv/data_process.md @@ -2,7 +2,7 @@ ### Image -This module provides some image processing methods, which requires `opencv` to be installed first. +This module provides some image processing methods, which requires `opencv` to be installed. #### Read/Write/Show @@ -118,7 +118,7 @@ mmcv.imflip(img, direction='vertical') #### Crop -`imcrop` can crop the image with one or more regions. Each region is represented by the upper left and lower right coordinates as (x1, y1, x2, y2). 
+`imcrop` can crop the image with one or some regions, represented as (x1, y1, x2, y2). ```python import mmcv @@ -135,12 +135,12 @@ bboxes = np.array([[10, 10, 100, 120], [0, 0, 50, 50]]) patches = mmcv.imcrop(img, bboxes) # crop two regions, and rescale the patches by 1.2x -patches = mmcv.imcrop(img, bboxes, scale=1.2) +patches = mmcv.imcrop(img, bboxes, scale_ratio=1.2) ``` #### Padding -There are two methods, `impad` and `impad_to_multiple`, to pad an image to the +There are two methods `impad` and `impad_to_multiple` to pad an image to the specific size with given values. ```python @@ -150,14 +150,14 @@ img = mmcv.imread('tests/data/color.jpg') img_ = mmcv.impad(img, shape=(1000, 1200), pad_val=0) # pad the image to (1000, 1200) with different values for three channels. -img_ = mmcv.impad(img, shape=(1000, 1200), pad_val=(100, 50, 200)) +img_ = mmcv.impad(img, shape=(1000, 1200), pad_val=[100, 50, 200]) # pad the image on left, right, top, bottom borders with all zeros img_ = mmcv.impad(img, padding=(10, 20, 30, 40), pad_val=0) # pad the image on left, right, top, bottom borders with different values # for three channels. -img_ = mmcv.impad(img, padding=(10, 20, 30, 40), pad_val=(100, 50, 200)) +img_ = mmcv.impad(img, padding=(10, 20, 30, 40), pad_val=[100, 50, 200]) # pad an image so that each edge is a multiple of some value. img_ = mmcv.impad_to_multiple(img, 32) @@ -165,7 +165,7 @@ img_ = mmcv.impad_to_multiple(img, 32) ### Video -This module provides the following functionalities: +This module provides the following functionalities. - A `VideoReader` class with friendly apis to read and convert videos. - Some methods for editing (cut, concat, resize) videos. @@ -232,7 +232,7 @@ mmcv.resize_video('test.mp4', 'resized2.mp4', ratio=2) - IO - Visualization -- Flow warping +- Flow warpping We provide two options to dump optical flow files: uncompressed and compressed. The uncompressed way just dumps the floating numbers to a binary file. 
It is @@ -265,12 +265,12 @@ mmcv.flowshow(flow) ![progress](../_static/flow_visualization.png) -3. Flow warping +3. Flow warpping ```python img1 = mmcv.imread('img1.jpg') flow = mmcv.flowread('flow.flo') -warped_img2 = mmcv.flow_warp(img1, flow) +warpped_img2 = mmcv.flow_warp(img1, flow) ``` img1 (left) and img2 (right) @@ -281,6 +281,6 @@ optical flow (img2 -> img1) ![optical flow](../_static/flow_img2toimg1.png) -warped image and difference with ground truth +warpped image and difference with ground truth -![warped image](../_static/flow_warp_diff.png) +![warpped image](../_static/flow_warp_diff.png) diff --git a/docs/understand_mmcv/io.md b/docs/understand_mmcv/io.md new file mode 100644 index 0000000..f6c28dd --- /dev/null +++ b/docs/understand_mmcv/io.md @@ -0,0 +1,247 @@ +## File IO + +This module provides two universal API to load and dump files of different formats. + +```{note} +Since v1.3.16, the IO modules support loading (dumping) data from (to) different backends, respectively. More details are in PR [#1330](https://github.com/open-mmlab/mmcv/pull/1330). +``` + +### Load and dump data + +`mmcv` provides a universal api for loading and dumping data, currently +supported formats are json, yaml and pickle. 
+ +#### Load from disk or dump to disk + +```python +import mmcv + +# load data from a file +data = mmcv.load('test.json') +data = mmcv.load('test.yaml') +data = mmcv.load('test.pkl') +# load data from a file-like object +with open('test.json', 'r') as f: + data = mmcv.load(f, file_format='json') + +# dump data to a string +json_str = mmcv.dump(data, file_format='json') + +# dump data to a file with a filename (infer format from file extension) +mmcv.dump(data, 'out.pkl') + +# dump data to a file with a file-like object +with open('test.yaml', 'w') as f: + data = mmcv.dump(data, f, file_format='yaml') +``` + +#### Load from other backends or dump to other backends + +```python +import mmcv + +# load data from a file +data = mmcv.load('s3://bucket-name/test.json') +data = mmcv.load('s3://bucket-name/test.yaml') +data = mmcv.load('s3://bucket-name/test.pkl') + +# dump data to a file with a filename (infer format from file extension) +mmcv.dump(data, 's3://bucket-name/out.pkl') +``` + +It is also very convenient to extend the api to support more file formats. +All you need to do is to write a file handler inherited from `BaseFileHandler` +and register it with one or several file formats. + +You need to implement at least 3 methods. + +```python +import mmcv + +# To register multiple file formats, a list can be used as the argument. +# @mmcv.register_handler(['txt', 'log']) +@mmcv.register_handler('txt') +class TxtHandler1(mmcv.BaseFileHandler): + + def load_from_fileobj(self, file): + return file.read() + + def dump_to_fileobj(self, obj, file): + file.write(str(obj)) + + def dump_to_str(self, obj, **kwargs): + return str(obj) +``` + +Here is an example of `PickleHandler`. 
+ +```python +import pickle + +class PickleHandler(mmcv.BaseFileHandler): + + def load_from_fileobj(self, file, **kwargs): + return pickle.load(file, **kwargs) + + def load_from_path(self, filepath, **kwargs): + return super(PickleHandler, self).load_from_path( + filepath, mode='rb', **kwargs) + + def dump_to_str(self, obj, **kwargs): + kwargs.setdefault('protocol', 2) + return pickle.dumps(obj, **kwargs) + + def dump_to_fileobj(self, obj, file, **kwargs): + kwargs.setdefault('protocol', 2) + pickle.dump(obj, file, **kwargs) + + def dump_to_path(self, obj, filepath, **kwargs): + super(PickleHandler, self).dump_to_path( + obj, filepath, mode='wb', **kwargs) +``` + +### Load a text file as a list or dict + +For example `a.txt` is a text file with 5 lines. + +``` +a +b +c +d +e +``` + +#### Load from disk + +Use `list_from_file` to load the list from a.txt. + +```python +>>> mmcv.list_from_file('a.txt') +['a', 'b', 'c', 'd', 'e'] +>>> mmcv.list_from_file('a.txt', offset=2) +['c', 'd', 'e'] +>>> mmcv.list_from_file('a.txt', max_num=2) +['a', 'b'] +>>> mmcv.list_from_file('a.txt', prefix='/mnt/') +['/mnt/a', '/mnt/b', '/mnt/c', '/mnt/d', '/mnt/e'] +``` + +For example `b.txt` is a text file with 3 lines. + +``` +1 cat +2 dog cow +3 panda +``` + +Then use `dict_from_file` to load the dict from `b.txt`. + +```python +>>> mmcv.dict_from_file('b.txt') +{'1': 'cat', '2': ['dog', 'cow'], '3': 'panda'} +>>> mmcv.dict_from_file('b.txt', key_type=int) +{1: 'cat', 2: ['dog', 'cow'], 3: 'panda'} +``` + +#### Load from other backends + +Use `list_from_file` to load the list from `s3://bucket-name/a.txt`. 
+ +```python +>>> mmcv.list_from_file('s3://bucket-name/a.txt') +['a', 'b', 'c', 'd', 'e'] +>>> mmcv.list_from_file('s3://bucket-name/a.txt', offset=2) +['c', 'd', 'e'] +>>> mmcv.list_from_file('s3://bucket-name/a.txt', max_num=2) +['a', 'b'] +>>> mmcv.list_from_file('s3://bucket-name/a.txt', prefix='/mnt/') +['/mnt/a', '/mnt/b', '/mnt/c', '/mnt/d', '/mnt/e'] +``` + +Use `dict_from_file` to load the dict from `s3://bucket-name/b.txt`. + +```python +>>> mmcv.dict_from_file('s3://bucket-name/b.txt') +{'1': 'cat', '2': ['dog', 'cow'], '3': 'panda'} +>>> mmcv.dict_from_file('s3://bucket-name/b.txt', key_type=int) +{1: 'cat', 2: ['dog', 'cow'], 3: 'panda'} +``` + +### Load and dump checkpoints + +#### Load checkpoints from disk or save to disk + +We can read the checkpoints from disk or save to disk in the following way. + +```python +import torch + +filepath1 = '/path/of/your/checkpoint1.pth' +filepath2 = '/path/of/your/checkpoint2.pth' +# read from filepath1 +checkpoint = torch.load(filepath1) +# save to filepath2 +torch.save(checkpoint, filepath2) +``` + +MMCV provides many backends. `HardDiskBackend` is one of them and we can use it to read or save checkpoints. + +```python +import io +from mmcv.fileio.file_client import HardDiskBackend + +disk_backend = HardDiskBackend() +with io.BytesIO(disk_backend.get(filepath1)) as buffer: + checkpoint = torch.load(buffer) +with io.BytesIO() as buffer: + torch.save(checkpoint, f) + disk_backend.put(f.getvalue(), filepath2) +``` + +If we want to implement an interface which automatically select the corresponding +backend based on the file path, we can use the `FileClient`. +For example, we want to implement two methods for reading checkpoints as well as saving checkpoints, +which need to support different types of file paths, either disk paths, network paths or other paths. 
+ +```python +from mmcv.fileio.file_client import FileClient + +def load_checkpoint(path): + file_client = FileClient.infer(uri=path) + with io.BytesIO(file_client.get(path)) as buffer: + checkpoint = torch.load(buffer) + return checkpoint + +def save_checkpoint(checkpoint, path): + with io.BytesIO() as buffer: + torch.save(checkpoint, buffer) + file_client.put(buffer.getvalue(), path) + +file_client = FileClient.infer_client(uri=filepath1) +checkpoint = load_checkpoint(filepath1) +save_checkpoint(checkpoint, filepath2) +``` + +#### Load checkpoints from the Internet + +```{note} +Currently, it only supports reading checkpoints from the Internet, and does not support saving checkpoints to the Internet. +``` + +```python +import io +import torch +from mmcv.fileio.file_client import HTTPBackend, FileClient + +filepath = 'http://path/of/your/checkpoint.pth' +checkpoint = torch.utils.model_zoo.load_url(filepath) + +http_backend = HTTPBackend() +with io.BytesIO(http_backend.get(filepath)) as buffer: + checkpoint = torch.load(buffer) + +file_client = FileClient.infer_client(uri=filepath) +with io.BytesIO(file_client.get(filepath)) as buffer: + checkpoint = torch.load(buffer) +``` diff --git a/docs/understand_mmcv/ops.md b/docs/understand_mmcv/ops.md new file mode 100644 index 0000000..2729e44 --- /dev/null +++ b/docs/understand_mmcv/ops.md @@ -0,0 +1,37 @@ +## CUDA ops + +We implement common CUDA ops used in detection, segmentation, etc. 
+ +- AssignScoreWithK +- BallQuery +- BBoxOverlaps +- CARAFE +- CrissCrossAttention +- ContextBlock +- CornerPool +- Deformable Convolution v1/v2 +- Deformable RoIPool +- DynamicScatter +- GatherPoints +- FurthestPointSample +- FurthestPointSampleWithDist +- GeneralizedAttention +- GroupPoints +- KNN +- MaskedConv +- NMS +- PSAMask +- RoIPointPool3d +- RoIPool +- RoIAlign +- RoIAwarePool3d +- SimpleRoIAlign +- SigmoidFocalLoss +- SoftmaxFocalLoss +- SoftNMS +- Synchronized BatchNorm +- Voxelization +- ThreeInterpolate +- ThreeNN +- Weight standardization +- Correlation diff --git a/docs/understand_mmcv/registry.md b/docs/understand_mmcv/registry.md new file mode 100644 index 0000000..2cf1081 --- /dev/null +++ b/docs/understand_mmcv/registry.md @@ -0,0 +1,155 @@ +## Registry + +MMCV implements [registry](https://github.com/open-mmlab/mmcv/blob/master/mmcv/utils/registry.py) to manage different modules that share similar functionalities, e.g., backbones, head, and necks, in detectors. +Most projects in OpenMMLab use registry to manage modules of datasets and models, such as [MMDetection](https://github.com/open-mmlab/mmdetection), [MMDetection3D](https://github.com/open-mmlab/mmdetection3d), [MMClassification](https://github.com/open-mmlab/mmclassification), [MMEditing](https://github.com/open-mmlab/mmediting), etc. + +### What is registry + +In MMCV, registry can be regarded as a mapping that maps a class to a string. +These classes contained by a single registry usually have similar APIs but implement different algorithms or support different datasets. +With the registry, users can find and instantiate the class through its corresponding string, and use the instantiated module as they want. +One typical example is the config systems in most OpenMMLab projects, which use the registry to create hooks, runners, models, and datasets, through configs. 
+The API reference could be found [here](https://mmcv.readthedocs.io/en/latest/api.html?highlight=registry#mmcv.utils.Registry). + +To manage your modules in the codebase by `Registry`, there are three steps as below. + +1. Create a build method (optional, in most cases you can just use the default one). +2. Create a registry. +3. Use this registry to manage the modules. + +`build_func` argument of `Registry` is to customize how to instantiate the class instance, the default one is `build_from_cfg` implemented [here](https://mmcv.readthedocs.io/en/latest/api.html?highlight=registry#mmcv.utils.build_from_cfg). + +### A Simple Example + +Here we show a simple example of using registry to manage modules in a package. +You can find more practical examples in OpenMMLab projects. + +Assuming we want to implement a series of Dataset Converter for converting different formats of data to the expected data format. +We create a directory as a package named `converters`. +In the package, we first create a file to implement builders, named `converters/builder.py`, as below + +```python +from mmcv.utils import Registry +# create a registry for converters +CONVERTERS = Registry('converter') +``` + +Then we can implement different converters in the package. For example, implement `Converter1` in `converters/converter1.py` + +```python + +from .builder import CONVERTERS + +# use the registry to manage the module +@CONVERTERS.register_module() +class Converter1(object): + def __init__(self, a, b): + self.a = a + self.b = b +``` + +The key step to use registry for managing the modules is to register the implemented module into the registry `CONVERTERS` through +`@CONVERTERS.register_module()` when you are creating the module. 
By this way, a mapping between a string and the class is built and maintained by `CONVERTERS` as below + +```python +'Converter1' -> +``` + +If the module is successfully registered, you can use this converter through configs as + +```python +converter_cfg = dict(type='Converter1', a=a_value, b=b_value) +converter = CONVERTERS.build(converter_cfg) +``` + +### Customize Build Function + +Suppose we would like to customize how `converters` are built, we could implement a customized `build_func` and pass it into the registry. + +```python +from mmcv.utils import Registry + +# create a build function +def build_converter(cfg, registry, *args, **kwargs): + cfg_ = cfg.copy() + converter_type = cfg_.pop('type') + if converter_type not in registry: + raise KeyError(f'Unrecognized converter type {converter_type}') + else: + converter_cls = registry.get(converter_type) + + converter = converter_cls(*args, **kwargs, **cfg_) + return converter + +# create a registry for converters and pass ``build_converter`` function +CONVERTERS = Registry('converter', build_func=build_converter) +``` + +```{note} +In this example, we demonstrate how to use the `build_func` argument to customize the way to build a class instance. +The functionality is similar to the default `build_from_cfg`. In most cases, default one would be sufficient. +`build_model_from_cfg` is also implemented to build PyTorch module in `nn.Sequentail`, you may directly use them instead of implementing by yourself. +``` + +### Hierarchy Registry + +You could also build modules from more than one OpenMMLab frameworks, e.g. you could use all backbones in [MMClassification](https://github.com/open-mmlab/mmclassification) for object detectors in [MMDetection](https://github.com/open-mmlab/mmdetection), you may also combine an object detection model in [MMDetection](https://github.com/open-mmlab/mmdetection) and semantic segmentation model in [MMSegmentation](https://github.com/open-mmlab/mmsegmentation). 
+ +All `MODELS` registries of downstream codebases are children registries of MMCV's `MODELS` registry. +Basically, there are two ways to build a module from child or sibling registries. + +1. Build from children registries. + + For example: + + In MMDetection we define: + + ```python + from mmcv.utils import Registry + from mmcv.cnn import MODELS as MMCV_MODELS + MODELS = Registry('model', parent=MMCV_MODELS) + + @MODELS.register_module() + class NetA(nn.Module): + def forward(self, x): + return x + ``` + + In MMClassification we define: + + ```python + from mmcv.utils import Registry + from mmcv.cnn import MODELS as MMCV_MODELS + MODELS = Registry('model', parent=MMCV_MODELS) + + @MODELS.register_module() + class NetB(nn.Module): + def forward(self, x): + return x + 1 + ``` + + We could build two net in either MMDetection or MMClassification by: + + ```python + from mmdet.models import MODELS + net_a = MODELS.build(cfg=dict(type='NetA')) + net_b = MODELS.build(cfg=dict(type='mmcls.NetB')) + ``` + + or + + ```python + from mmcls.models import MODELS + net_a = MODELS.build(cfg=dict(type='mmdet.NetA')) + net_b = MODELS.build(cfg=dict(type='NetB')) + ``` + +2. Build from parent registry. + + The shared `MODELS` registry in MMCV is the parent registry for all downstream codebases (root registry): + + ```python + from mmcv.cnn import MODELS as MMCV_MODELS + net_a = MMCV_MODELS.build(cfg=dict(type='mmdet.NetA')) + net_b = MMCV_MODELS.build(cfg=dict(type='mmcls.NetB')) + ``` diff --git a/docs/understand_mmcv/runner.md b/docs/understand_mmcv/runner.md new file mode 100644 index 0000000..2e6e386 --- /dev/null +++ b/docs/understand_mmcv/runner.md @@ -0,0 +1,163 @@ +## Runner + +The runner class is designed to manage the training. It eases the training process with less code demanded from users while staying flexible and configurable. The main features are as listed: + +- Support `EpochBasedRunner` and `IterBasedRunner` for different scenarios. 
Implementing customized runners is also allowed to meet customized needs. +- Support customized workflow to allow switching between different modes while training. Currently, supported modes are train and val. +- Enable extensibility through various hooks, including hooks defined in MMCV and customized ones. + +### EpochBasedRunner + +As its name indicates, workflow in `EpochBasedRunner` should be set based on epochs. For example, [('train', 2), ('val', 1)] means running 2 epochs for training and 1 epoch for validation, iteratively. And each epoch may contain multiple iterations. Currently, MMDetection uses `EpochBasedRunner` by default. + +Let's take a look at its core logic: + +```python +# the condition to stop training +while curr_epoch < max_epochs: + # traverse the workflow. + # e.g. workflow = [('train', 2), ('val', 1)] + for i, flow in enumerate(workflow): + # mode(e.g. train) determines which function to run + mode, epochs = flow + # epoch_runner will be either self.train() or self.val() + epoch_runner = getattr(self, mode) + # execute the corresponding function + for _ in range(epochs): + epoch_runner(data_loaders[i], **kwargs) +``` + +Currently, we support 2 modes: train and val. Let's take a train function for example and have a look at its core logic: + +```python +# Currently, epoch_runner could be either train or val +def train(self, data_loader, **kwargs): + # traverse the dataset and get batch data for 1 epoch + for i, data_batch in enumerate(data_loader): + # it will execute all before_train_iter function in the hooks registered. You may want to watch out for the order. + self.call_hook('before_train_iter') + # set train_mode as False in val function + self.run_iter(data_batch, train_mode=True, **kwargs) + self.call_hook('after_train_iter') + self.call_hook('after_train_epoch') +``` + +### IterBasedRunner + +Different from `EpochBasedRunner`, workflow in `IterBasedRunner` should be set based on iterations. 
For example, [('train', 2), ('val', 1)] means running 2 iters for training and 1 iter for validation, iteratively. Currently, MMSegmentation uses `IterBasedRunner` by default. + +Let's take a look at its core logic: + +```python +# Although we set workflow by iters here, we might also need info on the epochs in some using cases. That can be provided by IterLoader. +iter_loaders = [IterLoader(x) for x in data_loaders] +# the condition to stop training +while curr_iter < max_iters: + # traverse the workflow. + # e.g. workflow = [('train', 2), ('val', 1)] + for i, flow in enumerate(workflow): + # mode(e.g. train) determines which function to run + mode, iters = flow + # iter_runner will be either self.train() or self.val() + iter_runner = getattr(self, mode) + # execute the corresponding function + for _ in range(iters): + iter_runner(iter_loaders[i], **kwargs) +``` + +Currently, we support 2 modes: train and val. Let's take a val function for example and have a look at its core logic: + +```python +# Currently, iter_runner could be either train or val +def val(self, data_loader, **kwargs): + # get batch data for 1 iter + data_batch = next(data_loader) + # it will execute all before_val_iter function in the hooks registered. You may want to watch out for the order. + self.call_hook('before_val_iter') + outputs = self.model.val_step(data_batch, self.optimizer, **kwargs) + self.outputs = outputs + self.call_hook('after_val_iter') +``` + +Other than the basic functionalities explained above, `EpochBasedRunner` and `IterBasedRunner` provide methods such as `resume`, `save_checkpoint` and `register_hook`. In case you are not familiar with the term Hook mentioned earlier, we will also provide a tutorial about it.(coming soon...) Essentially, a hook is functionality to alter or augment the code behaviors through predefined api. It allows users to have their own code called under certain circumstances. It makes code extensible in a non-intrusive manner. 
+ +### A Simple Example + +We will walk you through the usage of runner with a classification task. The following code only contains essential steps for demonstration purposes. The following steps are necessary for any training tasks. + +**(1) Initialize dataloader, model, optimizer, etc.** + +```python +# initialize model +model=... +# initialize optimizer, typically, we set: cfg.optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001) +optimizer = build_optimizer(model, cfg.optimizer) +# initialize the dataloader corresponding to the workflow(train/val) +data_loaders = [ + build_dataloader( + ds, + cfg.data.samples_per_gpu, + cfg.data.workers_per_gpu, + ...) for ds in dataset + ] +``` + +**(2) Initialize runner** + +```python +runner = build_runner( + # cfg.runner is typically set as: + # runner = dict(type='EpochBasedRunner', max_epochs=200) + cfg.runner, + default_args=dict( + model=model, + batch_processor=None, + optimizer=optimizer, + logger=logger)) +``` + +**(3) Register training hooks and customized hooks.** + +```python +# register default hooks necessary for training +runner.register_training_hooks( + # configs of learning rate, it is typically set as: + # lr_config = dict(policy='step', step=[100, 150]) + cfg.lr_config, + # configuration of optimizer, e.g. grad_clip + optimizer_config, + # configuration of saving checkpoints, it is typically set as: + # checkpoint_config = dict(interval=1), saving checkpoints every epochs + cfg.checkpoint_config, + # configuration of logs + cfg.log_config, + ...) 
+ +# register customized hooks +# say we want to enable ema, then we could set custom_hooks=[dict(type='EMAHook')] +if cfg.get('custom_hooks', None): + custom_hooks = cfg.custom_hooks + for hook_cfg in cfg.custom_hooks: + hook_cfg = hook_cfg.copy() + priority = hook_cfg.pop('priority', 'NORMAL') + hook = build_from_cfg(hook_cfg, HOOKS) + runner.register_hook(hook, priority=priority) +``` + +Then, we can use `resume` or `load_checkpoint` to load existing weights. + +**(4) Start training** + +```python +# workflow is typically set as: workflow = [('train', 1)] +# here the training begins. +runner.run(data_loaders, cfg.workflow) +``` + +Let's take `EpochBasedRunner` for example and go a little bit into details about setting workflow: + +- Say we only want to put train in the workflow, then we can set: workflow = [('train', 1)]. The runner will only execute train iteratively in this case. +- Say we want to put both train and val in the workflow, then we can set: workflow = [('train', 3), ('val',1)]. The runner will first execute train for 3 epochs and then switch to val mode and execute val for 1 epoch. The workflow will be repeated until the current epoch hit the max_epochs. +- Workflow is highly flexible. Therefore, you can set workflow = [('val', 1), ('train',1)] if you would like the runner to validate first and train after. + +The code we demonstrated above is already in `train.py` in MM repositories. Simply modify the corresponding keys in the configuration files and the script will execute the expected workflow automatically. diff --git a/docs/understand_mmcv/utils.md b/docs/understand_mmcv/utils.md new file mode 100644 index 0000000..5d5e0ad --- /dev/null +++ b/docs/understand_mmcv/utils.md @@ -0,0 +1,74 @@ +## Utils + +### ProgressBar + +If you want to apply a method to a list of items and track the progress, `track_progress` +is a good choice. It will display a progress bar to tell the progress and ETA. 
+ +```python +import mmcv + +def func(item): + # do something + pass + +tasks = [item_1, item_2, ..., item_n] + +mmcv.track_progress(func, tasks) +``` + +The output is like the following. + +![progress](../_static/progress.*) + +There is another method `track_parallel_progress`, which wraps multiprocessing and +progress visualization. + +```python +mmcv.track_parallel_progress(func, tasks, 8) # 8 workers +``` + +![progress](../_static/parallel_progress.*) + +If you want to iterate or enumerate a list of items and track the progress, `track_iter_progress` +is a good choice. It will display a progress bar to tell the progress and ETA. + +```python +import mmcv + +tasks = [item_1, item_2, ..., item_n] + +for task in mmcv.track_iter_progress(tasks): + # do something like print + print(task) + +for i, task in enumerate(mmcv.track_iter_progress(tasks)): + # do something like print + print(i) + print(task) +``` + +### Timer + +It is convenient to compute the runtime of a code block with `Timer`. + +```python +import time + +with mmcv.Timer(): + # simulate some code block + time.sleep(1) +``` + +or try with `since_start()` and `since_last_check()`. This former can +return the runtime since the timer starts and the latter will return the time +since the last time checked. 
+ +```python +timer = mmcv.Timer() +# code block 1 here +print(timer.since_start()) +# code block 2 here +print(timer.since_last_check()) +print(timer.since_start()) +``` diff --git a/docs/en/understand_mmcv/visualization.md b/docs/understand_mmcv/visualization.md similarity index 100% rename from docs/en/understand_mmcv/visualization.md rename to docs/understand_mmcv/visualization.md diff --git a/docs/zh_cn/_static/version.json b/docs/zh_cn/_static/version.json deleted file mode 100644 index 7ee4965..0000000 --- a/docs/zh_cn/_static/version.json +++ /dev/null @@ -1,575 +0,0 @@ -{ - "Linux": [ - { - "cuda": "11.7", - "torch": "1.13.x", - "mmcv": [ - "2.0.0rc3" - ] - }, - { - "cuda": "11.6", - "torch": "1.13.x", - "mmcv": [ - "2.0.0rc3" - ] - }, - { - "cuda": "11.6", - "torch": "1.12.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "11.5", - "torch": "1.11.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "11.3", - "torch": "1.12.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "11.3", - "torch": "1.11.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "11.3", - "torch": "1.10.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "11.1", - "torch": "1.10.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "11.1", - "torch": "1.9.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "11.1", - "torch": "1.8.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "11.0", - "torch": "1.7.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "10.2", - "torch": "1.12.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "10.2", - "torch": "1.11.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "10.2", - "torch": "1.10.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - 
"2.0.0rc1" - ] - }, - { - "cuda": "10.2", - "torch": "1.9.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "10.2", - "torch": "1.8.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "10.2", - "torch": "1.7.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "10.2", - "torch": "1.6.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "10.1", - "torch": "1.8.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "10.1", - "torch": "1.7.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "10.1", - "torch": "1.6.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "9.2", - "torch": "1.7.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "9.2", - "torch": "1.6.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "cpu", - "torch": "1.13.x", - "mmcv": [ - "2.0.0rc3" - ] - }, - { - "cuda": "cpu", - "torch": "1.12.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "cpu", - "torch": "1.11.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "cpu", - "torch": "1.10.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "cpu", - "torch": "1.9.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "cpu", - "torch": "1.8.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "cpu", - "torch": "1.7.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "cpu", - "torch": "1.6.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - } - ], - "Windows": [ - { - "cuda": "11.7", - "torch": "1.13.x", - "mmcv": [ - "2.0.0rc3" - ] - }, - { - "cuda": "11.6", - "torch": "1.13.x", - "mmcv": [ - "2.0.0rc3" - ] - }, - { - "cuda": "11.6", - "torch": "1.12.x", - "mmcv": [ - "2.0.0rc3", - 
"2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "11.5", - "torch": "1.11.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "11.3", - "torch": "1.12.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "11.3", - "torch": "1.11.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "11.3", - "torch": "1.10.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "11.1", - "torch": "1.10.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "11.1", - "torch": "1.9.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "11.1", - "torch": "1.8.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "10.2", - "torch": "1.10.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "10.2", - "torch": "1.9.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "10.2", - "torch": "1.8.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "10.2", - "torch": "1.7.x", - "mmcv": [ - "2.0.0rc3" - ] - }, - { - "cuda": "10.2", - "torch": "1.6.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "10.1", - "torch": "1.8.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "10.1", - "torch": "1.7.x", - "mmcv": [ - "2.0.0rc3" - ] - }, - { - "cuda": "10.1", - "torch": "1.6.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "cpu", - "torch": "1.13.x", - "mmcv": [ - "2.0.0rc3" - ] - }, - { - "cuda": "cpu", - "torch": "1.12.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "cpu", - "torch": "1.11.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "cpu", - "torch": "1.10.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "cpu", - "torch": "1.9.x", - "mmcv": [ - "2.0.0rc3", 
- "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "cpu", - "torch": "1.8.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "cpu", - "torch": "1.7.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - }, - { - "cuda": "cpu", - "torch": "1.6.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2", - "2.0.0rc1" - ] - } - ], - "macOS": [ - { - "cuda": "cpu", - "torch": "1.13.x", - "mmcv": [ - "2.0.0rc3" - ] - }, - { - "cuda": "mps", - "torch": "1.13.x", - "mmcv": [ - "2.0.0rc3" - ] - }, - { - "cuda": "cpu", - "torch": "1.12.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2" - ] - }, - { - "cuda": "cpu", - "torch": "1.11.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2" - ] - }, - { - "cuda": "cpu", - "torch": "1.10.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2" - ] - }, - { - "cuda": "cpu", - "torch": "1.9.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2" - ] - }, - { - "cuda": "cpu", - "torch": "1.8.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2" - ] - }, - { - "cuda": "cpu", - "torch": "1.7.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2" - ] - }, - { - "cuda": "cpu", - "torch": "1.6.x", - "mmcv": [ - "2.0.0rc3", - "2.0.0rc2" - ] - } - ] -} diff --git a/docs/zh_cn/_templates/classtemplate.rst b/docs/zh_cn/_templates/classtemplate.rst deleted file mode 100644 index 4f74842..0000000 --- a/docs/zh_cn/_templates/classtemplate.rst +++ /dev/null @@ -1,14 +0,0 @@ -.. role:: hidden - :class: hidden-section -.. currentmodule:: {{ module }} - - -{{ name | underline}} - -.. autoclass:: {{ name }} - :members: - - -.. - autogenerated from source/_templates/classtemplate.rst - note it does not have :inherited-members: diff --git a/docs/zh_cn/api/arraymisc.rst b/docs/zh_cn/api/arraymisc.rst deleted file mode 100644 index 28975eb..0000000 --- a/docs/zh_cn/api/arraymisc.rst +++ /dev/null @@ -1,19 +0,0 @@ -.. role:: hidden - :class: hidden-section - -mmcv.arraymisc -=================================== - -.. contents:: mmcv.arraymisc - :depth: 2 - :local: - :backlinks: top - -.. 
currentmodule:: mmcv.arraymisc - -.. autosummary:: - :toctree: generated - :nosignatures: - - quantize - dequantize diff --git a/docs/zh_cn/api/cnn.rst b/docs/zh_cn/api/cnn.rst deleted file mode 100644 index 022191f..0000000 --- a/docs/zh_cn/api/cnn.rst +++ /dev/null @@ -1,71 +0,0 @@ -.. role:: hidden - :class: hidden-section - -mmcv.cnn -=================================== - -.. contents:: mmcv.cnn - :depth: 2 - :local: - :backlinks: top - -.. currentmodule:: mmcv.cnn - -Module ----------------- - -.. autosummary:: - :toctree: generated - :nosignatures: - :template: classtemplate.rst - - ContextBlock - Conv2d - Conv3d - ConvAWS2d - ConvModule - ConvTranspose2d - ConvTranspose3d - ConvWS2d - DepthwiseSeparableConvModule - GeneralizedAttention - HSigmoid - HSwish - LayerScale - Linear - MaxPool2d - MaxPool3d - NonLocal1d - NonLocal2d - NonLocal3d - Scale - Swish - Conv2dRFSearchOp - -Build Function ----------------- - -.. autosummary:: - :toctree: generated - :nosignatures: - - build_activation_layer - build_conv_layer - build_norm_layer - build_padding_layer - build_plugin_layer - build_upsample_layer - -Miscellaneous ----------------- - -.. autosummary:: - :toctree: generated - :nosignatures: - - fuse_conv_bn - conv_ws_2d - is_norm - make_res_layer - make_vgg_layer - get_model_complexity_info diff --git a/docs/zh_cn/api/image.rst b/docs/zh_cn/api/image.rst deleted file mode 100644 index 3b93484..0000000 --- a/docs/zh_cn/api/image.rst +++ /dev/null @@ -1,100 +0,0 @@ -.. role:: hidden - :class: hidden-section - -mmcv.image -=================================== - -.. contents:: mmcv.image - :depth: 2 - :local: - :backlinks: top - -.. currentmodule:: mmcv.image - -IO ----------------- - -.. autosummary:: - :toctree: generated - :nosignatures: - - imfrombytes - imread - imwrite - use_backend - -Color Space ----------------- - -.. 
autosummary:: - :toctree: generated - :nosignatures: - - bgr2gray - bgr2hls - bgr2hsv - bgr2rgb - bgr2ycbcr - gray2bgr - gray2rgb - hls2bgr - hsv2bgr - imconvert - rgb2bgr - rgb2gray - rgb2ycbcr - ycbcr2bgr - ycbcr2rgb - -Geometric ----------------- - -.. autosummary:: - :toctree: generated - :nosignatures: - - cutout - imcrop - imflip - impad - impad_to_multiple - imrescale - imresize - imresize_like - imresize_to_multiple - imrotate - imshear - imtranslate - rescale_size - -Photometric ----------------- - -.. autosummary:: - :toctree: generated - :nosignatures: - - adjust_brightness - adjust_color - adjust_contrast - adjust_hue - adjust_lighting - adjust_sharpness - auto_contrast - clahe - imdenormalize - imequalize - iminvert - imnormalize - lut_transform - posterize - solarize - -Miscellaneous ----------------- - -.. autosummary:: - :toctree: generated - :nosignatures: - - tensor2imgs diff --git a/docs/zh_cn/api/ops.rst b/docs/zh_cn/api/ops.rst deleted file mode 100644 index b029045..0000000 --- a/docs/zh_cn/api/ops.rst +++ /dev/null @@ -1,135 +0,0 @@ -.. role:: hidden - :class: hidden-section - -mmcv.ops -=================================== - -.. contents:: mmcv.ops - :depth: 2 - :local: - :backlinks: top - -.. currentmodule:: mmcv.ops - -.. 
autosummary:: - :toctree: generated - :nosignatures: - :template: classtemplate.rst - - BorderAlign - CARAFE - CARAFENaive - CARAFEPack - Conv2d - ConvTranspose2d - CornerPool - Correlation - CrissCrossAttention - DeformConv2d - DeformConv2dPack - DeformRoIPool - DeformRoIPoolPack - DynamicScatter - FusedBiasLeakyReLU - GroupAll - Linear - MaskedConv2d - MaxPool2d - ModulatedDeformConv2d - ModulatedDeformConv2dPack - ModulatedDeformRoIPoolPack - MultiScaleDeformableAttention - PSAMask - PointsSampler - PrRoIPool - QueryAndGroup - RiRoIAlignRotated - RoIAlign - RoIAlignRotated - RoIAwarePool3d - RoIPointPool3d - RoIPool - SAConv2d - SigmoidFocalLoss - SimpleRoIAlign - SoftmaxFocalLoss - SparseConv2d - SparseConv3d - SparseConvTensor - SparseConvTranspose2d - SparseConvTranspose3d - SparseInverseConv2d - SparseInverseConv3d - SparseMaxPool2d - SparseMaxPool3d - SparseModule - SparseSequential - SubMConv2d - SubMConv3d - SyncBatchNorm - TINShift - Voxelization - -.. autosummary:: - :toctree: generated - :nosignatures: - - active_rotated_filter - assign_score_withk - ball_query - batched_nms - bbox_overlaps - border_align - box_iou_rotated - boxes_iou3d - boxes_iou_bev - boxes_overlap_bev - carafe - carafe_naive - chamfer_distance - contour_expand - convex_giou - convex_iou - deform_conv2d - deform_roi_pool - diff_iou_rotated_2d - diff_iou_rotated_3d - dynamic_scatter - furthest_point_sample - furthest_point_sample_with_dist - fused_bias_leakyrelu - gather_points - grouping_operation - knn - masked_conv2d - min_area_polygons - modulated_deform_conv2d - nms - nms3d - nms3d_normal - nms_bev - nms_match - nms_normal_bev - nms_rotated - pixel_group - point_sample - points_in_boxes_all - points_in_boxes_cpu - points_in_boxes_part - points_in_polygons - prroi_pool - rel_roi_point_to_rel_img_point - riroi_align_rotated - roi_align - roi_align_rotated - roi_pool - rotated_feature_align - scatter_nd - sigmoid_focal_loss - soft_nms - softmax_focal_loss - three_interpolate - 
three_nn - tin_shift - upfirdn2d - voxelization diff --git a/docs/zh_cn/api/transforms.rst b/docs/zh_cn/api/transforms.rst deleted file mode 100644 index b080133..0000000 --- a/docs/zh_cn/api/transforms.rst +++ /dev/null @@ -1,60 +0,0 @@ -.. role:: hidden - :class: hidden-section - -mmcv.transforms -=================================== - -.. currentmodule:: mmcv.transforms - -.. autosummary:: - :toctree: generated - :nosignatures: - :template: classtemplate.rst - - BaseTransform - TestTimeAug - -Loading ----------------- - -.. autosummary:: - :toctree: generated - :nosignatures: - :template: classtemplate.rst - - LoadAnnotations - LoadImageFromFile - -Processing ----------------- - -.. autosummary:: - :toctree: generated - :nosignatures: - :template: classtemplate.rst - - CenterCrop - MultiScaleFlipAug - Normalize - Pad - RandomChoiceResize - RandomFlip - RandomGrayscale - RandomResize - Resize - ToTensor - ImageToTensor - -Wrapper ----------------- - -.. autosummary:: - :toctree: generated - :nosignatures: - :template: classtemplate.rst - - Compose - KeyMapper - RandomApply - RandomChoice - TransformBroadcaster diff --git a/docs/zh_cn/api/utils.rst b/docs/zh_cn/api/utils.rst deleted file mode 100644 index f2ff4c2..0000000 --- a/docs/zh_cn/api/utils.rst +++ /dev/null @@ -1,23 +0,0 @@ -.. role:: hidden - :class: hidden-section - -mmcv.utils -=================================== - -.. contents:: mmcv.utils - :depth: 2 - :local: - :backlinks: top - -.. currentmodule:: mmcv.utils - -.. autosummary:: - :toctree: generated - :nosignatures: - - IS_CUDA_AVAILABLE - IS_MLU_AVAILABLE - IS_MPS_AVAILABLE - collect_env - jit - skip_no_elena diff --git a/docs/zh_cn/api/video.rst b/docs/zh_cn/api/video.rst deleted file mode 100644 index a6ebca0..0000000 --- a/docs/zh_cn/api/video.rst +++ /dev/null @@ -1,56 +0,0 @@ -.. role:: hidden - :class: hidden-section - -mmcv.video -=================================== - -.. contents:: mmcv.video - :depth: 2 - :local: - :backlinks: top - -.. 
currentmodule:: mmcv.video - -IO ----------------- - -.. autosummary:: - :toctree: generated - :nosignatures: - :template: classtemplate.rst - - VideoReader - Cache - -.. autosummary:: - :toctree: generated - :nosignatures: - - frames2video - -Optical Flow ----------------- - -.. autosummary:: - :toctree: generated - :nosignatures: - - dequantize_flow - flow_from_bytes - flow_warp - flowread - flowwrite - quantize_flow - sparse_flow_from_bytes - -Video Processing ----------------- - -.. autosummary:: - :toctree: generated - :nosignatures: - - concat_video - convert_video - cut_video - resize_video diff --git a/docs/zh_cn/api/visualization.rst b/docs/zh_cn/api/visualization.rst deleted file mode 100644 index 8f43ef2..0000000 --- a/docs/zh_cn/api/visualization.rst +++ /dev/null @@ -1,50 +0,0 @@ -.. role:: hidden - :class: hidden-section - -mmcv.visualization -=================================== - -.. contents:: mmcv.visualization - :depth: 2 - :local: - :backlinks: top - -.. currentmodule:: mmcv.visualization - -Color ----------------- - -.. autosummary:: - :toctree: generated - :nosignatures: - :template: classtemplate.rst - - Color - -.. autosummary:: - :toctree: generated - :nosignatures: - - color_val - -Image ----------------- - -.. autosummary:: - :toctree: generated - :nosignatures: - - imshow - imshow_bboxes - imshow_det_bboxes - -Optical Flow ----------------- - -.. 
autosummary:: - :toctree: generated - :nosignatures: - - flow2rgb - flowshow - make_color_wheel diff --git a/docs/zh_cn/community/code_style.md b/docs/zh_cn/community/code_style.md deleted file mode 100644 index 8ddb87c..0000000 --- a/docs/zh_cn/community/code_style.md +++ /dev/null @@ -1,609 +0,0 @@ -## 代ç è§„范 - -### 代ç è§„范标准 - -#### PEP 8 —— Python 官方代ç è§„范 - -[Python 官方的代ç é£Žæ ¼æŒ‡å—](https://www.python.org/dev/peps/pep-0008/),包å«äº†ä»¥ä¸‹å‡ ä¸ªæ–¹é¢çš„内容: - -- 代ç å¸ƒå±€ï¼Œä»‹ç»äº† Python ä¸­ç©ºè¡Œã€æ–­è¡Œä»¥åŠå¯¼å…¥ç›¸å…³çš„代ç é£Žæ ¼è§„范。比如一个常è§çš„问题:当我的代ç è¾ƒé•¿ï¼Œæ— æ³•在一行写下时,何处å¯ä»¥æ–­è¡Œï¼Ÿ - -- 表达å¼ï¼Œä»‹ç»äº† Python 中表达å¼ç©ºæ ¼ç›¸å…³çš„一些风格规范。 - -- å°¾éšé€—å·ç›¸å…³çš„规范。当列表较长,无法一行写下而写æˆå¦‚下é€è¡Œåˆ—表时,推è在末项åŽåР逗å·ï¼Œä»Žè€Œä¾¿äºŽè¿½åР选项ã€ç‰ˆæœ¬æŽ§åˆ¶ç­‰ã€‚ - - ```python - # Correct: - FILES = ['setup.cfg', 'tox.ini'] - # Correct: - FILES = [ - 'setup.cfg', - 'tox.ini', - ] - # Wrong: - FILES = ['setup.cfg', 'tox.ini',] - # Wrong: - FILES = [ - 'setup.cfg', - 'tox.ini' - ] - ``` - -- 命åç›¸å…³è§„èŒƒã€æ³¨é‡Šç›¸å…³è§„范ã€ç±»åž‹æ³¨è§£ç›¸å…³è§„范,我们将在åŽç»­ç« èЂ䏭åšè¯¦ç»†ä»‹ç»ã€‚ - - "A style guide is about consistency. Consistency with this style guide is important. Consistency within a project is more important. Consistency within one module or function is the most important." 
PEP 8 -- Style Guide for Python Code - -:::{note} -PEP 8 的代ç è§„èŒƒå¹¶ä¸æ˜¯ç»å¯¹çš„,项目内的一致性è¦ä¼˜å…ˆäºŽ PEP 8 的规范。OpenMMLab å„个项目都在 setup.cfg 设定了一些代ç è§„范的设置,请éµç…§è¿™äº›è®¾ç½®ã€‚ä¸€ä¸ªä¾‹å­æ˜¯åœ¨ PEP 8 中有如下一个例å­ï¼š - -```python -# Correct: -hypot2 = x*x + y*y -# Wrong: -hypot2 = x * x + y * y -``` - -这一规范是为了指示ä¸åŒä¼˜å…ˆçº§ï¼Œä½† OpenMMLab 的设置中通常没有å¯ç”¨ yapf çš„ `ARITHMETIC_PRECEDENCE_INDICATION` 选项,因而格å¼è§„范工具ä¸ä¼šæŒ‰ç…§æŽ¨èæ ·å¼æ ¼å¼åŒ–,以设置为准。 -::: - -#### Google å¼€æºé¡¹ç›®é£Žæ ¼æŒ‡å— - -[Google 使用的编程风格指å—](https://google.github.io/styleguide/pyguide.html),包括了 Python 相关的章节。相较于 PEP 8ï¼Œè¯¥æŒ‡å—æä¾›äº†æ›´ä¸ºè¯¦å°½çš„ä»£ç æŒ‡å—。该指å—包括了语言规范和风格规范两个部分。 - -其中,语言规范对 Python 中很多语言特性进行了优缺点的分æžï¼Œå¹¶ç»™å‡ºäº†ä½¿ç”¨æŒ‡å¯¼æ„è§ï¼Œå¦‚异常ã€Lambda 表达å¼ã€åˆ—表推导å¼ã€metaclass 等。 - -风格规范的内容与 PEP 8 较为接近,大部分约定建立在 PEP 8 的基础上,也有一些更为详细的约定,如函数长度ã€TODO æ³¨é‡Šã€æ–‡ä»¶ä¸Ž socket 对象的访问等。 - -推è将该指å—作为å‚考进行开å‘,但ä¸å¿…严格éµç…§ï¼Œä¸€æ¥è¯¥æŒ‡å—存在一些 Python 2 兼容需求,例如指å—ä¸­è¦æ±‚所有无基类的类应当显å¼åœ°ç»§æ‰¿ Object, 而在仅使用 Python 3 çš„çŽ¯å¢ƒä¸­ï¼Œè¿™ä¸€è¦æ±‚是ä¸å¿…è¦çš„ï¼Œä¾æœ¬é¡¹ç›®ä¸­çš„æƒ¯ä¾‹å³å¯ã€‚äºŒæ¥ OpenMMLab 的项目作为框架级的开æºè½¯ä»¶ï¼Œä¸å¿…对一些高级技巧过于é¿è®³ï¼Œå°¤å…¶æ˜¯ MMCV。但å°è¯•使用这些技巧å‰åº”当认真考虑是å¦çœŸçš„æœ‰å¿…è¦ï¼Œå¹¶å¯»æ±‚å…¶ä»–å¼€å‘人员的广泛评估。 - -å¦å¤–éœ€è¦æ³¨æ„的一处规范是关于包的导入,在该指å—ä¸­ï¼Œè¦æ±‚导入本地包时必须使用路径全称,且导入的æ¯ä¸€ä¸ªæ¨¡å—都应当å•独æˆè¡Œï¼Œé€šå¸¸è¿™æ˜¯ä¸å¿…è¦çš„,而且也ä¸ç¬¦åˆç›®å‰é¡¹ç›®çš„开呿ƒ¯ä¾‹ï¼Œæ­¤å¤„进行如下约定: - -```python -# Correct -from mmcv.cnn.bricks import (Conv2d, build_norm_layer, DropPath, MaxPool2d, - Linear) -from ..utils import ext_loader - -# Wrong -from mmcv.cnn.bricks import Conv2d, build_norm_layer, DropPath, MaxPool2d, \ - Linear # 使用括å·è¿›è¡Œè¿žæŽ¥ï¼Œè€Œä¸æ˜¯åæ–œæ  -from ...utils import is_str # 最多å‘上回溯一层,过多的回溯容易导致结构混乱 -``` - -OpenMMLab 项目使用 pre-commit 工具自动格å¼åŒ–代ç ï¼Œè¯¦æƒ…è§[贡献代ç ](./contributing.md#代ç é£Žæ ¼)。 - -### 命å规范 - -#### 命å规范的é‡è¦æ€§ - -ä¼˜ç§€çš„å‘½åæ˜¯è‰¯å¥½ä»£ç å¯è¯»çš„基础。基础的命å规范对å„ç±»å˜é‡çš„命ååšäº†è¦æ±‚,使读者å¯ä»¥æ–¹ä¾¿åœ°æ ¹æ®ä»£ç å了解å˜é‡æ˜¯ä¸€ä¸ªç±» / 局部å˜é‡ / 全局å˜é‡ç­‰ã€‚而优秀的命å则需è¦ä»£ç 
ä½œè€…对于å˜é‡çš„功能有清晰的认识,以åŠè‰¯å¥½çš„表达能力,从而使读者根æ®å称就能了解其å«ä¹‰ï¼Œç”šè‡³å¸®åŠ©äº†è§£è¯¥æ®µä»£ç çš„功能。 - -#### 基础命å规范 - -| 类型 | 公有 | ç§æœ‰ | -| --------------- | ---------------- | ------------------ | -| æ¨¡å— | lower_with_under | \_lower_with_under | -| 包 | lower_with_under | | -| ç±» | CapWords | \_CapWords | -| 异常 | CapWordsError | | -| 函数(方法) | lower_with_under | \_lower_with_under | -| 函数 / æ–¹æ³•å‚æ•° | lower_with_under | | -| 全局 / ç±»å†…å¸¸é‡ | CAPS_WITH_UNDER | \_CAPS_WITH_UNDER | -| 全局 / 类内å˜é‡ | lower_with_under | \_lower_with_under | -| å˜é‡ | lower_with_under | \_lower_with_under | -| 局部å˜é‡ | lower_with_under | | - -注æ„: - -- å°½é‡é¿å…å˜é‡å与ä¿ç•™å­—冲çªï¼Œç‰¹æ®Šæƒ…况下如ä¸å¯é¿å…,å¯ä½¿ç”¨ä¸€ä¸ªåŽç½®ä¸‹åˆ’线,如 class\_ -- å°½é‡ä¸è¦ä½¿ç”¨è¿‡äºŽç®€å•的命å,除了约定俗æˆçš„循环å˜é‡ i,文件å˜é‡ f,错误å˜é‡ e 等。 -- ä¸ä¼šè¢«ç”¨åˆ°çš„å˜é‡å¯ä»¥å‘½å为 \_,逻辑检查器会将其忽略。 - -#### å‘½åæŠ€å·§ - -良好的å˜é‡å‘½å需è¦ä¿è¯ä¸‰ç‚¹ï¼š - -1. å«ä¹‰å‡†ç¡®ï¼Œæ²¡æœ‰æ­§ä¹‰ -2. 长短适中 -3. å‰åŽç»Ÿä¸€ - -```python -# Wrong -class Masks(metaclass=ABCMeta): # 命志 æ³•表现基类;Instance or Semantic? 
- pass - -# Correct -class BaseInstanceMasks(metaclass=ABCMeta): - pass - -# Wrong,ä¸åŒåœ°æ–¹å«ä¹‰ç›¸åŒçš„å˜é‡å°½é‡ç”¨ç»Ÿä¸€çš„命å -def __init__(self, inplanes, planes): - pass - -def __init__(self, in_channels, out_channels): - pass -``` - -常è§çš„å‡½æ•°å‘½åæ–¹æ³•: - -- åŠ¨å®¾å‘½åæ³•:crop_img, init_weights -- åŠ¨å®¾å€’ç½®å‘½åæ³•:imread, bbox_flip - -注æ„函数命åä¸Žå‚æ•°çš„顺åºï¼Œä¿è¯ä¸»è¯­åœ¨å‰ï¼Œç¬¦åˆè¯­è¨€ä¹ æƒ¯ï¼š - -- check_keys_exist(key, container) -- check_keys_contain(container, key) - -注æ„é¿å…éžå¸¸è§„或统一约定的缩写,如 nb -> num_blocks,in_nc -> in_channels - -### docstring 规范 - -#### 为什么è¦å†™ docstring - -docstring 是对一个类ã€ä¸€ä¸ªå‡½æ•°åŠŸèƒ½ä¸Ž API 接å£çš„详细æè¿°ï¼Œæœ‰ä¸¤ä¸ªåŠŸèƒ½ï¼Œä¸€æ˜¯å¸®åŠ©å…¶ä»–å¼€å‘者了解代ç åŠŸèƒ½ï¼Œæ–¹ä¾¿ debug å’Œå¤ç”¨ä»£ç ï¼›äºŒæ˜¯åœ¨ Readthedocs 文档中自动生æˆç›¸å…³çš„ API reference 文档,帮助ä¸äº†è§£æºä»£ç çš„社区用户使用相关功能。 - -#### 如何写 docstring - -与注释ä¸åŒï¼Œä¸€ä»½è§„范的 docstring 有ç€ä¸¥æ ¼çš„æ ¼å¼è¦æ±‚,以便于 Python è§£é‡Šå™¨ä»¥åŠ sphinx 进行文档解æžï¼Œè¯¦ç»†çš„ docstring 约定å‚è§ [PEP 257](https://www.python.org/dev/peps/pep-0257/)。此处以例å­çš„å½¢å¼ä»‹ç»å„ç§æ–‡æ¡£çš„æ ‡å‡†æ ¼å¼ï¼Œå‚考格å¼ä¸º [Google 风格](https://zh-google-styleguide.readthedocs.io/en/latest/google-python-styleguide/python_style_rules/#comments)。 - -1. æ¨¡å—æ–‡æ¡£ - - 代ç é£Žæ ¼è§„范推è为æ¯ä¸€ä¸ªæ¨¡å—ï¼ˆå³ Python 文件)编写一个 docstringï¼Œä½†ç›®å‰ OpenMMLab 项目大部分没有此类 docstring,因此ä¸åšç¡¬æ€§è¦æ±‚。 - - ```python - """A one line summary of the module or program, terminated by a period. - - Leave one blank line. The rest of this docstring should contain an - overall description of the module or program. Optionally, it may also - contain a brief description of exported classes and functions and/or usage - examples. - - Typical usage example: - - foo = ClassFoo() - bar = foo.FunctionBar() - """ - ``` - -2. 
类文档 - - 类文档是我们最常需è¦ç¼–写的,此处,按照 OpenMMLab 的惯例,我们使用了与 Google 风格ä¸åŒçš„写法。如下例所示,文档中没有使用 Attributes æè¿°ç±»å±žæ€§ï¼Œè€Œæ˜¯ä½¿ç”¨ Args æè¿° __init__ å‡½æ•°çš„å‚æ•°ã€‚ - - 在 Args 中,éµç…§ `parameter (type): Description.` 的格å¼ï¼Œæè¿°æ¯ä¸€ä¸ªå‚数类型和功能。其中,多ç§ç±»åž‹å¯ä½¿ç”¨ `(float or str)` 的写法,å¯ä»¥ä¸º None çš„å‚æ•°å¯ä»¥å†™ä¸º `(int, optional)`。 - - ```python - class BaseRunner(metaclass=ABCMeta): - """The base class of Runner, a training helper for PyTorch. - - All subclasses should implement the following APIs: - - - ``run()`` - - ``train()`` - - ``val()`` - - ``save_checkpoint()`` - - Args: - model (:obj:`torch.nn.Module`): The model to be run. - batch_processor (callable, optional): A callable method that process - a data batch. The interface of this method should be - ``batch_processor(model, data, train_mode) -> dict``. - Defaults to None. - optimizer (dict or :obj:`torch.optim.Optimizer`, optional): It can be - either an optimizer (in most cases) or a dict of optimizers - (in models that requires more than one optimizer, e.g., GAN). - Defaults to None. - work_dir (str, optional): The working directory to save checkpoints - and logs. Defaults to None. - logger (:obj:`logging.Logger`): Logger used during training. - Defaults to None. (The default value is just for backward - compatibility) - meta (dict, optional): A dict records some import information such as - environment info and seed, which will be logged in logger hook. - Defaults to None. - max_epochs (int, optional): Total training epochs. Defaults to None. - max_iters (int, optional): Total training iterations. Defaults to None. - """ - - def __init__(self, - model, - batch_processor=None, - optimizer=None, - work_dir=None, - logger=None, - meta=None, - max_iters=None, - max_epochs=None): - ... - ``` - - å¦å¤–,在一些算法实现的主体类中,建议加入原论文的链接;如果å‚考了其他开æºä»£ç çš„实现,则应加入 modified from,而如果是直接å¤åˆ¶äº†å…¶ä»–代ç åº“的实现,则应加入 copied from ï¼Œå¹¶æ³¨æ„æºç çš„ License。如有必è¦ï¼Œä¹Ÿå¯ä»¥é€šè¿‡ .. 
math:: æ¥åŠ å…¥æ•°å­¦å…¬å¼ - - ```python - # å‚考实现 - # This func is modified from `detectron2 - # `_. - - # å¤åˆ¶ä»£ç  - # This code was copied from the `ubelt - # library`_. - - # 引用论文 & æ·»åŠ å…¬å¼ - class LabelSmoothLoss(nn.Module): - r"""Initializer for the label smoothed cross entropy loss. - - Refers to `Rethinking the Inception Architecture for Computer Vision - `_. - - This decreases gap between output scores and encourages generalization. - Labels provided to forward can be one-hot like vectors (NxC) or class - indices (Nx1). - And this accepts linear combination of one-hot like labels from mixup or - cutmix except multi-label task. - - Args: - label_smooth_val (float): The degree of label smoothing. - num_classes (int, optional): Number of classes. Defaults to None. - mode (str): Refers to notes, Options are "original", "classy_vision", - "multi_label". Defaults to "classy_vision". - reduction (str): The method used to reduce the loss. - Options are "none", "mean" and "sum". Defaults to 'mean'. - loss_weight (float): Weight of the loss. Defaults to 1.0. - - Note: - if the ``mode`` is "original", this will use the same label smooth - method as the original paper as: - - .. math:: - (1-\epsilon)\delta_{k, y} + \frac{\epsilon}{K} - - where :math:`\epsilon` is the ``label_smooth_val``, :math:`K` is - the ``num_classes`` and :math:`\delta_{k,y}` is Dirac delta, - which equals 1 for k=y and 0 otherwise. - - if the ``mode`` is "classy_vision", this will use the same label - smooth method as the `facebookresearch/ClassyVision - `_ repo as: - - .. math:: - \frac{\delta_{k, y} + \epsilon/K}{1+\epsilon} - - if the ``mode`` is "multi_label", this will accept labels from - multi-label task and smoothing them as: - - .. 
math:: - (1-2\epsilon)\delta_{k, y} + \epsilon - ``` - -```{note} -æ³¨æ„ \`\`here\`\`ã€\`here\`ã€"here" 三ç§å¼•å·åŠŸèƒ½æ˜¯ä¸åŒã€‚ - -在 reStructured 语法中,\`\`here\`\` 表示一段代ç ï¼›\`here\` 表示斜体;"here" 无特殊å«ä¹‰ï¼Œä¸€èˆ¬å¯ç”¨æ¥è¡¨ç¤ºå­—符串。其中 \`here\` 的用法与 Markdown 中ä¸åŒï¼Œéœ€è¦å¤šåŠ ç•™æ„。 -å¦å¤–还有 :obj:\`type\` è¿™ç§æ›´è§„范的表示类的写法,但鉴于长度,ä¸åšç‰¹åˆ«è¦æ±‚,一般仅用于表示éžå¸¸ç”¨ç±»åž‹ã€‚ -``` - -3. 方法(函数)文档 - - 函数文档与类文档的结构基本一致,但需è¦åŠ å…¥è¿”å›žå€¼æ–‡æ¡£ã€‚å¯¹äºŽè¾ƒä¸ºå¤æ‚的函数和类,å¯ä»¥ä½¿ç”¨ Examples 字段加入示例;如果需è¦å¯¹å‚数加入一些较长的备注,å¯ä»¥åŠ å…¥ Note 字段进行说明。 - - å¯¹äºŽä½¿ç”¨è¾ƒä¸ºå¤æ‚çš„ç±»æˆ–å‡½æ•°ï¼Œæ¯”èµ·çœ‹å¤§æ®µå¤§æ®µçš„è¯´æ˜Žæ–‡å­—å’Œå‚æ•°æ–‡æ¡£ï¼Œæ·»åŠ åˆé€‚çš„ç¤ºä¾‹æ›´èƒ½å¸®åŠ©ç”¨æˆ·è¿…é€Ÿäº†è§£å…¶ç”¨æ³•ã€‚éœ€è¦æ³¨æ„的是,这些示例最好是能够直接在 Python 交互å¼çŽ¯å¢ƒä¸­è¿è¡Œçš„,并给出一些相对应的结果。如果存在多个示例,å¯ä»¥ä½¿ç”¨æ³¨é‡Šç®€å•è¯´æ˜Žæ¯æ®µç¤ºä¾‹ï¼Œä¹Ÿèƒ½èµ·åˆ°åˆ†éš”作用。 - - ```python - def import_modules_from_strings(imports, allow_failed_imports=False): - """Import modules from the given list of strings. - - Args: - imports (list | str | None): The given module names to be imported. - allow_failed_imports (bool): If True, the failed imports will return - None. Otherwise, an ImportError is raise. Defaults to False. - - Returns: - List[module] | module | None: The imported modules. - All these three lines in docstring will be compiled into the same - line in readthedocs. - - Examples: - >>> osp, sys = import_modules_from_strings( - ... ['os.path', 'sys']) - >>> import os.path as osp_ - >>> import sys as sys_ - >>> assert osp == osp_ - >>> assert sys == sys_ - """ - ... - ``` - - 如果函数接å£åœ¨æŸä¸ªç‰ˆæœ¬å‘生了å˜åŒ–,需è¦åœ¨ docstring ä¸­åŠ å…¥ç›¸å…³çš„è¯´æ˜Žï¼Œå¿…è¦æ—¶æ·»åŠ  Note 或者 Warning 进行说明,例如: - - ```python - class CheckpointHook(Hook): - """Save checkpoints periodically. - - Args: - out_dir (str, optional): The root directory to save checkpoints. If - not specified, ``runner.work_dir`` will be used by default. 
If - specified, the ``out_dir`` will be the concatenation of - ``out_dir`` and the last level directory of ``runner.work_dir``. - Defaults to None. `Changed in version 1.3.15.` - file_client_args (dict, optional): Arguments to instantiate a - FileClient. See :class:`mmcv.fileio.FileClient` for details. - Defaults to None. `New in version 1.3.15.` - - Warning: - Before v1.3.15, the ``out_dir`` argument indicates the path where the - checkpoint is stored. However, in v1.3.15 and later, ``out_dir`` - indicates the root directory and the final path to save checkpoint is - the concatenation of out_dir and the last level directory of - ``runner.work_dir``. Suppose the value of ``out_dir`` is - "/path/of/A" and the value of ``runner.work_dir`` is "/path/of/B", - then the final path will be "/path/of/A/B". - ``` - - å¦‚æžœå‚æ•°æˆ–返回值里带有需è¦å±•å¼€æè¿°å­—段的 dict,则应该采用如下格å¼ï¼š - - ```python - def func(x): - r""" - Args: - x (None): A dict with 2 keys, ``padded_targets``, and ``targets``. - - - ``targets`` (list[Tensor]): A list of tensors. - Each tensor has the shape of :math:`(T_i)`. Each - element is the index of a character. - - ``padded_targets`` (Tensor): A tensor of shape :math:`(N)`. - Each item is the length of a word. - - Returns: - dict: A dict with 2 keys, ``padded_targets``, and ``targets``. - - - ``targets`` (list[Tensor]): A list of tensors. - Each tensor has the shape of :math:`(T_i)`. Each - element is the index of a character. - - ``padded_targets`` (Tensor): A tensor of shape :math:`(N)`. - Each item is the length of a word. 
- """ - return x - ``` - -```{important} -ä¸ºäº†ç”Ÿæˆ readthedocs æ–‡æ¡£ï¼Œæ–‡æ¡£çš„ç¼–å†™éœ€è¦æŒ‰ç…§ ReStructrued 文档格å¼ï¼Œå¦åˆ™ä¼šäº§ç”Ÿæ–‡æ¡£æ¸²æŸ“错误,在æäº¤ PR å‰ï¼Œæœ€å¥½ç”Ÿæˆå¹¶é¢„览一下文档效果。 -语法规范å‚考: - -- [reStructuredText Primer - Sphinx documentation](https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html#) -- [Example Google Style Python Docstrings ‒ napoleon 0.7 documentation](https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html#example-google) -``` - -### 注释规范 - -#### 为什么è¦å†™æ³¨é‡Š - -对于一个开æºé¡¹ç›®ï¼Œå›¢é˜Ÿåˆä½œä»¥åŠç¤¾åŒºä¹‹é—´çš„åˆä½œæ˜¯å¿…ä¸å¯å°‘的,因而尤其è¦é‡è§†åˆç†çš„æ³¨é‡Šã€‚ä¸å†™æ³¨é‡Šçš„代ç ï¼Œå¾ˆæœ‰å¯èƒ½è¿‡å‡ ä¸ªæœˆè‡ªå·±ä¹Ÿéš¾ä»¥ç†è§£ï¼Œé€ æˆé¢å¤–çš„é˜…è¯»å’Œä¿®æ”¹æˆæœ¬ã€‚ - -#### 如何写注释 - -最需è¦å†™æ³¨é‡Šçš„æ˜¯ä»£ç ä¸­é‚£äº›æŠ€å·§æ€§çš„部分。如果你在下次代ç å®¡æŸ¥çš„æ—¶å€™å¿…é¡»è§£é‡Šä¸€ä¸‹ï¼Œé‚£ä¹ˆä½ åº”è¯¥çŽ°åœ¨å°±ç»™å®ƒå†™æ³¨é‡Šã€‚å¯¹äºŽå¤æ‚çš„æ“作,应该在其æ“作开始å‰å†™ä¸Šè‹¥å¹²è¡Œæ³¨é‡Šã€‚å¯¹äºŽä¸æ˜¯ä¸€ç›®äº†ç„¶çš„代ç ï¼Œåº”在其行尾添加注释。 -—— Google å¼€æºé¡¹ç›®é£Žæ ¼æŒ‡å— - -```python -# We use a weighted dictionary search to find out where i is in -# the array. We extrapolate position based on the largest num -# in the array and the array size and then do binary search to -# get the exact number. -if i & (i-1) == 0: # True if i is 0 or a power of 2. -``` - -为了æé«˜å¯è¯»æ€§, 注释应该至少离开代ç 2个空格. -å¦ä¸€æ–¹é¢, ç»ä¸è¦æè¿°ä»£ç . å‡è®¾é˜…读代ç çš„人比你更懂Python, ä»–åªæ˜¯ä¸çŸ¥é“你的代ç è¦åšä»€ä¹ˆ. -—— Google å¼€æºé¡¹ç›®é£Žæ ¼æŒ‡å— - -```python -# Wrong: -# Now go through the b array and make sure whenever i occurs -# the next element is i+1 - -# Wrong: -if i & (i-1) == 0: # True if i bitwise and i-1 is 0. 
-``` - -在注释中,å¯ä»¥ä½¿ç”¨ Markdown 语法,因为开å‘人员通常熟悉 Markdown 语法,这样å¯ä»¥ä¾¿äºŽäº¤æµç†è§£ï¼Œå¦‚å¯ä½¿ç”¨å•å引å·è¡¨ç¤ºä»£ç å’Œå˜é‡ï¼ˆæ³¨æ„ä¸è¦å’Œ docstring 中的 ReStructured 语法混淆) - -```python -# `_reversed_padding_repeated_twice` is the padding to be passed to -# `F.pad` if needed (e.g., for non-zero padding types that are -# implemented as two ops: padding + conv). `F.pad` accepts paddings in -# reverse order than the dimension. -self._reversed_padding_repeated_twice = _reverse_repeat_tuple(self.padding, 2) -``` - -#### 注释示例 - -1. 出自 `mmcv/utils/registry.py`ï¼Œå¯¹äºŽè¾ƒä¸ºå¤æ‚的逻辑结构,通过注释,明确了优先级关系。 - - ```python - # self.build_func will be set with the following priority: - # 1. build_func - # 2. parent.build_func - # 3. build_from_cfg - if build_func is None: - if parent is not None: - self.build_func = parent.build_func - else: - self.build_func = build_from_cfg - else: - self.build_func = build_func - ``` - -2. 出自 `mmcv/runner/checkpoint.py`,对于 bug ä¿®å¤ä¸­çš„一些特殊处ç†ï¼Œå¯ä»¥é™„带相关的 issue 链接,帮助其他人了解 bug 背景。 - - ```python - def _save_ckpt(checkpoint, file): - # The 1.6 release of PyTorch switched torch.save to use a new - # zipfile-based file format. It will cause RuntimeError when a - # checkpoint was saved in high version (PyTorch version>=1.6.0) but - # loaded in low version (PyTorch version<1.6.0). 
More details at - # https://github.com/open-mmlab/mmpose/issues/904 - if digit_version(TORCH_VERSION) >= digit_version('1.6.0'): - torch.save(checkpoint, file, _use_new_zipfile_serialization=False) - else: - torch.save(checkpoint, file) - ``` - -### 类型注解 - -#### 为什么è¦å†™ç±»åž‹æ³¨è§£ - -类型注解是对函数中å˜é‡çš„类型åšé™å®šæˆ–æç¤ºï¼Œä¸ºä»£ç çš„安全性æä¾›ä¿éšœã€å¢žå¼ºä»£ç çš„å¯è¯»æ€§ã€é¿å…出现类型相关的错误。 -Python 没有对类型åšå¼ºåˆ¶é™åˆ¶ï¼Œç±»åž‹æ³¨è§£åªèµ·åˆ°ä¸€ä¸ªæç¤ºä½œç”¨ï¼Œé€šå¸¸ä½ çš„ IDE 会解æžè¿™äº›ç±»åž‹æ³¨è§£ï¼Œç„¶åŽåœ¨ä½ è°ƒç”¨ç›¸å…³ä»£ç æ—¶å¯¹ç±»åž‹åšæç¤ºã€‚å¦å¤–也有类型注解检查工具,这些工具会根æ®ç±»åž‹æ³¨è§£ï¼Œå¯¹ä»£ç ä¸­å¯èƒ½å‡ºçŽ°çš„é—®é¢˜è¿›è¡Œæ£€æŸ¥ï¼Œå‡å°‘ bug 的出现。 -éœ€è¦æ³¨æ„的是,通常我们ä¸éœ€è¦æ³¨é‡Šæ¨¡å—中的所有函数: - -1. 公共的 API éœ€è¦æ³¨é‡Š -2. 在代ç çš„å®‰å…¨æ€§ï¼Œæ¸…æ™°æ€§å’Œçµæ´»æ€§ä¸Šè¿›è¡Œæƒè¡¡æ˜¯å¦æ³¨é‡Š -3. 对于容易出现类型相关的错误的代ç è¿›è¡Œæ³¨é‡Š -4. 难以ç†è§£çš„代ç è¯·è¿›è¡Œæ³¨é‡Š -5. 若代ç ä¸­çš„类型已ç»ç¨³å®šï¼Œå¯ä»¥è¿›è¡Œæ³¨é‡Š. 对于一份æˆç†Ÿçš„代ç ï¼Œå¤šæ•°æƒ…况下,å³ä½¿æ³¨é‡Šäº†æ‰€æœ‰çš„函数,也ä¸ä¼šä¸§å¤±å¤ªå¤šçš„çµæ´»æ€§. - -#### 如何写类型注解 - -1. 
函数 / 方法类型注解,通常ä¸å¯¹ self å’Œ cls 注释。 - - ```python - from typing import Optional, List, Tuple - - # 全部ä½äºŽä¸€è¡Œ - def my_method(self, first_var: int) -> int: - pass - - # å¦èµ·ä¸€è¡Œ - def my_method( - self, first_var: int, - second_var: float) -> Tuple[MyLongType1, MyLongType1, MyLongType1]: - pass - - # å•独æˆè¡Œï¼ˆå…·ä½“的应用场åˆä¸Žè¡Œå®½æœ‰å…³ï¼Œå»ºè®®ç»“åˆ yapf 自动化格å¼ä½¿ç”¨ï¼‰ - def my_method( - self, first_var: int, second_var: float - ) -> Tuple[MyLongType1, MyLongType1, MyLongType1]: - pass - - # 引用尚未被定义的类型 - class MyClass: - def __init__(self, - stack: List["MyClass"]) -> None: - pass - ``` - - 注:类型注解中的类型å¯ä»¥æ˜¯ Python 内置类型,也å¯ä»¥æ˜¯è‡ªå®šä¹‰ç±»ï¼Œè¿˜å¯ä»¥ä½¿ç”¨ Python æä¾›çš„ wrapper 类对类型注解进行装饰,一些常è§çš„æ³¨è§£å¦‚下: - - ```python - # 数值类型 - from numbers import Number - - # å¯é€‰ç±»åž‹ï¼ŒæŒ‡å‚æ•°å¯ä»¥ä¸º None - from typing import Optional - def foo(var: Optional[int] = None): - pass - - # è”åˆç±»åž‹ï¼ŒæŒ‡åŒæ—¶æŽ¥å—多ç§ç±»åž‹ - from typing import Union - def foo(var: Union[float, str]): - pass - - from typing import Sequence # åºåˆ—类型 - from typing import Iterable # å¯è¿­ä»£ç±»åž‹ - from typing import Any # ä»»æ„类型 - from typing import Callable # å¯è°ƒç”¨ç±»åž‹ - - from typing import List, Dict # 列表和字典的泛型类型 - from typing import Tuple # å…ƒç»„çš„ç‰¹æ®Šæ ¼å¼ - # 虽然在 Python 3.9 中,list, tuple å’Œ dict æœ¬èº«å·²æ”¯æŒæ³›åž‹ï¼Œä½†ä¸ºäº†æ”¯æŒä¹‹å‰çš„版本 - # 我们在进行类型注解时还是需è¦ä½¿ç”¨ List, Tuple, Dict 类型 - # å¦å¤–ï¼Œåœ¨å¯¹å‚æ•°ç±»åž‹è¿›è¡Œæ³¨è§£æ—¶ï¼Œå°½é‡ä½¿ç”¨ Sequence & Iterable & Mapping - # List, Tuple, Dict 主è¦ç”¨äºŽè¿”回值类型注解 - # å‚è§ https://docs.python.org/3/library/typing.html#typing.List - ``` - -2. å˜é‡ç±»åž‹æ³¨è§£ï¼Œä¸€èˆ¬ç”¨äºŽéš¾ä»¥ç›´æŽ¥æŽ¨æ–­å…¶ç±»åž‹æ—¶ - - ```python - # Recommend: 带类型注解的赋值 - a: Foo = SomeUndecoratedFunction() - a: List[int]: [1, 2, 3] # List åªæ”¯æŒå•一类型泛型,å¯ä½¿ç”¨ Union - b: Tuple[int, int] = (1, 2) # 长度固定为 2 - c: Tuple[int, ...] 
= (1, 2, 3) # å˜é•¿ - d: Dict[str, int] = {'a': 1, 'b': 2} - - # Not Recommend:行尾类型注释 - # è™½ç„¶è¿™ç§æ–¹å¼è¢«å†™åœ¨äº† Google å¼€æºæŒ‡å—中,但这是一ç§ä¸ºäº†æ”¯æŒ Python 2.7 版本 - # 而补充的注释方å¼ï¼Œé‰´äºŽæˆ‘ä»¬åªæ”¯æŒ Python 3, ä¸ºäº†é£Žæ ¼ç»Ÿä¸€ï¼Œä¸æŽ¨èä½¿ç”¨è¿™ç§æ–¹å¼ã€‚ - a = SomeUndecoratedFunction() # type: Foo - a = [1, 2, 3] # type: List[int] - b = (1, 2, 3) # type: Tuple[int, ...] - c = (1, "2", 3.5) # type: Tuple[int, Text, float] - ``` - -3. 泛型 - - 上文中我们知é“,typing 中æä¾›äº† list å’Œ dict 的泛型类型,那么我们自己是å¦å¯ä»¥å®šä¹‰ç±»ä¼¼çš„æ³›åž‹å‘¢ï¼Ÿ - - ```python - from typing import TypeVar, Generic - - KT = TypeVar('KT') - VT = TypeVar('VT') - - class Mapping(Generic[KT, VT]): - def __init__(self, data: Dict[KT, VT]): - self._data = data - - def __getitem__(self, key: KT) -> VT: - return self._data[key] - ``` - - 使用上述方法,我们定义了一个拥有泛型能力的映射类,实际用法如下: - - ```python - mapping = Mapping[str, float]({'a': 0.5}) - value: float = example['a'] - ``` - - å¦å¤–,我们也å¯ä»¥åˆ©ç”¨ TypeVar 在函数签å中指定è”动的多个类型: - - ```python - from typing import TypeVar, List - - T = TypeVar('T') # Can be anything - A = TypeVar('A', str, bytes) # Must be str or bytes - - - def repeat(x: T, n: int) -> List[T]: - """Return a list containing n references to x.""" - return [x]*n - - - def longest(x: A, y: A) -> A: - """Return the longest of two strings.""" - return x if len(x) >= len(y) else y - ``` - -更多关于类型注解的写法请å‚考 [typing](https://docs.python.org/3/library/typing.html)。 - -#### 类型注解检查工具 - -[mypy](https://mypy.readthedocs.io/en/stable/) 是一个 Python 陿€ç±»åž‹æ£€æŸ¥å·¥å…·ã€‚æ ¹æ®ä½ çš„类型注解,mypy 会检查传å‚ã€èµ‹å€¼ç­‰æ“作是å¦ç¬¦åˆç±»åž‹æ³¨è§£ï¼Œä»Žè€Œé¿å…å¯èƒ½å‡ºçŽ°çš„ bug。 - -例如如下的一个 Python 脚本文件 test.py: - -```python -def foo(var: int) -> float: - return float(var) - -a: str = foo('2.0') -b: int = foo('3.0') # type: ignore -``` - -è¿è¡Œ mypy test.py å¯ä»¥å¾—到如下检查结果,分别指出了第 4 行在函数调用和返回值赋值两处类型错误。而第 5 è¡ŒåŒæ ·å­˜åœ¨ä¸¤ä¸ªç±»åž‹é”™è¯¯ï¼Œç”±äºŽä½¿ç”¨äº† type: ignore è€Œè¢«å¿½ç•¥äº†ï¼Œåªæœ‰éƒ¨åˆ†ç‰¹æ®Šæƒ…况å¯èƒ½éœ€è¦æ­¤ç±»å¿½ç•¥ã€‚ - -``` 
-test.py:4: error: Incompatible types in assignment (expression has type "float", variable has type "int") -test.py:4: error: Argument 1 to "foo" has incompatible type "str"; expected "int" -Found 2 errors in 1 file (checked 1 source file) -``` diff --git a/docs/zh_cn/community/contributing.md b/docs/zh_cn/community/contributing.md deleted file mode 100644 index e3aa781..0000000 --- a/docs/zh_cn/community/contributing.md +++ /dev/null @@ -1,278 +0,0 @@ -## è´¡çŒ®ä»£ç  - -欢迎加入 MMCV ç¤¾åŒºï¼Œæˆ‘ä»¬è‡´åŠ›äºŽæ‰“é€ æœ€å‰æ²¿çš„计算机视觉基础库,我们欢迎任何类型的贡献,包括但ä¸é™äºŽ - -**ä¿®å¤é”™è¯¯** - -ä¿®å¤ä»£ç å®žçŽ°é”™è¯¯çš„æ­¥éª¤å¦‚ä¸‹ï¼š - -1. 如果æäº¤çš„ä»£ç æ”¹åŠ¨è¾ƒå¤§ï¼Œå»ºè®®å…ˆæäº¤ issue,并正确æè¿° issue 的现象ã€åŽŸå› å’Œå¤çŽ°æ–¹å¼ï¼Œè®¨è®ºåŽç¡®è®¤ä¿®å¤æ–¹æ¡ˆã€‚ -2. ä¿®å¤é”™è¯¯å¹¶è¡¥å……相应的å•元测试,æäº¤æ‹‰å–请求。 - -**新增功能或组件** - -1. å¦‚æžœæ–°åŠŸèƒ½æˆ–æ¨¡å—æ¶‰åŠè¾ƒå¤§çš„ä»£ç æ”¹åŠ¨ï¼Œå»ºè®®å…ˆæäº¤ issueï¼Œç¡®è®¤åŠŸèƒ½çš„å¿…è¦æ€§ã€‚ -2. 实现新增功能并添å•元测试,æäº¤æ‹‰å–请求。 - -**文档补充** - -ä¿®å¤æ–‡æ¡£å¯ä»¥ç›´æŽ¥æäº¤æ‹‰å–请求 - -添加文档或将文档翻译æˆå…¶ä»–语言步骤如下 - -1. æäº¤ issueï¼Œç¡®è®¤æ·»åŠ æ–‡æ¡£çš„å¿…è¦æ€§ã€‚ -2. 添加文档,æäº¤æ‹‰å–请求。 - -### 拉å–è¯·æ±‚å·¥ä½œæµ - -如果你对拉å–请求ä¸äº†è§£ï¼Œæ²¡å…³ç³»ï¼ŒæŽ¥ä¸‹æ¥çš„内容将会从零开始,一步一步地指引你如何创建一个拉å–请求。如果你想深入了解拉å–è¯·æ±‚çš„å¼€å‘æ¨¡å¼ï¼Œå¯ä»¥å‚考 github [官方文档](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests) - -#### 1. 
å¤åˆ»ä»“库 - -当你第一次æäº¤æ‹‰å–请求时,先å¤åˆ» OpenMMLab 原代ç åº“,点击 GitHub 页é¢å³ä¸Šè§’çš„ **Fork** 按钮,å¤åˆ»åŽçš„代ç åº“将会出现在你的 GitHub 个人主页下。 - - - -将代ç å…‹éš†åˆ°æœ¬åœ° - -```shell -git clone git@github.com:{username}/mmcv.git -``` - -添加原代ç åº“为上游代ç åº“ - -```bash -git remote add upstream git@github.com:open-mmlab/mmcv -``` - -检查 remote æ˜¯å¦æ·»åŠ æˆåŠŸï¼Œåœ¨ç»ˆç«¯è¾“å…¥ `git remote -v` - -```bash -origin git@github.com:{username}/mmcv.git (fetch) -origin git@github.com:{username}/mmcv.git (push) -upstream git@github.com:open-mmlab/mmcv (fetch) -upstream git@github.com:open-mmlab/mmcv (push) -``` - -```{note} -这里对 origin å’Œ upstream 进行一个简å•的介ç»ï¼Œå½“我们使用 git clone æ¥å…‹éš†ä»£ç æ—¶ï¼Œä¼šé»˜è®¤åˆ›å»ºä¸€ä¸ª origin çš„ remoteï¼Œå®ƒæŒ‡å‘æˆ‘们克隆的代ç åº“地å€ï¼Œè€Œ upstream åˆ™æ˜¯æˆ‘ä»¬è‡ªå·±æ·»åŠ çš„ï¼Œç”¨æ¥æŒ‡å‘原始代ç åº“地å€ã€‚当然如果你ä¸å–œæ¬¢ä»–å« upstream,也å¯ä»¥è‡ªå·±ä¿®æ”¹ï¼Œæ¯”å¦‚å« open-mmlabã€‚æˆ‘ä»¬é€šå¸¸å‘ origin æäº¤ä»£ç ï¼ˆå³ fork 下æ¥çš„远程仓库),然åŽå‘ upstream æäº¤ä¸€ä¸ª pull request。如果æäº¤çš„代ç å’Œæœ€æ–°çš„代ç å‘生冲çªï¼Œå†ä»Ž upstream æ‹‰å–æœ€æ–°çš„代ç ï¼Œå’Œæœ¬åœ°åˆ†æ”¯è§£å†³å†²çªï¼Œå†æäº¤åˆ° origin。 -``` - -#### 2. 
é…ç½® pre-commit - -在本地开å‘环境中,我们使用 [pre-commit](https://pre-commit.com/#intro) æ¥æ£€æŸ¥ä»£ç é£Žæ ¼ï¼Œä»¥ç¡®ä¿ä»£ç é£Žæ ¼çš„统一。在æäº¤ä»£ç ï¼Œéœ€è¦å…ˆå®‰è£… pre-commit(需è¦åœ¨ MMCV 目录下执行): - -```shell -pip install -U pre-commit -pre-commit install -``` - -检查 pre-commit 是å¦é…ç½®æˆåŠŸï¼Œå¹¶å®‰è£… `.pre-commit-config.yaml` 中的钩å­ï¼š - -```shell -pre-commit run --all-files -``` - - - - - -```{note} -如果你是中国用户,由于网络原因,å¯èƒ½ä¼šå‡ºçŽ°å®‰è£…å¤±è´¥çš„æƒ…å†µï¼Œè¿™æ—¶å¯ä»¥ä½¿ç”¨å›½å†…æº - -pre-commit install -c .pre-commit-config-zh-cn.yaml - -pre-commit run --all-files -c .pre-commit-config-zh-cn.yaml -``` - -如果安装过程被中断,å¯ä»¥é‡å¤æ‰§è¡Œ `pre-commit run ...` 继续安装。 - -如果æäº¤çš„代ç ä¸ç¬¦åˆä»£ç é£Žæ ¼è§„范,pre-commit 会å‘出警告,并自动修å¤éƒ¨åˆ†é”™è¯¯ã€‚ - - - -如果我们想临时绕开 pre-commit 的检查æäº¤ä¸€æ¬¡ä»£ç ï¼Œå¯ä»¥åœ¨ `git commit` 时加上 `--no-verify`(需è¦ä¿è¯æœ€å޿ލé€è‡³è¿œç¨‹ä»“库的代ç èƒ½å¤Ÿé€šè¿‡ pre-commit 检查)。 - -```shell -git commit -m "xxx" --no-verify -``` - -#### 3. 创建开å‘分支 - -安装完 pre-commit 之åŽï¼Œæˆ‘们需è¦åŸºäºŽ master 创建开å‘分支,建议的分支命å规则为 `username/pr_name`。 - -```shell -git checkout -b yhc/refactor_contributing_doc -``` - -在åŽç»­çš„å¼€å‘中,如果本地仓库的 master 分支è½åŽäºŽ upstream çš„ master 分支,我们需è¦å…ˆæ‹‰å– upstream 的代ç è¿›è¡ŒåŒæ­¥ï¼Œå†æ‰§è¡Œä¸Šé¢çš„命令 - -```shell -git pull upstream master -``` - -#### 4. æäº¤ä»£ç å¹¶åœ¨æœ¬åœ°é€šè¿‡å•元测试 - -- MMCV 引入了 mypy æ¥åšé™æ€ç±»åž‹æ£€æŸ¥ï¼Œä»¥å¢žåР代ç çš„鲿£’性。因此我们在æäº¤ä»£ç æ—¶ï¼Œéœ€è¦è¡¥å…… Type Hints。具体规则å¯ä»¥å‚考[教程](https://zhuanlan.zhihu.com/p/519335398)。 - -- æäº¤çš„代ç åŒæ ·éœ€è¦é€šè¿‡å•元测试 - - ```shell - # 通过全é‡å•元测试 - pytest tests - - # 我们需è¦ä¿è¯æäº¤çš„代ç èƒ½å¤Ÿé€šè¿‡ä¿®æ”¹æ¨¡å—çš„å•元测试,以 runner 为例 - pytest tests/test_runner/test_runner.py - ``` - - 如果你由于缺少ä¾èµ–无法è¿è¡Œä¿®æ”¹æ¨¡å—çš„å•元测试,å¯ä»¥å‚考[指引-å•元测试](#å•元测试) - -- 如果修改/添加了文档,å‚考[指引](#文档渲染)确认文档渲染正常。 - -#### 5. 
推é€ä»£ç åˆ°è¿œç¨‹ - -代ç é€šè¿‡å•元测试和 pre-commit 检查åŽï¼Œå°†ä»£ç æŽ¨é€åˆ°è¿œç¨‹ä»“库,如果是第一次推é€ï¼Œå¯ä»¥åœ¨ `git push` åŽåŠ ä¸Š `-u` 傿•°ä»¥å…³è”远程分支 - -```shell -git push -u origin {branch_name} -``` - -这样下次就å¯ä»¥ç›´æŽ¥ä½¿ç”¨ `git push` 命令推é€ä»£ç äº†ï¼Œè€Œæ— éœ€æŒ‡å®šåˆ†æ”¯å’Œè¿œç¨‹ä»“库。 - -#### 6. æäº¤æ‹‰å–请求(PR) - -(1) 在 GitHub çš„ Pull request 界é¢åˆ›å»ºæ‹‰å–请求 - - -(2) æ ¹æ®æŒ‡å¼•修改 PR æè¿°ï¼Œä»¥ä¾¿äºŽå…¶ä»–å¼€å‘者更好地ç†è§£ä½ çš„修改 - - - -æè¿°è§„范详è§[拉å–请求规范](#拉å–请求规范) - -  - -**注æ„事项** - -(a) PR æè¿°åº”该包å«ä¿®æ”¹ç†ç”±ã€ä¿®æ”¹å†…容以åŠä¿®æ”¹åŽå¸¦æ¥çš„å½±å“,并关è”相关 Issue(具体方å¼è§[文档](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue)) - -(b) 如果是第一次为 OpenMMLab åšè´¡çŒ®ï¼Œéœ€è¦ç­¾ç½² CLA - - - -(c) 检查æäº¤çš„ PR 是å¦é€šè¿‡ CIï¼ˆé›†æˆæµ‹è¯•) - - - -MMCV 会在ä¸åŒçš„å¹³å°ï¼ˆLinuxã€Windowã€Mac),基于ä¸åŒç‰ˆæœ¬çš„ Pythonã€PyTorchã€CUDA 对æäº¤çš„代ç è¿›è¡Œå•元测试,以ä¿è¯ä»£ç çš„æ­£ç¡®æ€§ï¼Œå¦‚果有任何一个没有通过,我们å¯ç‚¹å‡»ä¸Šå›¾ä¸­çš„ `Details` æ¥æŸ¥çœ‹å…·ä½“的测试信æ¯ï¼Œä»¥ä¾¿äºŽæˆ‘们修改代ç ã€‚ - -(3) 如果 PR 通过了 CI,那么就å¯ä»¥ç­‰å¾…å…¶ä»–å¼€å‘者的 reviewï¼Œå¹¶æ ¹æ® reviewer çš„æ„è§ï¼Œä¿®æ”¹ä»£ç ï¼Œå¹¶é‡å¤ [4](#4-æäº¤ä»£ç å¹¶æœ¬åœ°é€šè¿‡å•元测试)-[5](#5-推é€ä»£ç åˆ°è¿œç¨‹) 步骤,直到 reviewer åŒæ„åˆå…¥ PR。 - - - -所有 reviewer åŒæ„åˆå…¥ PR åŽï¼Œæˆ‘们会尽快将 PR åˆå¹¶åˆ°ä¸»åˆ†æ”¯ã€‚ - -#### 7. 
è§£å†³å†²çª - -éšç€æ—¶é—´çš„æŽ¨ç§»ï¼Œæˆ‘们的代ç åº“ä¼šä¸æ–­æ›´æ–°ï¼Œè¿™æ—¶å€™ï¼Œå¦‚果你的 PR 与主分支存在冲çªï¼Œä½ éœ€è¦è§£å†³å†²çªï¼Œè§£å†³å†²çªçš„æ–¹å¼æœ‰ä¸¤ç§ï¼š - -```shell -git fetch --all --prune -git rebase upstream/master -``` - -或者 - -```shell -git fetch --all --prune -git merge upstream/master -``` - -如果你éžå¸¸å–„于处ç†å†²çªï¼Œé‚£ä¹ˆå¯ä»¥ä½¿ç”¨ rebase çš„æ–¹å¼æ¥è§£å†³å†²çªï¼Œå› ä¸ºè¿™èƒ½å¤Ÿä¿è¯ä½ çš„ commit log 的整æ´ã€‚如果你ä¸å¤ªç†Ÿæ‚‰ `rebase` 的使用,那么å¯ä»¥ä½¿ç”¨ `merge` çš„æ–¹å¼æ¥è§£å†³å†²çªã€‚ - -### 指引 - -#### å•元测试 - -如果你无法正常执行部分模å—çš„å•元测试,例如 [video](https://github.com/open-mmlab/mmcv/tree/master/mmcv/video) 模å—,å¯èƒ½æ˜¯ä½ çš„当å‰çŽ¯å¢ƒæ²¡æœ‰å®‰è£…ä»¥ä¸‹ä¾èµ– - -```shell -# Linux -sudo apt-get update -y -sudo apt-get install -y libturbojpeg -sudo apt-get install -y ffmpeg - -# Windows -conda install ffmpeg -``` - -在æäº¤ä¿®å¤ä»£ç é”™è¯¯æˆ–新增特性的拉å–请求时,我们应该尽å¯èƒ½çš„让å•元测试覆盖所有æäº¤çš„代ç ï¼Œè®¡ç®—å•元测试覆盖率的方法如下 - -```shell -python -m coverage run -m pytest /path/to/test_file -python -m coverage html -# check file in htmlcov/index.html -``` - -#### 文档渲染 - -在æäº¤ä¿®å¤ä»£ç é”™è¯¯æˆ–新增特性的拉å–请求时,å¯èƒ½ä¼šéœ€è¦ä¿®æ”¹/新增模å—çš„ docstring。我们需è¦ç¡®è®¤æ¸²æŸ“åŽçš„æ–‡æ¡£æ ·å¼æ˜¯æ­£ç¡®çš„。 -æœ¬åœ°ç”Ÿæˆæ¸²æŸ“åŽçš„æ–‡æ¡£çš„æ–¹æ³•如下 - -```shell -pip install -r requirements/docs.txt -cd docs/zh_cn/ -# or docs/en -make html -# check file in ./docs/zh_cn/_build/html/index.html -``` - -### 代ç é£Žæ ¼ - -#### Python - -[PEP8](https://www.python.org/dev/peps/pep-0008/) 作为 OpenMMLab 算法库首选的代ç è§„范,我们使用以下工具检查和格å¼åŒ–ä»£ç  - -- [flake8](https://github.com/PyCQA/flake8): Python 官方å‘布的代ç è§„范检查工具,是多个检查工具的å°è£… -- [isort](https://github.com/timothycrosley/isort): 自动调整模å—导入顺åºçš„工具 -- [yapf](https://github.com/google/yapf): Google å‘布的代ç è§„范检查工具 -- [codespell](https://github.com/codespell-project/codespell): 检查å•è¯æ‹¼å†™æ˜¯å¦æœ‰è¯¯ -- [mdformat](https://github.com/executablebooks/mdformat): 检查 markdown 文件的工具 -- [docformatter](https://github.com/myint/docformatter): æ ¼å¼åŒ– docstring 的工具 - -yapf å’Œ isort 
çš„é…ç½®å¯ä»¥åœ¨ [setup.cfg](./setup.cfg) 找到 - -通过é…ç½® [pre-commit hook](https://pre-commit.com/) ,我们å¯ä»¥åœ¨æäº¤ä»£ç æ—¶è‡ªåŠ¨æ£€æŸ¥å’Œæ ¼å¼åŒ– `flake8`ã€`yapf`ã€`isort`ã€`trailing whitespaces`ã€`markdown files`, -ä¿®å¤ `end-of-files`ã€`double-quoted-strings`ã€`python-encoding-pragma`ã€`mixed-line-ending`,调整 `requirments.txt` 的包顺åºã€‚ -pre-commit é’©å­çš„é…ç½®å¯ä»¥åœ¨ [.pre-commit-config](./.pre-commit-config.yaml) 找到。 - -pre-commit 具体的安装使用方å¼è§[拉å–请求](#2-é…ç½®-pre-commit)。 - -更具体的规范请å‚考 [OpenMMLab 代ç è§„范](code_style.md)。 - -#### C++ and CUDA - -C++ å’Œ CUDA 的代ç è§„范éµä»Ž [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html) - -### 拉å–请求规范 - -1. 使用 [pre-commit hook](https://pre-commit.com),尽é‡å‡å°‘代ç é£Žæ ¼ç›¸å…³é—®é¢˜ - -2. 一个`拉å–请求`对应一个短期分支 - -3. 粒度è¦ç»†ï¼Œä¸€ä¸ª`拉å–请求`åªåšä¸€ä»¶äº‹æƒ…,é¿å…超大的`拉å–请求` - - - Bad:实现 Faster R-CNN - - Acceptable:给 Faster R-CNN 添加一个 box head - - Good:给 box head å¢žåŠ ä¸€ä¸ªå‚æ•°æ¥æ”¯æŒè‡ªå®šä¹‰çš„ conv 层数 - -4. æ¯æ¬¡ Commit æ—¶éœ€è¦æä¾›æ¸…æ™°ä¸”æœ‰æ„义 commit ä¿¡æ¯ - -5. æä¾›æ¸…晰且有æ„义的`拉å–请求`æè¿° - - - 标题写明白任务å称,一般格å¼:\[Prefix\] Short description of the pull request (Suffix) - - prefix: 新增功能 \[Feature\], ä¿® bug \[Fix\], 文档相关 \[Docs\], å¼€å‘中 \[WIP\] (暂时ä¸ä¼šè¢«review) - - æè¿°é‡Œä»‹ç»`拉å–请求`的主è¦ä¿®æ”¹å†…容,结果,以åŠå¯¹å…¶ä»–部分的影å“, å‚考`拉å–请求`æ¨¡æ¿ - - å…³è”相关的`议题` (issue) 和其他`拉å–请求` - -6. 
如果引入了其他三方库,或借鉴了三方库的代ç ï¼Œè¯·ç¡®è®¤ä»–们的许å¯è¯å’Œ mmcv 兼容,并在借鉴的代ç ä¸Šè¡¥å…… `This code is inspired from http://` diff --git a/docs/zh_cn/community/pr.md b/docs/zh_cn/community/pr.md deleted file mode 100644 index 427fdf9..0000000 --- a/docs/zh_cn/community/pr.md +++ /dev/null @@ -1,3 +0,0 @@ -## 拉å–请求 - -本文档的内容已è¿ç§»åˆ°[贡献指å—](contributing.md)。 diff --git a/docs/zh_cn/docutils.conf b/docs/zh_cn/docutils.conf deleted file mode 100644 index 0c00c84..0000000 --- a/docs/zh_cn/docutils.conf +++ /dev/null @@ -1,2 +0,0 @@ -[html writers] -table_style: colwidths-auto diff --git a/docs/zh_cn/faq.md b/docs/zh_cn/faq.md deleted file mode 100644 index 6cfb100..0000000 --- a/docs/zh_cn/faq.md +++ /dev/null @@ -1,91 +0,0 @@ -## 常è§é—®é¢˜ - -在这里我们列出了用户ç»å¸¸é‡åˆ°çš„问题以åŠå¯¹åº”的解决方法。如果您é‡åˆ°äº†å…¶ä»–常è§çš„问题,并且知é“å¯ä»¥å¸®åˆ°å¤§å®¶çš„解决办法, -æ¬¢è¿Žéšæ—¶ä¸°å¯Œè¿™ä¸ªåˆ—表。 - -### 安装问题 - -- KeyError: "xxx: 'yyy is not in the zzz registry'" - - åªæœ‰æ¨¡å—所在的文件被导入时,注册机制æ‰ä¼šè¢«è§¦å‘,所以您需è¦åœ¨æŸå¤„导入该文件,更多详情请查看 [KeyError: "MaskRCNN: 'RefineRoIHead is not in the models registry'"](https://github.com/open-mmlab/mmdetection/issues/5974)。 - -- "No module named 'mmcv.ops'"; "No module named 'mmcv.\_ext'" - - 1. 使用 `pip uninstall mmcv` å¸è½½æ‚¨çŽ¯å¢ƒä¸­çš„ mmcv - 2. å‚考 [installation instruction](https://mmcv.readthedocs.io/en/latest/get_started/installation.html) 或者 [Build MMCV from source](https://mmcv.readthedocs.io/en/latest/get_started/build.html) 安装 mmcv-full - -- "invalid device function" 或者 "no kernel image is available for execution" - - 1. 检查 GPU çš„ CUDA 计算能力 - 2. è¿è¡Œ `python mmdet/utils/collect_env.py` æ¥æ£€æŸ¥ PyTorchã€torchvision å’Œ MMCV æ˜¯å¦æ˜¯é’ˆå¯¹æ­£ç¡®çš„ GPU 架构构建的,您å¯èƒ½éœ€è¦åŽ»è®¾ç½® `TORCH_CUDA_ARCH_LIST` æ¥é‡æ–°å®‰è£… MMCV。兼容性问题å¯èƒ½ä¼šå‡ºçŽ°åœ¨ä½¿ç”¨æ—§ç‰ˆçš„ GPUs,如:colab 上的 Tesla K80 (3.7) - 3. 检查è¿è¡ŒçŽ¯å¢ƒæ˜¯å¦å’Œ mmcv/mmdet 编译时的环境相åŒã€‚例如,您å¯èƒ½ä½¿ç”¨ CUDA 10.0 编译 mmcv,但在 CUDA 9.0 的环境中è¿è¡Œå®ƒ - -- "undefined symbol" 或者 "cannot open xxx.so" - - 1. 
如果符å·å’Œ CUDA/C++ 相关(例如:libcudart.so 或者 GLIBCXX),请检查 CUDA/GCC è¿è¡Œæ—¶çš„版本是å¦å’Œç¼–译 mmcv 的一致 - 2. 如果符å·å’Œ PyTorch 相关(例如:符å·åŒ…å« caffeã€aten å’Œ TH),请检查 PyTorch è¿è¡Œæ—¶çš„版本是å¦å’Œç¼–译 mmcv 的一致 - 3. è¿è¡Œ `python mmdet/utils/collect_env.py` 以检查 PyTorchã€torchvision å’Œ MMCV 构建和è¿è¡Œçš„环境是å¦ç›¸åŒ - -- "RuntimeError: CUDA error: invalid configuration argument" - - 这个错误å¯èƒ½æ˜¯ç”±äºŽæ‚¨çš„ GPU 性能ä¸ä½³é€ æˆçš„。å°è¯•é™ä½Ž [THREADS_PER_BLOCK](https://github.com/open-mmlab/mmcv/blob/cac22f8cf5a904477e3b5461b1cc36856c2793da/mmcv/ops/csrc/common_cuda_helper.hpp#L10) - çš„å€¼å¹¶é‡æ–°ç¼–译 mmcv。 - -- "RuntimeError: nms is not compiled with GPU support" - - 这个错误是由于您的 CUDA 环境没有正确安装。 - 您å¯ä»¥å°è¯•釿–°å®‰è£…您的 CUDA 环境,然åŽåˆ é™¤ mmcv/build æ–‡ä»¶å¤¹å¹¶é‡æ–°ç¼–译 mmcv。 - -- "Segmentation fault" - - 1. 检查 GCC 的版本,通常是因为 PyTorch 版本与 GCC 版本ä¸åŒ¹é… (例如 GCC \< 4.9 ),我们推è用户使用 GCC 5.4ï¼Œæˆ‘ä»¬ä¹Ÿä¸æŽ¨è使用 GCC 5.5, 因为有å馈 GCC 5.5 会导致 "segmentation fault" 并且切æ¢åˆ° GCC 5.4 å°±å¯ä»¥è§£å†³é—®é¢˜ - 2. æ£€æŸ¥æ˜¯å¦æ­£ç¡®å®‰è£… CUDA 版本的 PyTorc。输入以下命令并检查是å¦è¿”回 True - ```shell - python -c 'import torch; print(torch.cuda.is_available())' - ``` - 3. 如果 `torch` 安装æˆåŠŸï¼Œé‚£ä¹ˆæ£€æŸ¥ MMCV 是å¦å®‰è£…æˆåŠŸã€‚è¾“å…¥ä»¥ä¸‹å‘½ä»¤ï¼Œå¦‚æžœæ²¡æœ‰æŠ¥é”™è¯´æ˜Ž mmcv-full 安装æˆã€‚ - ```shell - python -c 'import mmcv; import mmcv.ops' - ``` - 4. 如果 MMCV 与 PyTorch 都安装æˆåŠŸäº†ï¼Œåˆ™å¯ä»¥ä½¿ç”¨ `ipdb` 设置断点或者使用 `print` å‡½æ•°ï¼Œåˆ†æžæ˜¯å“ªä¸€éƒ¨åˆ†çš„代ç å¯¼è‡´äº† `segmentation fault` - -- "libtorch_cuda_cu.so: cannot open shared object file" - - `mmcv-full` ä¾èµ– `libtorch_cuda_cu.so` 文件,但程åºè¿è¡Œæ—¶æ²¡èƒ½æ‰¾åˆ°è¯¥æ–‡ä»¶ã€‚我们å¯ä»¥æ£€æŸ¥è¯¥æ–‡ä»¶æ˜¯å¦å­˜åœ¨ `~/miniconda3/envs/{environment-name}/lib/python3.7/site-packages/torch/lib` 也å¯ä»¥å°è¯•é‡è£… PyTorch。 - -- "fatal error C1189: #error: -- unsupported Microsoft Visual Studio version!" 
- - 如果您在 Windows 上编译 mmcv-full 并且 CUDA 的版本是 9.2,您很å¯èƒ½ä¼šé‡åˆ°è¿™ä¸ªé—®é¢˜ `"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.2\include\crt/host_config.h(133): fatal error C1189: #error: -- unsupported Microsoft Visual Studio version! Only the versions 2012, 2013, 2015 and 2017 are supported!"`,您å¯ä»¥å°è¯•使用低版本的 Microsoft Visual Studio,例如 vs2017。 - -- "error: member "torch::jit::detail::ModulePolicy::all_slots" may not be initialized" - - 如果您在 Windows 上编译 mmcv-full 并且 PyTorch 的版本是 1.5.0,您很å¯èƒ½ä¼šé‡åˆ°è¿™ä¸ªé—®é¢˜ `- torch/csrc/jit/api/module.h(474): error: member "torch::jit::detail::ModulePolicy::all_slots" may not be initialized`。解决这个问题的方法是将 `torch/csrc/jit/api/module.h` 文件中所有 `static constexpr bool all_slots = false;` 替æ¢ä¸º `static bool all_slots = false;`。更多细节å¯ä»¥æŸ¥çœ‹ [member "torch::jit::detail::AttributePolicy::all_slots" may not be initialized](https://github.com/pytorch/pytorch/issues/39394)。 - -- "error: a member with an in-class initializer must be const" - - 如果您在 Windows 上编译 mmcv-full 并且 PyTorch 的版本是 1.6.0,您很å¯èƒ½ä¼šé‡åˆ°è¿™ä¸ªé—®é¢˜ `"- torch/include\torch/csrc/jit/api/module.h(483): error: a member with an in-class initializer must be const"`. 解决这个问题的方法是将 `torch/include\torch/csrc/jit/api/module.h` 文件中的所有 `CONSTEXPR_EXCEPT_WIN_CUDA ` 替æ¢ä¸º `const`。更多细节å¯ä»¥æŸ¥çœ‹ [Ninja: build stopped: subcommand failed](https://github.com/open-mmlab/mmcv/issues/575)。 - -- "error: member "torch::jit::ProfileOptionalOp::Kind" may not be initialized" - - 如果您在 Windows 上编译 mmcv-full 并且 PyTorch 的版本是 1.7.0,您很å¯èƒ½ä¼šé‡åˆ°è¿™ä¸ªé—®é¢˜ `torch/include\torch/csrc/jit/ir/ir.h(1347): error: member "torch::jit::ProfileOptionalOp::Kind" may not be initialized`. 
解决这个问题的方法是修改 PyTorch 中的几个文件: - - - 删除 `torch/include\torch/csrc/jit/ir/ir.h` 文件中的 `static constexpr Symbol Kind = ::c10::prim::profile;` å’Œ `tatic constexpr Symbol Kind = ::c10::prim::profile_optional;` - - å°† `torch\include\pybind11\cast.h` 文件中的 `explicit operator type&() { return *(this->value); }` 替æ¢ä¸º `explicit operator type&() { return *((type*)this->value); }` - - å°† `torch/include\torch/csrc/jit/api/module.h` 文件中的 所有 `CONSTEXPR_EXCEPT_WIN_CUDA` 替æ¢ä¸º `const` - - 更多细节å¯ä»¥æŸ¥çœ‹ [Ensure default extra_compile_args](https://github.com/pytorch/pytorch/pull/45956)。 - -- MMCV å’Œ MMDetection 的兼容性问题;"ConvWS is already registered in conv layer" - - 请å‚考 [installation instruction](https://mmdetection.readthedocs.io/en/latest/get_started.html#installation) 为您的 MMDetection 版本安装正确版本的 MMCV。 - -### 使用问题 - -- "RuntimeError: Expected to have finished reduction in the prior iteration before starting a new one" - - 1. è¿™ä¸ªé”™è¯¯æ˜¯å› ä¸ºæœ‰äº›å‚æ•°æ²¡æœ‰å‚与 loss 的计算,å¯èƒ½æ˜¯ä»£ç ä¸­å­˜åœ¨å¤šä¸ªåˆ†æ”¯ï¼Œå¯¼è‡´æœ‰äº›åˆ†æ”¯æ²¡æœ‰å‚与 loss çš„è®¡ç®—ã€‚æ›´å¤šç»†èŠ‚è§ [Expected to have finished reduction in the prior iteration before starting a new one](https://github.com/pytorch/pytorch/issues/55582)。 - 2. 
ä½ å¯ä»¥è®¾ç½® DDP 中的 `find_unused_parameters` 为 `True`ï¼Œæˆ–è€…æ‰‹åŠ¨æŸ¥æ‰¾å“ªäº›å‚æ•°æ²¡æœ‰ç”¨åˆ°ã€‚ - -- "RuntimeError: Trying to backward through the graph a second time" - - ä¸èƒ½åŒæ—¶è®¾ç½® `GradientCumulativeOptimizerHook` å’Œ `OptimizerHook`,这会导致 `loss.backward()` è¢«è°ƒç”¨ä¸¤æ¬¡ï¼ŒäºŽæ˜¯ç¨‹åºæŠ›å‡º `RuntimeError`。我们åªéœ€è®¾ç½®å…¶ä¸­çš„ä¸€ä¸ªã€‚æ›´å¤šç»†èŠ‚è§ [Trying to backward through the graph a second time](https://github.com/open-mmlab/mmcv/issues/1379)。 diff --git a/docs/zh_cn/get_started/article.md b/docs/zh_cn/get_started/article.md deleted file mode 100644 index 9676850..0000000 --- a/docs/zh_cn/get_started/article.md +++ /dev/null @@ -1,63 +0,0 @@ -## 解读文章汇总 - -这篇文章汇总了 [OpenMMLab](https://www.zhihu.com/people/openmmlab) è§£è¯»çš„éƒ¨åˆ†æ–‡ç« ï¼ˆæ›´å¤šæ–‡ç« å’Œè§†é¢‘è§ [OpenMMLabCourse](https://github.com/open-mmlab/OpenMMLabCourse)),如果您有推è的文章(ä¸ä¸€å®šæ˜¯ OpenMMLab å‘布的文章,å¯ä»¥æ˜¯è‡ªå·±å†™çš„æ–‡ç« ï¼‰ï¼Œéžå¸¸æ¬¢è¿Žæ [Pull Request](http://127.0.0.1:5501/mmcv/docs/zh_cn/_build/html/community/pr.html) 添加到这里。 - -### MMCV 解读文章 - -#### 框架解读 - -- [MMCV 核心组件分æž(一):整体概述](https://zhuanlan.zhihu.com/p/336081587) -- [MMCV 核心组件分æž(二):FileHandler](https://zhuanlan.zhihu.com/p/336097883) -- [MMCV 核心组件分æž(三): FileClient](https://zhuanlan.zhihu.com/p/339190576) -- [MMCV 核心组件分æž(å››): Config](https://zhuanlan.zhihu.com/p/346203167) -- [MMCV 核心组件分æž(五): Registry](https://zhuanlan.zhihu.com/p/355271993) -- [MMCV 核心组件分æž(å…­): Hook](https://zhuanlan.zhihu.com/p/355272220) -- [MMCV 核心组件分æž(七): Runner](https://zhuanlan.zhihu.com/p/355272459) -- [MMCV Hook 食用指å—](https://zhuanlan.zhihu.com/p/448600739) -- [PyTorch & MMCV Dispatcher 机制解æž](https://zhuanlan.zhihu.com/p/451671838) - -#### 工具解读 - -- [训练å¯è§†åŒ–工具哪款是你的èœï¼ŸMMCV一行代ç éšä½ æŒ‘](https://zhuanlan.zhihu.com/p/387078211) - -#### å®‰è£…æŒ‡å— - -- [久等了ï¼Windows å¹³å° MMCV 的预编译包终于æ¥äº†ï¼](https://zhuanlan.zhihu.com/p/441653536) -- [Windows 环境从零安装 mmcv-full](https://zhuanlan.zhihu.com/p/434491590) - -#### 知乎问答 - -- 
[深度学习科研,如何高效进行代ç å’Œå®žéªŒç®¡ç†ï¼Ÿ](https://www.zhihu.com/question/269707221/answer/2480772257) -- [深度学习方é¢çš„ç§‘ç ”å·¥ä½œä¸­çš„å®žéªŒä»£ç æœ‰ä»€ä¹ˆè§„范和写作技巧?如何妥善管ç†å®žéªŒæ•°æ®ï¼Ÿ](https://www.zhihu.com/question/268193800/answer/2586000037) - -### 下游算法库解读文章 - -- [MMDetection](https://mmdetection.readthedocs.io/zh_CN/latest/article.html) - -### PyTorch 解读文章 - -- [PyTorch1.11 亮点一览:TorchDataã€functorchã€DDP 陿€å›¾](https://zhuanlan.zhihu.com/p/486222256) -- [PyTorch1.12 亮点一览:DataPipe + TorchArrow 新的数æ®åŠ è½½ä¸Žå¤„ç†èŒƒå¼](https://zhuanlan.zhihu.com/p/537868554) -- [PyTorch æºç è§£è¯»ä¹‹ nn.Moduleï¼šæ ¸å¿ƒç½‘ç»œæ¨¡å—æŽ¥å£è¯¦è§£](https://zhuanlan.zhihu.com/p/340453841) -- [PyTorch æºç è§£è¯»ä¹‹ torch.autograd:梯度计算详解](https://zhuanlan.zhihu.com/p/321449610) -- [PyTorch æºç è§£è¯»ä¹‹ torch.utils.dataï¼šè§£æžæ•°æ®å¤„ç†å…¨æµç¨‹](https://zhuanlan.zhihu.com/p/337850513) -- [PyTorch æºç è§£è¯»ä¹‹ torch.optim:优化算法接å£è¯¦è§£](https://zhuanlan.zhihu.com/p/346205754) -- [PyTorch æºç è§£è¯»ä¹‹ DP & DDP:模型并行和分布å¼è®­ç»ƒè§£æž](https://zhuanlan.zhihu.com/p/343951042) -- [PyTorch æºç è§£è¯»ä¹‹ BN & SyncBN:BN 与 多å¡åŒæ­¥ BN 详解](https://zhuanlan.zhihu.com/p/337732517) -- [PyTorch æºç è§£è¯»ä¹‹ torch.cuda.amp: 自动混åˆç²¾åº¦è¯¦è§£](https://zhuanlan.zhihu.com/p/348554267) -- [PyTorch æºç è§£è¯»ä¹‹ cpp_extension:æ­ç§˜ C++/CUDA ç®—å­å®žçŽ°å’Œè°ƒç”¨å…¨æµç¨‹](https://zhuanlan.zhihu.com/p/348555597) -- [PyTorch æºç è§£è¯»ä¹‹å³æ—¶ç¼–译篇](https://zhuanlan.zhihu.com/p/361101354) -- [PyTorch æºç è§£è¯»ä¹‹åˆ†å¸ƒå¼è®­ç»ƒäº†è§£ä¸€ä¸‹ï¼Ÿ](https://zhuanlan.zhihu.com/p/361314953) -- [PyTorch æºç è§£è¯»ä¹‹ torch.serialization & torch.hub](https://zhuanlan.zhihu.com/p/364239544) - -### å…¶ä»– - -- [困扰我 48 å°æ—¶çš„æ·±æ‹·è´ï¼Œä»Šå¤©ç»ˆäºŽ...](https://zhuanlan.zhihu.com/p/470892209) -- [拿什么拯救我的 4G 显å¡](https://zhuanlan.zhihu.com/p/430123077) -- [是è°å·å·åŠ¨äº†æˆ‘çš„ logger](https://zhuanlan.zhihu.com/p/481383590) -- [三å¥è¯ï¼Œè®© logger 言å¬è®¡ä»Ž](https://zhuanlan.zhihu.com/p/487524917) -- [Logging 
ä¸ä¸ºäººçŸ¥çš„二三事](https://zhuanlan.zhihu.com/p/502610682) -- [Type Hints å…¥é—¨æ•™ç¨‹ï¼Œè®©ä»£ç æ›´åŠ è§„èŒƒæ•´æ´](https://zhuanlan.zhihu.com/p/519335398) -- [手把手教你如何高效地在 MMCV 中贡献算å­](https://zhuanlan.zhihu.com/p/464492627) -- [OpenMMLab æ”¯æŒ IPU 训练芯片](https://zhuanlan.zhihu.com/p/517527926) -- [基于 MMCV 走上开æºå¤§ä½¬ä¹‹è·¯ï¼Ÿ](https://zhuanlan.zhihu.com/p/391144979) diff --git a/docs/zh_cn/get_started/build.md b/docs/zh_cn/get_started/build.md deleted file mode 100644 index 95f611b..0000000 --- a/docs/zh_cn/get_started/build.md +++ /dev/null @@ -1,300 +0,0 @@ -## 从æºç ç¼–译 MMCV - -### 编译 mmcv - -在编译 mmcv 之å‰ï¼Œè¯·ç¡®ä¿ PyTorch å·²ç»æˆåŠŸå®‰è£…åœ¨çŽ¯å¢ƒä¸­ï¼Œå¯ä»¥å‚考 [PyTorch 官方安装文档](https://pytorch.org/get-started/locally/#start-locally)。å¯ä½¿ç”¨ä»¥ä¸‹å‘½ä»¤éªŒè¯ - -```bash -python -c 'import torch;print(torch.__version__)' -``` - -:::{note} - -- 如果克隆代ç ä»“库的速度过慢,å¯ä»¥ä½¿ç”¨ä»¥ä¸‹å‘½ä»¤å…‹éš†ï¼ˆæ³¨æ„:gitee çš„ mmcv ä¸ä¸€å®šå’Œ github çš„ä¿æŒä¸€è‡´ï¼Œå› ä¸ºæ¯å¤©åªåŒæ­¥ä¸€æ¬¡ï¼‰ - -```bash -git clone https://gitee.com/open-mmlab/mmcv.git -``` - -- 如果打算使用 `opencv-python-headless` è€Œä¸æ˜¯ `opencv-python`,例如在一个很å°çš„容器环境或者没有图形用户界é¢çš„æœåŠ¡å™¨ä¸­ï¼Œä½ å¯ä»¥å…ˆå®‰è£… `opencv-python-headless`,这样在安装 mmcv ä¾èµ–的过程中会跳过 `opencv-python`。 - -- 如果编译过程安装ä¾èµ–库的时间过长,å¯ä»¥[设置 pypi æº](https://mirrors.tuna.tsinghua.edu.cn/help/pypi/) - -```bash -pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple -``` - -::: - -#### 在 Linux 上编译 mmcv - -| TODO: 视频教程 - -1. 克隆代ç ä»“库 - - ```bash - git clone https://github.com/open-mmlab/mmcv.git - cd mmcv - ``` - -2. 安装 `ninja` å’Œ `psutil` 以加快编译速度 - - ```bash - pip install -r requirements/optional.txt - ``` - -3. 
检查 nvcc çš„ç‰ˆæœ¬ï¼ˆè¦æ±‚大于等于 9.2,如果没有 GPU,å¯ä»¥è·³è¿‡ï¼‰ - - ```bash - nvcc --version - ``` - - 上述命令如果输出以下信æ¯ï¼Œè¡¨ç¤º nvcc 的设置没有问题,å¦åˆ™éœ€è¦è®¾ç½® CUDA_HOME - - ``` - nvcc: NVIDIA (R) Cuda compiler driver - Copyright (c) 2005-2020 NVIDIA Corporation - Built on Mon_Nov_30_19:08:53_PST_2020 - Cuda compilation tools, release 11.2, V11.2.67 - Build cuda_11.2.r11.2/compiler.29373293_0 - ``` - - :::{note} - å¦‚æžœæƒ³è¦æ”¯æŒ ROCm,å¯ä»¥å‚考 [AMD ROCm](https://rocmdocs.amd.com/en/latest/Installation_Guide/Installation-Guide.html) 安装 ROCm。 - ::: - -4. 检查 gcc çš„ç‰ˆæœ¬ï¼ˆè¦æ±‚大于等于**5.4**) - - ```bash - gcc --version - ``` - -5. 开始编译(预估耗时 10 分钟) - - ```bash - pip install -e . -v - ``` - -6. 验è¯å®‰è£… - - ```bash - python .dev_scripts/check_installation.py - ``` - - 如果上述命令没有报错,说明安装æˆåŠŸã€‚å¦‚æœ‰æŠ¥é”™ï¼Œè¯·æŸ¥çœ‹[问题解决页é¢](../faq.html)是å¦å·²ç»æœ‰è§£å†³æ–¹æ¡ˆã€‚ - - 如果没有找到解决方案,欢迎æ [issue](https://github.com/open-mmlab/mmcv/issues)。 - -#### 在 macOS 上编译 mmcv - -| TODO: 视频教程 - -```{note} -如果你使用的是æ­è½½ apple silicon çš„ mac 设备,请安装 PyTorch 1.13+ 的版本,å¦åˆ™ä¼šé‡åˆ° [issues#2218](https://github.com/open-mmlab/mmcv/issues/2218) 中的问题。 -``` - -1. 克隆代ç ä»“库 - - ```bash - git clone https://github.com/open-mmlab/mmcv.git - cd mmcv - ``` - -2. 安装 `ninja` å’Œ `psutil` 以加快编译速度 - - ```bash - pip install -r requirements/optional.txt - ``` - -3. 开始编译 - - ```bash - pip install -e . - ``` - -4. 
验è¯å®‰è£… - - ```bash - python .dev_scripts/check_installation.py - ``` - - 如果上述命令没有报错,说明安装æˆåŠŸã€‚å¦‚æœ‰æŠ¥é”™ï¼Œè¯·æŸ¥çœ‹[问题解决页é¢](../faq.md)是å¦å·²ç»æœ‰è§£å†³æ–¹æ¡ˆã€‚ - - 如果没有找到解决方案,欢迎æ [issue](https://github.com/open-mmlab/mmcv/issues)。 - -#### 在 Windows 上编译 mmcv - -| TODO: 视频教程 - -在 Windows 上编译 mmcv 比 Linux 夿‚,本节将一步步介ç»å¦‚何在 Windows 上编译 mmcv。 - -##### ä¾èµ–项 - -请先安装以下的ä¾èµ–项: - -- [Git](https://git-scm.com/download/win):安装期间,请选择 **add git to Path** -- [Visual Studio Community 2019](https://visualstudio.microsoft.com):用于编译 C++ å’Œ CUDA ä»£ç  -- [Miniconda](https://docs.conda.io/en/latest/miniconda.html):包管ç†å·¥å…· -- [CUDA 10.2](https://developer.nvidia.com/cuda-10.2-download-archive):如果åªéœ€è¦ CPU 版本å¯ä»¥ä¸å®‰è£… CUDA,安装 CUDA æ—¶ï¼Œå¯æ ¹æ®éœ€è¦è¿›è¡Œè‡ªå®šä¹‰å®‰è£…。如果已ç»å®‰è£…新版本的显å¡é©±åŠ¨ï¼Œå»ºè®®å–æ¶ˆé©±åŠ¨ç¨‹åºçš„安装 - -```{note} -å¦‚æžœä¸æ¸…楚如何安装以上ä¾èµ–,请å‚考[Windows 环境从零安装 mmcv](https://zhuanlan.zhihu.com/p/434491590)。 -å¦å¤–,你需è¦çŸ¥é“如何在 Windows 上设置å˜é‡çŽ¯å¢ƒï¼Œå°¤å…¶æ˜¯ "PATH" 的设置,以下安装过程都会用到。 -``` - -##### 通用步骤 - -1. 从 Windows èœå•å¯åЍ Anaconda 命令行 - - 如 Miniconda 安装程åºå»ºè®®ï¼Œä¸è¦ä½¿ç”¨åŽŸå§‹çš„ `cmd.exe` 或是 `powershell.exe`。命令行有两个版本,一个基于 PowerShell,一个基于传统的 `cmd.exe`。请注æ„以下说明都是使用的基于 PowerShell - -2. 创建一个新的 Conda 环境 - - ```powershell - (base) PS C:\Users\xxx> conda create --name mmcv python=3.7 - (base) PS C:\Users\xxx> conda activate mmcv # ç¡®ä¿åšä»»ä½•æ“作å‰å…ˆæ¿€æ´»çŽ¯å¢ƒ - ``` - -3. 安装 PyTorch 时,å¯ä»¥æ ¹æ®éœ€è¦å®‰è£…æ”¯æŒ CUDA æˆ–ä¸æ”¯æŒ CUDA 的版本 - - ```powershell - # CUDA version - (mmcv) PS C:\Users\xxx> conda install pytorch torchvision cudatoolkit=10.2 -c pytorch - # CPU version - (mmcv) PS C:\Users\xxx> conda install install pytorch torchvision cpuonly -c pytorch - ``` - -4. 克隆代ç ä»“库 - - ```powershell - (mmcv) PS C:\Users\xxx> git clone https://github.com/open-mmlab/mmcv.git - (mmcv) PS C:\Users\xxx> cd mmcv - ``` - -5. 
安装 `ninja` å’Œ `psutil` 以加快编译速度 - - ```powershell - (mmcv) PS C:\Users\xxx\mmcv> pip install -r requirements/optional.txt - ``` - -6. 设置 MSVC 编译器 - - 设置环境å˜é‡ã€‚添加 `C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.27.29110\bin\Hostx86\x64` 到 `PATH`,则 `cl.exe` å¯ä»¥åœ¨å‘½ä»¤è¡Œä¸­è¿è¡Œï¼Œå¦‚下所示。 - - ```powershell - (mmcv) PS C:\Users\xxx\mmcv> cl - Microsoft (R) C/C++ Optimizing Compiler Version 19.27.29111 for x64 - Copyright (C) Microsoft Corporation. All rights reserved. - - usage: cl [ option... ] filename... [ / link linkoption... ] - ``` - - 为了兼容性,我们使用 x86-hosted ä»¥åŠ x64-targeted 版本,å³è·¯å¾„中的 `Hostx86\x64` 。 - - 因为 PyTorch å°†è§£æž `cl.exe` çš„è¾“å‡ºä»¥æ£€æŸ¥å…¶ç‰ˆæœ¬ï¼Œåªæœ‰ utf-8 将会被识别,你å¯èƒ½éœ€è¦å°†ç³»ç»Ÿè¯­è¨€æ›´æ”¹ä¸ºè‹±è¯­ã€‚æŽ§åˆ¶é¢æ¿ -> 地区-> 管ç†-> éž Unicode æ¥è¿›è¡Œè¯­è¨€è½¬æ¢ã€‚ - -##### 编译与安装 mmcv - -mmcv 有两个版本: - -- åªåŒ…å« CPU ç®—å­çš„版本 - - 编译 CPU ç®—å­ï¼Œä½†åªæœ‰ x86 将会被编译,并且编译版本åªèƒ½åœ¨ CPU only 情况下è¿è¡Œ - -- æ—¢åŒ…å« CPU ç®—å­ï¼ŒåˆåŒ…å« CUDA ç®—å­çš„版本 - - åŒæ—¶ç¼–译 CPU å’Œ CUDA ç®—å­ï¼Œ`ops` 模å—çš„ x86 与 CUDA 的代ç éƒ½å¯ä»¥è¢«ç¼–è¯‘ã€‚åŒæ—¶ç¼–译的版本å¯ä»¥åœ¨ CUDA 上调用 GPU - -###### CPU 版本 - -编译安装 - -```powershell -(mmcv) PS C:\Users\xxx\mmcv> python setup.py build_ext # 如果æˆåŠŸ, cl 将被å¯åŠ¨ç”¨äºŽç¼–è¯‘ç®—å­ -(mmcv) PS C:\Users\xxx\mmcv> python setup.py develop # 安装 -``` - -###### GPU 版本 - -1. 
检查 `CUDA_PATH` 或者 `CUDA_HOME` 环境å˜é‡å·²ç»å­˜åœ¨åœ¨ `envs` 之中 - - ```powershell - (mmcv) PS C:\Users\xxx\mmcv> ls env: - - Name Value - ---- ----- - CUDA_PATH C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2 - CUDA_PATH_V10_1 C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1 - CUDA_PATH_V10_2 C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2 - ``` - - 如果没有,你å¯ä»¥æŒ‰ç…§ä¸‹é¢çš„æ­¥éª¤è®¾ç½® - - ```powershell - (mmcv) PS C:\Users\xxx\mmcv> $env:CUDA_HOME = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2" - # 或者 - (mmcv) PS C:\Users\xxx\mmcv> $env:CUDA_HOME = $env:CUDA_PATH_V10_2 # CUDA_PATH_V10_2 å·²ç»åœ¨çŽ¯å¢ƒå˜é‡ä¸­ - ``` - -2. 设置 CUDA 的目标架构 - - ```powershell - # è¿™é‡Œéœ€è¦æ”¹æˆä½ çš„æ˜¾å¡å¯¹åº”的目标架构 - (mmcv) PS C:\Users\xxx\mmcv> $env:TORCH_CUDA_ARCH_LIST="7.5" - ``` - - :::{note} - å¯ä»¥ç‚¹å‡» [cuda-gpus](https://developer.nvidia.com/cuda-gpus) 查看 GPU 的计算能力,也å¯ä»¥é€šè¿‡ CUDA 目录下的 deviceQuery.exe 工具查看 - - ```powershell - (mmcv) PS C:\Users\xxx\mmcv> &"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2\extras\demo_suite\deviceQuery.exe" - Device 0: "NVIDIA GeForce GTX 1660 SUPER" - CUDA Driver Version / Runtime Version 11.7 / 11.1 - CUDA Capability Major/Minor version number: 7.5 - ``` - - 上é¢çš„ 7.5 表示目标架构。注æ„:需把上é¢å‘½ä»¤çš„ v10.2 æ¢æˆä½ çš„ CUDA 版本。 - ::: - -3. 
编译安装 - - ```powershell - (mmcv) PS C:\Users\xxx\mmcv> python setup.py build_ext # 如果æˆåŠŸ, cl 将被å¯åŠ¨ç”¨äºŽç¼–è¯‘ç®—å­ - (mmcv) PS C:\Users\xxx\mmcv> python setup.py develop # 安装 - ``` - - ```{note} - 如果你的 PyTorch 版本是 1.6.0,你å¯èƒ½ä¼šé‡åˆ°ä¸€äº› [issue](https://github.com/pytorch/pytorch/issues/42467) æåˆ°çš„错误,你å¯ä»¥å‚考这个 [pull request](https://github.com/pytorch/pytorch/pull/43380/files) 修改本地环境的 PyTorch æºä»£ç  - ``` - -##### 验è¯å®‰è£… - -```powershell -(mmcv) PS C:\Users\xxx\mmcv> python .dev_scripts/check_installation.py -``` - -如果上述命令没有报错,说明安装æˆåŠŸã€‚å¦‚æœ‰æŠ¥é”™ï¼Œè¯·æŸ¥çœ‹[问题解决页é¢](../faq.md)是å¦å·²ç»æœ‰è§£å†³æ–¹æ¡ˆã€‚ -如果没有找到解决方案,欢迎æ [issue](https://github.com/open-mmlab/mmcv/issues)。 - -### 编译 mmcv-lite - -如果你需è¦ä½¿ç”¨å’Œ PyTorch 相关的模å—ï¼Œè¯·ç¡®ä¿ PyTorch å·²ç»æˆåŠŸå®‰è£…åœ¨çŽ¯å¢ƒä¸­ï¼Œå¯ä»¥å‚考 [PyTorch 官方安装文档](https://pytorch.org/get-started/locally/#start-locally)。 - -1. 克隆代ç ä»“库 - - ```bash - git clone https://github.com/open-mmlab/mmcv.git - cd mmcv - ``` - -2. 开始编译 - - ```bash - MMCV_WITH_OPS=0 pip install -e . -v - ``` - -3. 
验è¯å®‰è£… - - ```bash - python -c 'import mmcv;print(mmcv.__version__)' - ``` diff --git a/docs/zh_cn/get_started/installation.md b/docs/zh_cn/get_started/installation.md deleted file mode 100644 index 54cdbd9..0000000 --- a/docs/zh_cn/get_started/installation.md +++ /dev/null @@ -1,369 +0,0 @@ -## 安装 MMCV - -MMCV 有两个版本: - -- **mmcv**: å®Œæ•´ç‰ˆï¼ŒåŒ…å«æ‰€æœ‰çš„特性以åŠä¸°å¯Œçš„开箱å³ç”¨çš„ CPU å’Œ CUDA ç®—å­ã€‚注æ„,完整版本å¯èƒ½éœ€è¦æ›´é•¿æ—¶é—´æ¥ç¼–译。 -- **mmcv-lite**: 精简版,ä¸åŒ…å« CPU å’Œ CUDA ç®—å­ä½†åŒ…å«å…¶ä½™æ‰€æœ‰ç‰¹æ€§å’ŒåŠŸèƒ½ï¼Œç±»ä¼¼ MMCV 1.0 之å‰çš„版本。如果你ä¸éœ€è¦ä½¿ç”¨ç®—å­çš„è¯ï¼Œç²¾ç®€ç‰ˆå¯ä»¥ä½œä¸ºä¸€ä¸ªè€ƒè™‘选项。 - -```{warning} -请ä¸è¦åœ¨åŒä¸€ä¸ªçŽ¯å¢ƒä¸­å®‰è£…ä¸¤ä¸ªç‰ˆæœ¬ï¼Œå¦åˆ™å¯èƒ½ä¼šé‡åˆ°ç±»ä¼¼ `ModuleNotFound` 的错误。在安装一个版本之å‰ï¼Œéœ€è¦å…ˆå¸è½½å¦ä¸€ä¸ªã€‚`如果 CUDA å¯ç”¨ï¼Œå¼ºçƒˆæŽ¨è安装 mmcv`。 -``` - -### 安装 mmcv - -在安装 mmcv 之å‰ï¼Œè¯·ç¡®ä¿ PyTorch å·²ç»æˆåŠŸå®‰è£…åœ¨çŽ¯å¢ƒä¸­ï¼Œå¯ä»¥å‚考 [PyTorch 官方安装文档](https://pytorch.org/get-started/locally/#start-locally)。å¯ä½¿ç”¨ä»¥ä¸‹å‘½ä»¤éªŒè¯ - -```bash -python -c 'import torch;print(torch.__version__)' -``` - -如果输出版本信æ¯ï¼Œåˆ™è¡¨ç¤º PyTorch 已安装。 - -#### 使用 mim 安装(推è) - -[mim](https://github.com/open-mmlab/mim) 是 OpenMMLab 项目的包管ç†å·¥å…·ï¼Œä½¿ç”¨å®ƒå¯ä»¥å¾ˆæ–¹ä¾¿åœ°å®‰è£… mmcv。 - -```bash -pip install -U openmim -mim install "mmcv>=2.0.0rc1" -``` - -如果å‘现上述的安装命令没有使用预编译包(以 `.whl` 结尾)而是使用æºç åŒ…(以 `.tar.gz` 结尾)安装,则有å¯èƒ½æ˜¯æˆ‘们没有æä¾›å’Œå½“å‰çŽ¯å¢ƒçš„ PyTorch 版本ã€CUDA 版本相匹é…çš„ mmcv 预编译包,此时,你å¯ä»¥[æºç å®‰è£… mmcv](build.md)。 - -
-使用预编译包的安装日志 - -Looking in links: https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/index.html
-Collecting mmcv
-Downloading https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/mmcv-2.0.0rc3-cp38-cp38-manylinux1_x86_64.whl - -
- -
-使用æºç åŒ…的安装日志 - -Looking in links: https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/index.html
-Collecting mmcv==2.0.0rc3
-Downloading mmcv-2.0.0rc3.tar.gz - -
- -如需安装指定版本的 mmcv,例如安装 2.0.0rc3 版本的 mmcv,å¯ä½¿ç”¨ä»¥ä¸‹å‘½ä»¤ - -```bash -mim install mmcv==2.0.0rc3 -``` - -:::{note} -如果你打算使用 `opencv-python-headless` è€Œä¸æ˜¯ `opencv-python`,例如在一个很å°çš„容器环境或者没有图形用户界é¢çš„æœåŠ¡å™¨ä¸­ï¼Œä½ å¯ä»¥å…ˆå®‰è£… `opencv-python-headless`,这样在安装 mmcv ä¾èµ–的过程中会跳过 `opencv-python`。 - -å¦å¤–,如果安装ä¾èµ–库的时间过长,å¯ä»¥æŒ‡å®š pypi æº - -```bash -mim install "mmcv>=2.0.0rc1" -i https://pypi.tuna.tsinghua.edu.cn/simple -``` - -::: - -安装完æˆåŽå¯ä»¥è¿è¡Œ [check_installation.py](https://github.com/open-mmlab/mmcv/blob/2.x/.dev_scripts/check_installation.py) 脚本检查 mmcv 是å¦å®‰è£…æˆåŠŸã€‚ - -#### 使用 pip 安装 - -使用以下命令查看 CUDA å’Œ PyTorch 的版本 - -```bash -python -c 'import torch;print(torch.__version__);print(torch.version.cuda)' -``` - -æ ¹æ®ç³»ç»Ÿçš„类型ã€CUDA 版本ã€PyTorch ç‰ˆæœ¬ä»¥åŠ MMCV 版本选择相应的安装命令 - - - - -
- - - - -
-

-
-
-
-
-如果在上é¢çš„下拉框中没有找到对应的版本,则å¯èƒ½æ˜¯æ²¡æœ‰å¯¹åº” PyTorch 或者 CUDA 或者 mmcv 版本的预编译包,此时,你å¯ä»¥[æºç å®‰è£… mmcv](build.md)。
-
-:::{note}
-PyTorch 在 1.x.0 å’Œ 1.x.1 之间通常是兼容的,故 mmcv åªæä¾› 1.x.0 的编译包。如果你
-çš„ PyTorch 版本是 1.x.1,你å¯ä»¥æ”¾å¿ƒåœ°å®‰è£…在 1.x.0 版本编译的 mmcv。例如,如果你的
-PyTorch 版本是 1.8.1,你å¯ä»¥æ”¾å¿ƒé€‰æ‹© 1.8.x。
-:::
-
-:::{note}
-如果你打算使用 `opencv-python-headless` è€Œä¸æ˜¯ `opencv-python`,例如在一个很å°çš„容器环境或者没有图形用户界é¢çš„æœåŠ¡å™¨ä¸­ï¼Œä½ å¯ä»¥å…ˆå®‰è£… `opencv-python-headless`,这样在安装 mmcv ä¾èµ–的过程中会跳过 `opencv-python`。
-
-å¦å¤–,如果安装ä¾èµ–库的时间过长,å¯ä»¥æŒ‡å®š pypi æº
-
-```bash
-pip install "mmcv>=2.0.0rc1" -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html -i https://pypi.tuna.tsinghua.edu.cn/simple
-```
-
-:::
-
-安装完æˆåŽå¯ä»¥è¿è¡Œ [check_installation.py](https://github.com/open-mmlab/mmcv/blob/2.x/.dev_scripts/check_installation.py) 脚本检查 mmcv 是å¦å®‰è£…æˆåŠŸã€‚
-
-#### 使用 docker 镜åƒ
-
-å…ˆå°†ç®—æ³•åº“å…‹éš†åˆ°æœ¬åœ°å†æž„建镜åƒ
-
-```bash
-git clone https://github.com/open-mmlab/mmcv.git && cd mmcv
-docker build -t mmcv -f docker/release/Dockerfile .
-```
-
-也å¯ä»¥ç›´æŽ¥ä½¿ç”¨ä¸‹é¢çš„命令构建镜åƒ
-
-```bash
-docker build -t mmcv https://github.com/open-mmlab/mmcv.git#2.x:docker/release
-```
-
-[Dockerfile](release/Dockerfile) 默认安装最新的 mmcvï¼Œå¦‚æžœä½ æƒ³è¦æŒ‡å®šç‰ˆæœ¬ï¼Œå¯ä»¥ä½¿ç”¨ä¸‹é¢çš„命令
-
-```bash
-docker image build -t mmcv -f docker/release/Dockerfile --build-arg MMCV=2.0.0rc1 .
-```
-
-如果你想è¦ä½¿ç”¨å…¶ä»–版本的 PyTorch å’Œ CUDA,你å¯ä»¥åœ¨æž„å»ºé•œåƒæ—¶æŒ‡å®šå®ƒä»¬çš„版本。
-
-例如指定 PyTorch 的版本是 1.11,CUDA 的版本是 11.3
-
-```bash
-docker build -t mmcv -f docker/release/Dockerfile \
-    --build-arg PYTORCH=1.11.0 \
-    --build-arg CUDA=11.3 \
-    --build-arg CUDNN=8 \
-    --build-arg MMCV=2.0.0rc1 .
-```
-
-更多 PyTorch å’Œ CUDA 镜åƒå¯ä»¥ç‚¹å‡» [dockerhub/pytorch](https://hub.docker.com/r/pytorch/pytorch/tags) 查看。
-
-### 安装 mmcv-lite
-
-如果你需è¦ä½¿ç”¨å’Œ PyTorch 相关的模å—ï¼Œè¯·ç¡®ä¿ PyTorch å·²ç»æˆåŠŸå®‰è£…åœ¨çŽ¯å¢ƒä¸­ï¼Œå¯ä»¥å‚考 [PyTorch 官方安装文档](https://pytorch.org/get-started/locally/#start-locally)。
-
-```python
-pip install mmcv-lite
-```
diff --git a/docs/zh_cn/get_started/introduction.md b/docs/zh_cn/get_started/introduction.md
deleted file mode 100644
index 4c735b9..0000000
--- a/docs/zh_cn/get_started/introduction.md
+++ /dev/null
@@ -1,36 +0,0 @@
-## ä»‹ç» MMCV
-
-MMCV 是一个é¢å‘计算机视觉的基础库,它æä¾›äº†ä»¥ä¸‹åŠŸèƒ½ï¼š
-
-- [图åƒå’Œè§†é¢‘处ç†](../understand_mmcv/data_process.md)
-- [图åƒå’Œæ ‡æ³¨ç»“æžœå¯è§†åŒ–](../understand_mmcv/visualization.md)
-- [图åƒå˜æ¢](../understand_mmcv/data_transform.md)
-- [å¤šç§ CNN 网络结构](../understand_mmcv/cnn.md)
-- [高质é‡å®žçŽ°çš„å¸¸è§ CUDA ç®—å­](../understand_mmcv/ops.md)
-
-MMCV 支æŒå¤šç§å¹³å°ï¼ŒåŒ…括:
-
-- Linux
-- Windows
-- macOS
-
-它支æŒçš„ OpenMMLab 项目:
-
-- [MMClassification](https://github.com/open-mmlab/mmclassification): OpenMMLab 图åƒåˆ†ç±»å·¥å…·ç®±
-- [MMDetection](https://github.com/open-mmlab/mmdetection): OpenMMLab 目标检测工具箱
-- [MMDetection3D](https://github.com/open-mmlab/mmdetection3d): OpenMMLab 新一代通用 3D 目标检测平å°
-- [MMRotate](https://github.com/open-mmlab/mmrotate): OpenMMLab 旋转框检测工具箱与测试基准
-- [MMYOLO](https://github.com/open-mmlab/mmyolo): OpenMMLab YOLO 系列工具箱与测试基准
-- [MMSegmentation](https://github.com/open-mmlab/mmsegmentation): OpenMMLab 语义分割工具箱
-- [MMOCR](https://github.com/open-mmlab/mmocr): OpenMMLab å…¨æµç¨‹æ–‡å­—检测识别ç†è§£å·¥å…·ç®±
-- [MMPose](https://github.com/open-mmlab/mmpose): OpenMMLab å§¿æ€ä¼°è®¡å·¥å…·ç®±
-- [MMHuman3D](https://github.com/open-mmlab/mmhuman3d): OpenMMLab äººä½“å‚æ•°åŒ–模型工具箱与测试基准
-- [MMSelfSup](https://github.com/open-mmlab/mmselfsup): OpenMMLab 自监ç£å­¦ä¹ å·¥å…·ç®±ä¸Žæµ‹è¯•基准
-- [MMRazor](https://github.com/open-mmlab/mmrazor): OpenMMLab 模型压缩工具箱与测试基准
-- [MMFewShot](https://github.com/open-mmlab/mmfewshot): OpenMMLab 少样本学习工具箱与测试基准
-- [MMAction2](https://github.com/open-mmlab/mmaction2): OpenMMLab 新一代视频ç†è§£å·¥å…·ç®±
-- [MMTracking](https://github.com/open-mmlab/mmtracking): OpenMMLab 一体化视频目标感知平å°
-- [MMFlow](https://github.com/open-mmlab/mmflow): OpenMMLab å…‰æµä¼°è®¡å·¥å…·ç®±ä¸Žæµ‹è¯•基准
-- [MMEditing](https://github.com/open-mmlab/mmediting): OpenMMLab 图åƒè§†é¢‘编辑工具箱
-- [MMGeneration](https://github.com/open-mmlab/mmgeneration): OpenMMLab å›¾ç‰‡è§†é¢‘ç”Ÿæˆæ¨¡åž‹å·¥å…·ç®±
-- [MMDeploy](https://github.com/open-mmlab/mmdeploy): OpenMMLab 模型部署框架
diff --git a/docs/zh_cn/switch_language.md b/docs/zh_cn/switch_language.md
deleted file mode 100644
index e4ac4b2..0000000
--- a/docs/zh_cn/switch_language.md
+++ /dev/null
@@ -1,3 +0,0 @@
-## English
-
-## 简体中文
diff --git a/docs/zh_cn/understand_mmcv/cnn.md b/docs/zh_cn/understand_mmcv/cnn.md
deleted file mode 100644
index 1f91041..0000000
--- a/docs/zh_cn/understand_mmcv/cnn.md
+++ /dev/null
@@ -1,114 +0,0 @@
-## å·ç§¯ç¥žç»ç½‘络
-
-我们为å·ç§¯ç¥žç»ç½‘络æä¾›äº†ä¸€äº›æž„建模å—ï¼ŒåŒ…æ‹¬å±‚æž„å»ºã€æ¨¡å—组件和æƒé‡åˆå§‹åŒ–。
-
-### 网络层的构建
-
-在è¿è¡Œå®žéªŒæ—¶ï¼Œæˆ‘们å¯èƒ½éœ€è¦å°è¯•åŒå±žä¸€ç§ç±»åž‹ä½†ä¸åŒé…置的层,但åˆä¸å¸Œæœ›æ¯æ¬¡éƒ½ä¿®æ”¹ä»£ç ã€‚于是我们æä¾›ä¸€äº›å±‚构建方法,å¯ä»¥ä»Žå­—典构建层,字典å¯ä»¥åœ¨é…置文件中é…置,也å¯ä»¥é€šè¿‡å‘½ä»¤è¡Œå‚数指定。
-
-#### 用法
-
-一个简å•的例å­ï¼š
-
-```python
-from mmcv.cnn import build_conv_layer
-
-cfg = dict(type='Conv3d')
-layer = build_conv_layer(cfg, in_channels=3, out_channels=8, kernel_size=3)
-```
-
-- `build_conv_layer`: 支æŒçš„类型包括 Conv1dã€Conv2dã€Conv3dã€Conv (Conv是Conv2d的别å)
-- `build_norm_layer`: 支æŒçš„类型包括 BN1dã€BN2dã€BN3dã€BN (alias for BN2d)ã€SyncBNã€GNã€LNã€IN1dã€IN2dã€IN3dã€IN(IN是IN2d的别å)
-- `build_activation_layer`:支æŒçš„类型包括 ReLUã€LeakyReLUã€PReLUã€RReLUã€ReLU6ã€ELUã€Sigmoidã€Tanhã€GELU
-- `build_upsample_layer`: 支æŒçš„类型包括 nearestã€bilinearã€deconvã€pixel_shuffle
-- `build_padding_layer`: 支æŒçš„类型包括 zeroã€reflectã€replicate
-
-#### 拓展
-
-我们还å…è®¸è‡ªå®šä¹‰å±‚å’Œç®—å­æ¥æ‰©å±•构建方法。
-
-1. 编写和注册自己的模å—:
-
-   ```python
-   from mmengine.registry import MODELS
-
-   @MODELS.register_module()
-   class MyUpsample:
-
-       def __init__(self, scale_factor):
-           pass
-
-       def forward(self, x):
-           pass
-   ```
-
-2. 在æŸå¤„导入 `MyUpsample` (例如 `__init__.py` )然åŽä½¿ç”¨å®ƒï¼š
-
-   ```python
-   from mmcv.cnn import build_upsample_layer
-
-   cfg = dict(type='MyUpsample', scale_factor=2)
-   layer = build_upsample_layer(cfg)
-   ```
-
-### 模å—组件
-
-我们还æä¾›äº†å¸¸ç”¨çš„æ¨¡å—组件,以方便网络构建。
-å·ç§¯ç»„ä»¶ `ConvModule` ç”± convolutionã€normalization以åŠactivation layers 组æˆï¼Œæ›´å¤šç»†èŠ‚è¯·å‚考 [ConvModule api](api.html#mmcv.cnn.ConvModule)。
-
-```python
-from mmcv.cnn import ConvModule
-
-# conv + bn + relu
-conv = ConvModule(3, 8, 2, norm_cfg=dict(type='BN'))
-# conv + gn + relu
-conv = ConvModule(3, 8, 2, norm_cfg=dict(type='GN', num_groups=2))
-# conv + relu
-conv = ConvModule(3, 8, 2)
-# conv
-conv = ConvModule(3, 8, 2, act_cfg=None)
-# conv + leaky relu
-conv = ConvModule(3, 8, 3, padding=1, act_cfg=dict(type='LeakyReLU'))
-# bn + conv + relu
-conv = ConvModule(
-    3, 8, 2, norm_cfg=dict(type='BN'), order=('norm', 'conv', 'act'))
-```
-
-### Model Zoo
-
-除了`torchvision`的预训练模型,我们还æä¾›ä»¥ä¸‹ CNN 的预训练模型:
-
-- VGG Caffe
-- ResNet Caffe
-- ResNeXt
-- ResNet with Group Normalization
-- ResNet with Group Normalization and Weight Standardization
-- HRNetV2
-- Res2Net
-- RegNet
-
-#### Model URLs in JSON
-
-MMCV中的Model Zoo Link ç”± JSON 文件管ç†ã€‚ json 文件由模型åç§°åŠå…¶url或path的键值对组æˆ,一个json文件å¯èƒ½ç±»ä¼¼äºŽ:
-
-```json
-{
-    "model_a": "https://example.com/models/model_a_9e5bac.pth",
-    "model_b": "pretrain/model_b_ab3ef2c.pth"
-}
-```
-
-å¯ä»¥åœ¨[此处](https://github.com/open-mmlab/mmcv/blob/master/mmcv/model_zoo/open_mmlab.json)找到托管在 OpenMMLab AWS 上的预训练模型的默认链接。
-
-ä½ å¯ä»¥é€šè¿‡å°† `open-mmlab.json` 放在 `MMCV_HOME`下æ¥è¦†ç›–默认链接,如果在环境中找ä¸åˆ°`MMCV_HOME`,则默认使用 `~/.cache/mmcv`。当然你也å¯ä»¥ä½¿ç”¨å‘½ä»¤ `export MMCV_HOME=/your/path`æ¥è®¾ç½®è‡ªå·±çš„路径。
-
-外部的json文件将被åˆå¹¶ä¸ºé»˜è®¤æ–‡ä»¶ï¼Œå¦‚果相åŒçš„键出现在外部`json`和默认`json`中,则将使用外部`json`。
-
-#### Load Checkpoint
-
-`mmcv.load_checkpoint()`çš„å‚æ•°`filename`支æŒä»¥ä¸‹ç±»åž‹ï¼š
-
-- filepath: `checkpoint`路径
-- `http://xxx` and `https://xxx`: 下载checkpoint的链接,文件å中必需包å«`SHA256`åŽç¼€
-- `torchvision://xxx`: `torchvision.models`中的模型链接,更多细节å‚考 [torchvision](https://pytorch.org/docs/stable/torchvision/models.html)
-- `open-mmlab://xxx`: 默认和其他 json 文件中æä¾›çš„æ¨¡åž‹é“¾æŽ¥æˆ–文件路径
diff --git a/docs/zh_cn/understand_mmcv/data_transform.md b/docs/zh_cn/understand_mmcv/data_transform.md
deleted file mode 100644
index 47d16e1..0000000
--- a/docs/zh_cn/understand_mmcv/data_transform.md
+++ /dev/null
@@ -1,341 +0,0 @@
-# æ•°æ®å˜æ¢
-
-在 OpenMMLab 算法库中,数æ®é›†çš„æž„建和数æ®çš„准备是相互解耦的。通常,数æ®é›†çš„æž„建åªå¯¹æ•°æ®é›†è¿›è¡Œè§£æžï¼Œè®°å½•æ¯ä¸ªæ ·æœ¬çš„基本信æ¯ï¼›è€Œæ•°æ®çš„准备则是通过一系列的数æ®å˜æ¢ï¼Œæ ¹æ®æ ·æœ¬çš„基本信æ¯è¿›è¡Œæ•°æ®åŠ è½½ã€é¢„处ç†ã€æ ¼å¼åŒ–ç­‰æ“作。
-
-## æ•°æ®å˜æ¢çš„设计
-
-在 MMCV 中,我们使用å„ç§å¯è°ƒç”¨çš„æ•°æ®å˜æ¢ç±»æ¥è¿›è¡Œæ•°æ®çš„æ“ä½œã€‚è¿™äº›æ•°æ®å˜æ¢ç±»å¯ä»¥æŽ¥å—若干é…ç½®å‚æ•°è¿›è¡Œå®žä¾‹åŒ–,之åŽé€šè¿‡è°ƒç”¨çš„æ–¹å¼å¯¹è¾“入的数æ®å­—典进行处ç†ã€‚åŒæ—¶ï¼Œæˆ‘们约定所有数æ®å˜æ¢éƒ½æŽ¥å—一个字典作为输入,并将处ç†åŽçš„æ•°æ®è¾“出为一个字典。一个简å•的例å­å¦‚下:
-
-```python
->>> import numpy as np
->>> from mmcv.transforms import Resize
->>>
->>> transform = Resize(scale=(224, 224))
->>> data_dict = {'img': np.random.rand(256, 256, 3)}
->>> data_dict = transform(data_dict)
->>> print(data_dict['img'].shape)
-(224, 224, 3)
-```
-
-æ•°æ®å˜æ¢ç±»ä¼šè¯»å–输入字典的æŸäº›å­—段,并且å¯èƒ½æ·»åŠ ã€æˆ–者更新æŸäº›å­—段。这些字段的键大部分情况下是固定的,如 `Resize` 会固定地读å–输入字典中的 `"img"` 等字段。我们å¯ä»¥åœ¨å¯¹åº”类的文档中了解对输入输出字段的约定。
-
-```{note}
-默认情况下,在需è¦å›¾åƒå°ºå¯¸ä½œä¸º**åˆå§‹åŒ–傿•°**的数æ®å˜æ¢ (如Resize, Pad) 中,图åƒå°ºå¯¸çš„顺åºå‡ä¸º (width, height)。在数æ®å˜æ¢**返回的字典**中,图åƒç›¸å…³çš„尺寸, 如 `img_shape`ã€`ori_shape`ã€`pad_shape` 等,å‡ä¸º (height, width)。
-```
-
-MMCV 为所有的数æ®å˜æ¢ç±»æä¾›äº†ä¸€ä¸ªç»Ÿä¸€çš„基类 (`BaseTransform`):
-
-```python
-class BaseTransform(metaclass=ABCMeta):
-
-    def __call__(self, results: dict) -> dict:
-
-        return self.transform(results)
-
-    @abstractmethod
-    def transform(self, results: dict) -> dict:
-        pass
-```
-
-所有的数æ®å˜æ¢ç±»éƒ½éœ€è¦ç»§æ‰¿ `BaseTransform`,并实现 `transform` 方法。`transform` 方法的输入和输出å‡ä¸ºä¸€ä¸ªå­—典。在**自定义数æ®å˜æ¢ç±»**一节中,我们会更详细地介ç»å¦‚何实现一个数æ®å˜æ¢ç±»ã€‚
-
-## æ•°æ®æµæ°´çº¿
-
-如上所述,所有数æ®å˜æ¢çš„è¾“å…¥å’Œè¾“å‡ºéƒ½æ˜¯ä¸€ä¸ªå­—å…¸ï¼Œè€Œä¸”æ ¹æ® OpenMMLab 中 [有关数æ®é›†çš„约定](TODO),数æ®é›†ä¸­æ¯ä¸ªæ ·æœ¬çš„基本信æ¯éƒ½æ˜¯ä¸€ä¸ªå­—典。这样一æ¥ï¼Œæˆ‘们å¯ä»¥å°†æ‰€æœ‰çš„æ•°æ®å˜æ¢æ“ä½œé¦–å°¾ç›¸æŽ¥ï¼Œç»„åˆæˆä¸ºä¸€æ¡æ•°æ®æµæ°´çº¿ï¼ˆdata pipeline),输入数æ®é›†ä¸­æ ·æœ¬çš„ä¿¡æ¯å­—典,输出完æˆä¸€ç³»åˆ—处ç†åŽçš„ä¿¡æ¯å­—典。
-
-ä»¥åˆ†ç±»ä»»åŠ¡ä¸ºä¾‹ï¼Œæˆ‘ä»¬åœ¨ä¸‹å›¾å±•ç¤ºäº†ä¸€ä¸ªå…¸åž‹çš„æ•°æ®æµæ°´çº¿ã€‚对æ¯ä¸ªæ ·æœ¬ï¼Œæ•°æ®é›†ä¸­ä¿å­˜çš„åŸºæœ¬ä¿¡æ¯æ˜¯ä¸€ä¸ªå¦‚å›¾ä¸­æœ€å·¦ä¾§æ‰€ç¤ºçš„å­—å…¸ï¼Œä¹‹åŽæ¯ç»è¿‡ä¸€ä¸ªç”±è“色å—代表的数æ®å˜æ¢æ“作,数æ®å­—典中都会加入新的字段(标记为绿色)或更新现有的字段(标记为橙色)。
-
-
- -
- -在é…ç½®æ–‡ä»¶ä¸­ï¼Œæ•°æ®æµæ°´çº¿æ˜¯ä¸€ä¸ªè‹¥å¹²æ•°æ®å˜æ¢é…置字典组æˆçš„列表,æ¯ä¸ªæ•°æ®é›†éƒ½éœ€è¦è®¾ç½®å‚æ•° `pipeline` æ¥å®šä¹‰è¯¥æ•°æ®é›†éœ€è¦è¿›è¡Œçš„æ•°æ®å‡†å¤‡æ“ä½œã€‚å¦‚ä¸Šæ•°æ®æµæ°´çº¿åœ¨é…置文件中的é…置如下: - -```python -pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='Resize', size=256, keep_ratio=True), - dict(type='CenterCrop', crop_size=224), - dict(type='Normalize', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375]), - dict(type='ClsFormatBundle') -] - -dataset = dict( - ... - pipeline=pipeline, - ... -) -``` - -## 常用的数æ®å˜æ¢ç±» - -按照功能,常用的数æ®å˜æ¢ç±»å¯ä»¥å¤§è‡´åˆ†ä¸ºæ•°æ®åŠ è½½ã€æ•°æ®é¢„处ç†ä¸Žå¢žå¼ºã€æ•°æ®æ ¼å¼åŒ–。在 MMCV 中,我们æä¾›äº†ä¸€äº›å¸¸ç”¨çš„æ•°æ®å˜æ¢ç±»å¦‚下: - -### æ•°æ®åŠ è½½ - -为了支æŒå¤§è§„模数æ®é›†çš„加载,通常在 `Dataset` åˆå§‹åŒ–æ—¶ä¸åŠ è½½æ•°æ®ï¼ŒåªåŠ è½½ç›¸åº”çš„è·¯å¾„ã€‚å› æ­¤éœ€è¦åœ¨æ•°æ®æµæ°´çº¿ä¸­è¿›è¡Œå…·ä½“æ•°æ®çš„加载。 - -| class | 功能 | -| :-------------------------: | :---------------------------------------: | -| [`LoadImageFromFile`](TODO) | æ ¹æ®è·¯å¾„åŠ è½½å›¾åƒ | -| [`LoadAnnotations`](TODO) | 加载和组织标注信æ¯ï¼Œå¦‚ bboxã€è¯­ä¹‰åˆ†å‰²å›¾ç­‰ | - -### æ•°æ®é¢„处ç†åŠå¢žå¼º - -æ•°æ®é¢„处ç†å’Œå¢žå¼ºé€šå¸¸æ˜¯å¯¹å›¾åƒæœ¬èº«è¿›è¡Œå˜æ¢ï¼Œå¦‚è£å‰ªã€å¡«å……ã€ç¼©æ”¾ç­‰ã€‚ - -| class | 功能 | -| :------------------------------: | :--------------------------------: | -| [`Pad`](TODO) | 填充图åƒè¾¹ç¼˜ | -| [`CenterCrop`](TODO) | 居中è£å‰ª | -| [`Normalize`](TODO) | 对图åƒè¿›è¡Œå½’一化 | -| [`Resize`](TODO) | æŒ‰ç…§æŒ‡å®šå°ºå¯¸æˆ–æ¯”ä¾‹ç¼©æ”¾å›¾åƒ | -| [`RandomResize`](TODO) | 缩放图åƒè‡³æŒ‡å®šèŒƒå›´çš„éšæœºå°ºå¯¸ | -| [`RandomMultiscaleResize`](TODO) | 缩放图åƒè‡³å¤šä¸ªå°ºå¯¸ä¸­çš„éšæœºä¸€ä¸ªå°ºå¯¸ | -| [`RandomGrayscale`](TODO) | éšæœºç°åº¦åŒ– | -| [`RandomFlip`](TODO) | 图åƒéšæœºç¿»è½¬ | -| [`MultiScaleFlipAug`](TODO) | 支æŒç¼©æ”¾å’Œç¿»è½¬çš„æµ‹è¯•æ—¶æ•°æ®å¢žå¼º | - -### æ•°æ®æ ¼å¼åŒ– - -æ•°æ®æ ¼å¼åŒ–æ“作通常是对数æ®è¿›è¡Œçš„类型转æ¢ã€‚ - -| class | 功能 | -| :---------------------: | :-------------------------------: | -| [`ToTensor`](TODO) | 将指定的数æ®è½¬æ¢ä¸º `torch.Tensor` | -| 
[`ImageToTensor`](TODO) | 将图åƒè½¬æ¢ä¸º `torch.Tensor` | - -## 自定义数æ®å˜æ¢ç±» - -è¦å®žçŽ°ä¸€ä¸ªæ–°çš„æ•°æ®å˜æ¢ç±»ï¼Œéœ€è¦ç»§æ‰¿ `BaseTransform`,并实现 `transform` 方法。这里,我们使用一个简å•çš„ç¿»è½¬å˜æ¢ï¼ˆ`MyFlip`)作为示例: - -```python -import random -import mmcv -from mmcv.transforms import BaseTransform, TRANSFORMS - -@TRANSFORMS.register_module() -class MyFlip(BaseTransform): - def __init__(self, direction: str): - super().__init__() - self.direction = direction - - def transform(self, results: dict) -> dict: - img = results['img'] - results['img'] = mmcv.imflip(img, direction=self.direction) - return results -``` - -从而,我们å¯ä»¥å®žä¾‹åŒ–一个 `MyFlip` 对象,并将之作为一个å¯è°ƒç”¨å¯¹è±¡ï¼Œæ¥å¤„ç†æˆ‘们的数æ®å­—典。 - -```python -import numpy as np - -transform = MyFlip(direction='horizontal') -data_dict = {'img': np.random.rand(224, 224, 3)} -data_dict = transform(data_dict) -processed_img = data_dict['img'] -``` - -åˆæˆ–者,在é…置文件的 pipeline 中使用 `MyFlip` å˜æ¢ - -```python -pipeline = [ - ... - dict(type='MyFlip', direction='horizontal'), - ... -] -``` - -éœ€è¦æ³¨æ„的是,如需在é…置文件中使用,需è¦ä¿è¯ `MyFlip` 类所在的文件在è¿è¡Œæ—¶èƒ½å¤Ÿè¢«å¯¼å…¥ã€‚ - -## å˜æ¢åŒ…装 - -å˜æ¢åŒ…装是一ç§ç‰¹æ®Šçš„æ•°æ®å˜æ¢ç±»ï¼Œä»–ä»¬æœ¬èº«å¹¶ä¸æ“作数æ®å­—典中的图åƒã€æ ‡ç­¾ç­‰ä¿¡æ¯ï¼Œè€Œæ˜¯å¯¹å…¶ä¸­å®šä¹‰çš„æ•°æ®å˜æ¢çš„行为进行增强。 - -### 字段映射(KeyMapper) - -字段映射包装(`KeyMapper`)用于对数æ®å­—典中的字段进行映射。例如,一般的图åƒå¤„ç†å˜æ¢éƒ½ä»Žæ•°æ®å­—典中的 `"img"` å­—æ®µèŽ·å¾—å€¼ã€‚ä½†æœ‰äº›æ—¶å€™ï¼Œæˆ‘ä»¬å¸Œæœ›è¿™äº›å˜æ¢å¤„ç†æ•°æ®å­—典中其他字段中的图åƒï¼Œæ¯”如 `"gt_img"` 字段。 - -如果é…åˆæ³¨å†Œå™¨å’Œé…置文件使用的è¯ï¼Œåœ¨é…置文件中数æ®é›†çš„ `pipeline` 中如下例使用字段映射包装: - -```python -pipeline = [ - ... - dict(type='KeyMapper', - mapping={ - 'img': 'gt_img', # å°† "gt_img" 字段映射至 "img" 字段 - 'mask': ..., # ä¸ä½¿ç”¨åŽŸå§‹æ•°æ®ä¸­çš„ "mask" 字段。å³å¯¹äºŽè¢«åŒ…装的数æ®å˜æ¢ï¼Œæ•°æ®ä¸­ä¸åŒ…å« "mask" 字段 - }, - auto_remap=True, # 在完æˆå˜æ¢åŽï¼Œå°† "img" 釿˜ å°„回 "gt_img" 字段 - transforms=[ - # 在 `RandomFlip` å˜æ¢ç±»ä¸­ï¼Œæˆ‘们åªéœ€è¦æ“作 "img" 字段å³å¯ - dict(type='RandomFlip'), - ]) - ... 
-] -``` - -利用字段映射包装,我们在实现数æ®å˜æ¢ç±»æ—¶ï¼Œä¸éœ€è¦è€ƒè™‘在 `transform` 方法中考虑å„ç§å¯èƒ½çš„输入字段å,åªéœ€è¦å¤„ç†é»˜è®¤çš„字段å³å¯ã€‚ - -### éšæœºé€‰æ‹©ï¼ˆRandomChoiceï¼‰å’Œéšæœºæ‰§è¡Œï¼ˆRandomApply) - -éšæœºé€‰æ‹©åŒ…装(`RandomChoice`)用于从一系列数æ®å˜æ¢ç»„åˆä¸­éšæœºåº”ç”¨ä¸€ä¸ªæ•°æ®å˜æ¢ç»„åˆã€‚利用这一包装,我们å¯ä»¥ç®€å•地实现一些数æ®å¢žå¼ºåŠŸèƒ½ï¼Œæ¯”å¦‚ AutoAugment。 - -如果é…åˆæ³¨å†Œå™¨å’Œé…置文件使用的è¯ï¼Œåœ¨é…置文件中数æ®é›†çš„ `pipeline` ä¸­å¦‚ä¸‹ä¾‹ä½¿ç”¨éšæœºé€‰æ‹©åŒ…装: - -```python -pipeline = [ - ... - dict(type='RandomChoice', - transforms=[ - [ - dict(type='Posterize', bits=4), - dict(type='Rotate', angle=30.) - ], # 第一ç§éšæœºå˜åŒ–ç»„åˆ - [ - dict(type='Equalize'), - dict(type='Rotate', angle=30) - ], # 第二ç§éšæœºå˜æ¢ç»„åˆ - ], - prob=[0.4, 0.6] # 两ç§éšæœºå˜æ¢ç»„åˆå„自的选用概率 - ) - ... -] -``` - -éšæœºæ‰§è¡ŒåŒ…装(`RandomApply`ï¼‰ç”¨äºŽä»¥æŒ‡å®šæ¦‚çŽ‡éšæœºæ‰§è¡Œæ•°æ®å˜æ¢ç»„åˆã€‚例如: - -```python -pipeline = [ - ... - dict(type='RandomApply', - transforms=[dict(type='Rotate', angle=30.)], - prob=0.3) # 以 0.3 的概率执行被包装的数æ®å˜æ¢ - ... -] -``` - -### 多目标扩展(TransformBroadcaster) - -通常,一个数æ®å˜æ¢ç±»åªä¼šä»Žä¸€ä¸ªå›ºå®šçš„å­—æ®µè¯»å–æ“作目标。虽然我们也å¯ä»¥ä½¿ç”¨ `KeyMapper` æ¥æ”¹å˜è¯»å–çš„å­—æ®µï¼Œä½†æ— æ³•å°†å˜æ¢ä¸€æ¬¡æ€§åº”用于多个字段的数æ®ã€‚为了实现这一功能,我们需è¦å€ŸåŠ©å¤šç›®æ ‡æ‰©å±•åŒ…è£…ï¼ˆ`TransformBroadcaster`)。 - -多目标扩展包装(`TransformBroadcaster`)有两个用法,一是将数æ®å˜æ¢ä½œç”¨äºŽæŒ‡å®šçš„多个字段,二是将数æ®å˜æ¢ä½œç”¨äºŽæŸä¸ªå­—段下的一组目标中。 - -1. 
应用于多个字段 - - å‡è®¾æˆ‘们需è¦å°†æ•°æ®å˜æ¢åº”用于 `"lq"` (low-quality) å’Œ `"gt"` (ground-truth) 两个字段中的图åƒä¸Šã€‚ - - ```python - pipeline = [ - dict(type='TransformBroadcaster', - # 分别应用于 "lq" å’Œ "gt" 两个字段,并将二者应设置 "img" 字段 - mapping={'img': ['lq', 'gt']}, - # 在完æˆå˜æ¢åŽï¼Œå°† "img" å­—æ®µé‡æ˜ å°„回原先的字段 - auto_remap=True, - # 是å¦åœ¨å¯¹å„ç›®æ ‡çš„å˜æ¢ä¸­å…±äº«éšæœºå˜é‡ - # 更多介ç»å‚加åŽç»­ç« èŠ‚ï¼ˆéšæœºå˜é‡å…±äº«ï¼‰ - share_random_params=True, - transforms=[ - # 在 `RandomFlip` å˜æ¢ç±»ä¸­ï¼Œæˆ‘们åªéœ€è¦æ“作 "img" 字段å³å¯ - dict(type='RandomFlip'), - ]) - ] - ``` - - 在多目标扩展的 `mapping` è®¾ç½®ä¸­ï¼Œæˆ‘ä»¬åŒæ ·å¯ä»¥ä½¿ç”¨ `...` æ¥å¿½ç•¥æŒ‡å®šçš„原始字段。如以下例å­ä¸­ï¼Œè¢«åŒ…裹的 `RandomCrop` 会对字段 `"img"` 中的图åƒè¿›è¡Œè£å‰ªï¼Œå¹¶ä¸”在字段 `"img_shape"` 存在时更新剪è£åŽçš„图åƒå¤§å°ã€‚å¦‚æžœæˆ‘ä»¬å¸Œæœ›åŒæ—¶å¯¹ä¸¤ä¸ªå›¾åƒå­—段 `"lq"` å’Œ `"gt"` 进行相åŒçš„éšæœºè£å‰ªï¼Œä½†åªæ›´æ–°ä¸€æ¬¡ `"img_shape"` 字段,å¯ä»¥é€šè¿‡ä¾‹å­ä¸­çš„æ–¹å¼å®žçŽ°ï¼š - - ```python - pipeline = [ - dict(type='TransformBroadcaster', - mapping={ - 'img': ['lq', 'gt'], - 'img_shape': ['img_shape', ...], - }, - # 在完æˆå˜æ¢åŽï¼Œå°† "img" å’Œ "img_shape" å­—æ®µé‡æ˜ å°„回原先的字段 - auto_remap=True, - # 是å¦åœ¨å¯¹å„ç›®æ ‡çš„å˜æ¢ä¸­å…±äº«éšæœºå˜é‡ - # 更多介ç»å‚加åŽç»­ç« èŠ‚ï¼ˆéšæœºå˜é‡å…±äº«ï¼‰ - share_random_params=True, - transforms=[ - # `RandomCrop` 类中会æ“作 "img" å’Œ "img_shape" 字段。若 "img_shape" 空缺, - # åˆ™åªæ“作 "img" - dict(type='RandomCrop'), - ]) - ] - ``` - -2. 
应用于一个字段的一组目标 - - å‡è®¾æˆ‘们需è¦å°†æ•°æ®å˜æ¢åº”用于 `"images"` 字段,该字段为一个图åƒç»„æˆçš„ list。 - - ```python - pipeline = [ - dict(type='TransformBroadcaster', - # å°† "images" 字段下的æ¯å¼ å›¾ç‰‡æ˜ å°„至 "img" 字段 - mapping={'img': 'images'}, - # 在完æˆå˜æ¢åŽï¼Œå°† "img" å­—æ®µä¸‹çš„å›¾ç‰‡é‡æ˜ å°„回 "images" 字段的列表中 - auto_remap=True, - # 是å¦åœ¨å¯¹å„ç›®æ ‡çš„å˜æ¢ä¸­å…±äº«éšæœºå˜é‡ - share_random_params=True, - transforms=[ - # 在 `RandomFlip` å˜æ¢ç±»ä¸­ï¼Œæˆ‘们åªéœ€è¦æ“作 "img" 字段å³å¯ - dict(type='RandomFlip'), - ]) - ] - ``` - -#### 装饰器 `cache_randomness` - -在 `TransformBroadcaster` 中,我们æä¾›äº† `share_random_params` é€‰é¡¹æ¥æ”¯æŒåœ¨å¤šæ¬¡æ•°æ®å˜æ¢ä¸­å…±äº«éšæœºçŠ¶æ€ã€‚ä¾‹å¦‚ï¼Œåœ¨è¶…åˆ†è¾¨çŽ‡ä»»åŠ¡ä¸­ï¼Œæˆ‘ä»¬å¸Œæœ›å°†éšæœºå˜æ¢**åŒæ­¥**作用于低分辨率图åƒå’ŒåŽŸå§‹å›¾åƒã€‚如果我们希望在自定义的数æ®å˜æ¢ç±»ä¸­ä½¿ç”¨è¿™ä¸€åŠŸèƒ½ï¼Œéœ€è¦åœ¨ç±»ä¸­æ ‡æ³¨å“ªäº›éšæœºå˜é‡æ˜¯æ”¯æŒå…±äº«çš„。这å¯ä»¥é€šè¿‡è£…饰器 `cache_randomness` æ¥å®žçŽ°ã€‚ - -以上文中的 `MyFlip` ä¸ºä¾‹ï¼Œæˆ‘ä»¬å¸Œæœ›ä»¥ä¸€å®šçš„æ¦‚çŽ‡éšæœºæ‰§è¡Œç¿»è½¬ï¼š - -```python -from mmcv.transforms.utils import cache_randomness - -@TRANSFORMS.register_module() -class MyRandomFlip(BaseTransform): - def __init__(self, prob: float, direction: str): - super().__init__() - self.prob = prob - self.direction = direction - - @cache_randomness # 标注该方法的输出为å¯å…±äº«çš„éšæœºå˜é‡ - def do_flip(self): - flip = True if random.random() > self.prob else False - return flip - - def transform(self, results: dict) -> dict: - img = results['img'] - if self.do_flip(): - results['img'] = mmcv.imflip(img, direction=self.direction) - return results -``` - -在上é¢çš„例å­ä¸­ï¼Œæˆ‘们用`cache_randomness` 装饰 `do_flip`方法,å³å°†è¯¥æ–¹æ³•返回值 `flip` 标注为一个支æŒå…±äº«çš„éšæœºå˜é‡ã€‚进而,在 `TransformBroadcaster` å¯¹å¤šä¸ªç›®æ ‡çš„å˜æ¢ä¸­ï¼Œè¿™ä¸€å˜é‡çš„å€¼éƒ½ä¼šä¿æŒä¸€è‡´ã€‚ - -#### 装饰器 `avoid_cache_randomness` - -在一些情况下,我们无法将数æ®å˜æ¢ä¸­äº§ç”Ÿéšæœºå˜é‡çš„过程å•独放在类方法中。例如数æ®å˜æ¢ä¸­ä½¿ç”¨çš„æ¥è‡ªç¬¬ä¸‰æ–¹åº“çš„æ¨¡å—,这些模å—å°†éšæœºå˜é‡ç›¸å…³çš„部分å°è£…在了内部,导致无法将其抽出为数æ®å˜æ¢çš„类方法。这样的数æ®å˜æ¢æ— æ³•通过装饰器 `cache_randomness` 
标注支æŒå…±äº«çš„éšæœºå˜é‡ï¼Œè¿›è€Œæ— æ³•åœ¨å¤šç›®æ ‡æ‰©å±•æ—¶å…±äº«éšæœºå˜é‡ã€‚ - -为了é¿å…在多目标扩展中误用此类数æ®å˜æ¢ï¼Œæˆ‘们æä¾›äº†å¦ä¸€ä¸ªè£…饰器 `avoid_cache_randomness`,用æ¥å¯¹æ­¤ç±»æ•°æ®å˜æ¢è¿›è¡Œæ ‡è®°ï¼š - -```python -from mmcv.transforms.utils import avoid_cache_randomness - -@TRANSFORMS.register_module() -@avoid_cache_randomness -class MyRandomTransform(BaseTransform): - - def transform(self, results: dict) -> dict: - ... -``` - -用 `avoid_cache_randomness` 标记的数æ®å˜æ¢ç±»ï¼Œå½“其实例被 `TransformBroadcaster` åŒ…è£…ä¸”å°†å‚æ•° `share_random_params` 设置为 True 时,会抛出异常,以此æé†’用户ä¸èƒ½è¿™æ ·ä½¿ç”¨ã€‚ - -在使用 `avoid_cache_randomness` æ—¶éœ€è¦æ³¨æ„以下几点: - -1. `avoid_cache_randomness` åªç”¨äºŽè£…饰数æ®å˜æ¢ç±»ï¼ˆBaseTransfrom çš„å­ç±»ï¼‰ï¼Œè€Œä¸èƒ½ç”¨ä¸Žè£…饰其他一般的类ã€ç±»æ–¹æ³•或函数 -2. 被 `avoid_cache_randomness` 修饰的数æ®å˜æ¢ä½œä¸ºåŸºç±»æ—¶ï¼Œå…¶å­ç±»å°†**ä¸ä¼šç»§æ‰¿**这一特性。如果å­ç±»ä»æ— æ³•å…±äº«éšæœºå˜é‡ï¼Œåˆ™åº”冿¬¡ä½¿ç”¨ `avoid_cache_randomness` 修饰 -3. åªæœ‰å½“一个数æ®å˜æ¢å…·æœ‰éšæœºæ€§ï¼Œä¸”æ— æ³•å…±äº«éšæœºå‚数时,æ‰éœ€è¦ä»¥ `avoid_cache_randomness` ä¿®é¥°ã€‚æ— éšæœºæ€§çš„æ•°æ®å˜æ¢ä¸éœ€è¦ä¿®é¥° diff --git a/docs/zh_cn/understand_mmcv/ops.md b/docs/zh_cn/understand_mmcv/ops.md deleted file mode 100644 index 11b885d..0000000 --- a/docs/zh_cn/understand_mmcv/ops.md +++ /dev/null @@ -1,66 +0,0 @@ -## ç®—å­ - -MMCV æä¾›äº†æ£€æµ‹ã€åˆ†å‰²ç­‰ä»»åŠ¡ä¸­å¸¸ç”¨çš„ç®—å­ - -| Device | CPU | CUDA | MLU | MPS | Ascend | -| ---------------------------- | --- | ---- | --- | --- | ------ | -| ActiveRotatedFilter | √ | √ | | | | -| AssignScoreWithK | | √ | | | | -| BallQuery | | √ | | | | -| BBoxOverlaps | | √ | √ | √ | √ | -| BorderAlign | | √ | | | | -| BoxIouRotated | √ | √ | | | | -| BoxIouQuadri | √ | √ | | | | -| CARAFE | | √ | √ | | | -| ChamferDistance | | √ | | | | -| CrissCrossAttention | | √ | | | | -| ContourExpand | √ | | | | | -| ConvexIoU | | √ | | | | -| CornerPool | | √ | | | | -| Correlation | | √ | | | | -| Deformable Convolution v1/v2 | √ | √ | | | √ | -| Deformable RoIPool | | √ | √ | | √ | -| 
DiffIoURotated | | √ | | | | -| DynamicScatter | | √ | | | | -| FurthestPointSample | | √ | | | | -| FurthestPointSampleWithDist | | √ | | | | -| FusedBiasLeakyrelu | | √ | | | √ | -| GatherPoints | | √ | | | √ | -| GroupPoints | | √ | | | | -| Iou3d | | √ | √ | | | -| KNN | | √ | | | | -| MaskedConv | | √ | √ | | √ | -| MergeCells | | √ | | | | -| MinAreaPolygon | | √ | | | | -| ModulatedDeformConv2d | √ | √ | | | √ | -| MultiScaleDeformableAttn | | √ | √ | | | -| NMS | √ | √ | √ | | √ | -| NMSRotated | √ | √ | | | √ | -| NMSQuadri | √ | √ | | | | -| PixelGroup | √ | | | | | -| PointsInBoxes | √ | √ | | | | -| PointsInPolygons | | √ | | | | -| PSAMask | √ | √ | √ | | √ | -| RotatedFeatureAlign | √ | √ | | | | -| RoIPointPool3d | | √ | √ | | | -| RoIPool | | √ | √ | | √ | -| RoIAlignRotated | √ | √ | √ | | | -| RiRoIAlignRotated | | √ | | | | -| RoIAlign | √ | √ | √ | | | -| RoIAwarePool3d | | √ | √ | | | -| SAConv2d | | √ | | | | -| SigmoidFocalLoss | | √ | √ | | √ | -| SoftmaxFocalLoss | | √ | | | √ | -| SoftNMS | | √ | | | | -| Sparse Convolution | | √ | | | | -| Synchronized BatchNorm | | √ | | | | -| ThreeInterpolate | | √ | | | | -| ThreeNN | | √ | √ | | | -| TINShift | | √ | √ | | | -| UpFirDn2d | | √ | | | | -| Voxelization | √ | √ | | | √ | -| PrRoIPool | | √ | | | | -| BezierAlign | √ | √ | | | | -| BiasAct | | √ | | | | -| FilteredLrelu | | √ | | | | -| Conv2dGradfix | | √ | | | | diff --git a/docs/zh_cn/Makefile b/docs_zh_CN/Makefile similarity index 100% rename from docs/zh_cn/Makefile rename to docs_zh_CN/Makefile diff --git a/docs/zh_cn/_static/css/readthedocs.css b/docs_zh_CN/_static/css/readthedocs.css similarity index 75% rename from docs/zh_cn/_static/css/readthedocs.css rename to docs_zh_CN/_static/css/readthedocs.css index 9e3a567..3f425fc 100644 --- a/docs/zh_cn/_static/css/readthedocs.css +++ b/docs_zh_CN/_static/css/readthedocs.css @@ -4,7 +4,3 @@ height: 40px; width: 85px; } - -table.colwidths-auto td { - width: 50% -} diff --git 
a/docs/zh_cn/_static/image/mmcv-logo.png b/docs_zh_CN/_static/image/mmcv-logo.png similarity index 100% rename from docs/zh_cn/_static/image/mmcv-logo.png rename to docs_zh_CN/_static/image/mmcv-logo.png diff --git a/docs_zh_CN/api.rst b/docs_zh_CN/api.rst new file mode 100644 index 0000000..8ca9118 --- /dev/null +++ b/docs_zh_CN/api.rst @@ -0,0 +1,44 @@ +fileio +------- +.. automodule:: mmcv.fileio + :members: + +image +------ +.. automodule:: mmcv.image + :members: + +video +------ +.. automodule:: mmcv.video + :members: + +arraymisc +--------- +.. automodule:: mmcv.arraymisc + :members: + +visualization +-------------- +.. automodule:: mmcv.visualization + :members: + +utils +----- +.. automodule:: mmcv.utils + :members: + +cnn +---- +.. automodule:: mmcv.cnn + :members: + +runner +------ +.. automodule:: mmcv.runner + :members: + +ops +------ +.. automodule:: mmcv.ops + :members: diff --git a/docs_zh_CN/community/contributing.md b/docs_zh_CN/community/contributing.md new file mode 100644 index 0000000..30bac87 --- /dev/null +++ b/docs_zh_CN/community/contributing.md @@ -0,0 +1,69 @@ +## è´¡çŒ®ä»£ç  + +欢迎任何类型的贡献,包括但ä¸é™äºŽ + +- 修改拼写错误或代ç é”™è¯¯ +- 添加文档或将文档翻译æˆå…¶ä»–语言 +- 添加新功能和新组件 + +### å·¥ä½œæµ +| 详细工作æµè§ [拉å–请求](pr.md) +1. å¤åˆ»å¹¶æ‹‰å–最新的 OpenMMLab 算法库 +2. 创建新的分支(ä¸å»ºè®®ä½¿ç”¨ä¸»åˆ†æ”¯ææ‹‰å–请求) +3. æäº¤ä½ çš„修改 +4. 
创建拉å–请求 + +```{note} +å¦‚æžœä½ è®¡åˆ’æ·»åŠ æ–°åŠŸèƒ½å¹¶ä¸”è¯¥åŠŸèƒ½åŒ…å«æ¯”较大的改动,建议先开 issue 讨论 +``` +### 代ç é£Žæ ¼ + +#### Python + +[PEP8](https://www.python.org/dev/peps/pep-0008/) 作为 OpenMMLab 算法库首选的代ç è§„范,我们使用以下工具检查和格å¼åŒ–ä»£ç  + +- [flake8](http://flake8.pycqa.org/en/latest/): Python 官方å‘布的代ç è§„范检查工具,是多个检查工具的å°è£… +- [yapf](https://github.com/google/yapf): Google å‘布的代ç è§„范检查工具 +- [isort](https://github.com/timothycrosley/isort): 自动调整模å—导入顺åºçš„工具 +- [markdownlint](https://github.com/markdownlint/markdownlint): 检查 markdown 文件的工具 +- [docformatter](https://github.com/myint/docformatter): æ ¼å¼åŒ– docstring 的工具 + +yapf å’Œ isort çš„é…ç½®å¯ä»¥åœ¨ [setup.cfg](./setup.cfg) 找到 + +通过é…ç½® [pre-commit hook](https://pre-commit.com/) ,我们å¯ä»¥åœ¨æäº¤ä»£ç æ—¶è‡ªåŠ¨æ£€æŸ¥å’Œæ ¼å¼åŒ– `flake8`ã€`yapf`ã€`isort`ã€`trailing whitespaces`ã€`markdown files`, +ä¿®å¤ `end-of-files`ã€`double-quoted-strings`ã€`python-encoding-pragma`ã€`mixed-line-ending`,调整 `requirments.txt` 的包顺åºã€‚ +pre-commit é’©å­çš„é…ç½®å¯ä»¥åœ¨ [.pre-commit-config](./.pre-commit-config.yaml) 找到。 + +在克隆算法库åŽï¼Œä½ éœ€è¦å®‰è£…å¹¶åˆå§‹åŒ– pre-commit é’©å­ + +```shell +pip install -U pre-commit +``` + +切æ¢ç®—法库根目录 + +```shell +pre-commit install +``` + +如果安装 markdownlint é‡åˆ°äº†é—®é¢˜ï¼Œå¯ä»¥å°è¯•使用以下的步骤安装 ruby + +```shell +# install rvm +curl -L https://get.rvm.io | bash -s -- --autolibs=read-fail +[[ -s "$HOME/.rvm/scripts/rvm" ]] && source "$HOME/.rvm/scripts/rvm" +rvm autolibs disable + +# install ruby +rvm install 2.7.1 +``` + +或者å‚考 [这个代ç åº“](https://github.com/innerlee/setup) å’Œ [`zzruby.sh`](https://github.com/innerlee/setup/blob/master/zzruby.sh)。 + +至此,æ¯ä¸€æ¬¡ commit ä¿®æ”¹éƒ½ä¼šè§¦å‘ pre-commit æ£€æŸ¥ä»£ç æ ¼å¼ã€‚ + +>æäº¤æ‹‰å–请求å‰ï¼Œè¯·ç¡®ä¿ä½ çš„代ç ç¬¦åˆ yapf çš„æ ¼å¼ + +#### C++ and CUDA + +C++ å’Œ CUDA 的代ç è§„范éµä»Ž [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html) diff --git a/docs_zh_CN/community/pr.md b/docs_zh_CN/community/pr.md new file mode 100644 index 
0000000..219e01d --- /dev/null +++ b/docs_zh_CN/community/pr.md @@ -0,0 +1,90 @@ +## 拉å–请求 + +### 什么是拉å–请求? + +`拉å–请求` (Pull Request), [GitHub 官方文档](https://docs.github.com/en/github/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/about-pull-requests)定义如下。 + +>拉å–请求是一ç§é€šçŸ¥æœºåˆ¶ã€‚你修改了他人的代ç ï¼Œå°†ä½ çš„修改通知原æ¥ä½œè€…,希望他åˆå¹¶ä½ çš„修改。 + +### 基本的工作æµï¼š + +1. èŽ·å–æœ€æ–°çš„代ç åº“ +2. ä»Žä¸»åˆ†æ”¯åˆ›å»ºæœ€æ–°çš„åˆ†æ”¯è¿›è¡Œå¼€å‘ +3. æäº¤ä¿®æ”¹ +4. 推é€ä½ çš„修改并创建一个`拉å–请求` +5. 讨论ã€å®¡æ ¸ä»£ç  +6. 将开å‘分支åˆå¹¶åˆ°ä¸»åˆ†æ”¯ + +### 具体步骤 + +1. èŽ·å–æœ€æ–°çš„代ç åº“ + + 当你第一次æ PR æ—¶ + - å¤åˆ» OpenMMLab 原代ç åº“,点击 GitHub 页é¢å³ä¸Šè§’çš„ **Fork** 按钮å³å¯ + ![avatar](../../docs/_static/community/1.png) + + - 克隆å¤åˆ»çš„代ç åº“到本地 + ```bash + git clone git@github.com:XXX/mmcv.git + ``` + + - 添加原代ç åº“为上游代ç åº“ + ```bash + git remote add upstream git@github.com:open-mmlab/mmcv + ``` + + 从第二个 PR èµ· + - 检出本地代ç åº“的主分支,然åŽä»Žæœ€æ–°çš„原代ç åº“çš„ä¸»åˆ†æ”¯æ‹‰å–æ›´æ–° + ```bash + git checkout master + git pull upstream master + ``` + +2. 从主分支创建一个新的开å‘分支 + ```bash + git checkout -b branchname + ``` + 注æ„:为了ä¿è¯æäº¤åކ岿¸…æ™°å¯è¯»ï¼Œæˆ‘ä»¬å¼ºçƒˆæŽ¨èæ‚¨å…ˆæ£€å‡ºä¸»åˆ†æ”¯ (master),å†åˆ›å»ºæ–°çš„分支。 + +3. æäº¤ä½ çš„修改 + ```bash + # coding + git add [files] + git commit -m 'messages' + ``` + +4. 推é€ä½ çš„修改到å¤åˆ»çš„代ç åº“,并创建一个`拉å–请求` + + 推é€å½“å‰åˆ†æ”¯åˆ°è¿œç«¯å¤åˆ»çš„代ç åº“ + ```bash + git push origin branchname + ``` + + + 创建一个`拉å–请求` + ![avatar](../../docs/_static/community/2.png) + + + 修改`拉å–请求`ä¿¡æ¯æ¨¡æ¿ï¼Œæè¿°ä¿®æ”¹åŽŸå› å’Œä¿®æ”¹å†…å®¹ã€‚è¿˜å¯ä»¥åœ¨ PR æè¿°ä¸­ï¼Œæ‰‹åЍ关è”到相关的`议题` (issue),(更多细节,请å‚考[官方文档](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue))。 + +5. è®¨è®ºå¹¶è¯„å®¡ä½ çš„ä»£ç  + + 创建`拉å–请求`时,å¯ä»¥å…³è”给相关人员进行评审 + ![avatar](../../docs/_static/community/3.png) + + + æ ¹æ®è¯„审人员的æ„è§ä¿®æ”¹ä»£ç ï¼Œå¹¶æŽ¨é€ä¿®æ”¹ + +6. 
`拉å–请求`åˆå¹¶ä¹‹åŽåˆ é™¤è¯¥åˆ†æ”¯ +```bash +git branch -d branchname # delete local branch +git push origin --delete branchname # delete remote branch +``` + +### PR 规范 + +1. 使用 [pre-commit hook](https://pre-commit.com),尽é‡å‡å°‘代ç é£Žæ ¼ç›¸å…³é—®é¢˜ +2. 一个PR对应一个短期分支 +3. 粒度è¦ç»†ï¼Œä¸€ä¸ªPRåªåšä¸€ä»¶äº‹æƒ…,é¿å…超大的PR + >- Bad:实现Faster R-CNN + >- Acceptable:ç»™ Faster R-CNN 添加一个 box head + >- Good:ç»™ box head å¢žåŠ ä¸€ä¸ªå‚æ•°æ¥æ”¯æŒè‡ªå®šä¹‰çš„ conv 层数 +4. æ¯æ¬¡ Commit æ—¶éœ€è¦æä¾›æ¸…æ™°ä¸”æœ‰æ„义 commit ä¿¡æ¯ +5. æä¾›æ¸…晰且有æ„义的`拉å–请求`æè¿° + >- 标题写明白任务å称,一般格å¼:[Prefix] Short description of the pull request (Suffix) + >- prefix: 新增功能 [Feature], ä¿® bug [Fix], 文档相关 [Docs], å¼€å‘中 [WIP] (暂时ä¸ä¼šè¢«review) + >- æè¿°é‡Œä»‹ç»`拉å–请求`的主è¦ä¿®æ”¹å†…容,结果,以åŠå¯¹å…¶ä»–部分的影å“, å‚考`拉å–请求`æ¨¡æ¿ + >- å…³è”相关的`议题` (issue) 和其他`拉å–请求` diff --git a/docs/zh_cn/compatibility.md b/docs_zh_CN/compatibility.md similarity index 100% rename from docs/zh_cn/compatibility.md rename to docs_zh_CN/compatibility.md diff --git a/docs/en/conf.py b/docs_zh_CN/conf.py similarity index 61% rename from docs/en/conf.py rename to docs_zh_CN/conf.py index 471bd22..e0c65d0 100644 --- a/docs/en/conf.py +++ b/docs_zh_CN/conf.py @@ -15,19 +15,21 @@ import os import sys import pytorch_sphinx_theme +from m2r import MdInclude +from recommonmark.transform import AutoStructify from sphinx.builders.html import StandaloneHTMLBuilder -sys.path.insert(0, os.path.abspath('../..')) +sys.path.insert(0, os.path.abspath('..')) -version_file = '../../mmcv/version.py' -with open(version_file) as f: +version_file = '../mmcv/version.py' +with open(version_file, 'r') as f: exec(compile(f.read(), version_file, 'exec')) __version__ = locals()['__version__'] # -- Project information ----------------------------------------------------- project = 'mmcv' -copyright = '2018-2022, OpenMMLab' +copyright = '2018-2021, OpenMMLab' author = 'MMCV Authors' # The short X.Y version @@ -47,28 +49,16 @@ release = __version__ extensions = [ 
'sphinx.ext.autodoc', - 'sphinx.ext.autosummary', - 'sphinx.ext.intersphinx', 'sphinx.ext.napoleon', 'sphinx.ext.viewcode', + 'sphinx.ext.autosectionlabel', 'sphinx_markdown_tables', 'myst_parser', 'sphinx_copybutton', ] # yapf: disable -myst_heading_anchors = 4 - -myst_enable_extensions = ['colon_fence'] - -# Configuration for intersphinx -intersphinx_mapping = { - 'python': ('https://docs.python.org/3', None), - 'numpy': ('https://numpy.org/doc/stable', None), - 'torch': ('https://pytorch.org/docs/stable/', None), - 'mmengine': ('https://mmengine.readthedocs.io/en/latest', None), -} - autodoc_mock_imports = ['mmcv._ext', 'mmcv.utils.ext_loader', 'torchvision'] +autosectionlabel_prefix_document = True # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] @@ -89,7 +79,7 @@ master_doc = 'index' # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = None +language = 'zh_CN' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. 
@@ -118,9 +108,94 @@ html_theme_options = { 'name': 'GitHub', 'url': 'https://github.com/open-mmlab/mmcv' }, - ], - # Specify the language of shared menu - 'menu_lang': 'en', + { + 'name': + '文档', + 'children': [ + { + 'name': 'MMCV', + 'url': 'https://mmcv.readthedocs.io/zh_CN/latest/', + }, + { + 'name': 'MIM', + 'url': 'https://openmim.readthedocs.io/en/latest/' + }, + { + 'name': 'MMAction2', + 'url': 'https://mmaction2.readthedocs.io/zh_CN/latest/', + }, + { + 'name': 'MMClassification', + 'url': + 'https://mmclassification.readthedocs.io/zh_CN/latest/', + }, + { + 'name': 'MMDetection', + 'url': 'https://mmdetection.readthedocs.io/zh_CN/latest/', + }, + { + 'name': 'MMDetection3D', + 'url': + 'https://mmdetection3d.readthedocs.io/zh_CN/latest/', + }, + { + 'name': 'MMEditing', + 'url': 'https://mmediting.readthedocs.io/zh_CN/latest/', + }, + { + 'name': 'MMGeneration', + 'url': 'https://mmgeneration.readthedocs.io/en/latest/', + }, + { + 'name': 'MMOCR', + 'url': 'https://mmocr.readthedocs.io/zh_CN/latest/', + }, + { + 'name': 'MMPose', + 'url': 'https://mmpose.readthedocs.io/zh_CN/latest/', + }, + { + 'name': 'MMSegmentation', + 'url': + 'https://mmsegmentation.readthedocs.io/zh_CN/latest/', + }, + { + 'name': 'MMTracking', + 'url': 'https://mmtracking.readthedocs.io/zh_CN/latest/', + }, + { + 'name': 'MMFlow', + 'url': 'https://mmflow.readthedocs.io/en/latest/', + }, + { + 'name': 'MMFewShot', + 'url': 'https://mmfewshot.readthedocs.io/zh_CN/latest/', + }, + ] + }, + { + 'name': + 'OpenMMLab', + 'children': [ + { + 'name': '主页', + 'url': 'https://openmmlab.com/' + }, + { + 'name': 'GitHub', + 'url': 'https://github.com/open-mmlab/' + }, + { + 'name': '推特', + 'url': 'https://twitter.com/OpenMMLab' + }, + { + 'name': '知乎', + 'url': 'https://zhihu.com/people/openmmlab' + }, + ] + }, + ] } # Add any paths that contain custom static files (such as style sheets) here, @@ -213,3 +288,16 @@ StandaloneHTMLBuilder.supported_image_types = [ # Ignore >>> when copying 
code copybutton_prompt_text = r'>>> |\.\.\. ' copybutton_prompt_is_regexp = True + + +def setup(app): + app.add_config_value('no_underscore_emphasis', False, 'env') + app.add_config_value('m2r_parse_relative_links', False, 'env') + app.add_config_value('m2r_anonymous_references', False, 'env') + app.add_config_value('m2r_disable_inline_math', False, 'env') + app.add_directive('mdinclude', MdInclude) + app.add_config_value('recommonmark_config', { + 'auto_toc_tree_section': 'Contents', + 'enable_eval_rst': True, + }, True) + app.add_transform(AutoStructify) diff --git a/docs_zh_CN/deployment/onnx.md b/docs_zh_CN/deployment/onnx.md new file mode 100644 index 0000000..c4e0041 --- /dev/null +++ b/docs_zh_CN/deployment/onnx.md @@ -0,0 +1,19 @@ +## MMCV中ONNX模å—简介 (实验性) + +### register_extra_symbolics + +在将PyTorch模型导出æˆONNXæ—¶ï¼Œéœ€è¦æ³¨å†Œé¢å¤–的符å·å‡½æ•° + +#### 范例 + +```python +import mmcv +from mmcv.onnx import register_extra_symbolics + +opset_version = 11 +register_extra_symbolics(opset_version) +``` + +#### 常è§é—®é¢˜ + +- æ—  diff --git a/docs_zh_CN/deployment/onnxruntime_custom_ops.md b/docs_zh_CN/deployment/onnxruntime_custom_ops.md new file mode 100644 index 0000000..594aefb --- /dev/null +++ b/docs_zh_CN/deployment/onnxruntime_custom_ops.md @@ -0,0 +1,333 @@ +## ONNX Runtimeè‡ªå®šä¹‰ç®—å­ + + + +- [ONNX Runtime自定义算å­](#onnx-runtime自定义算å­) + - [SoftNMS](#softnms) + - [æè¿°](#æè¿°) + - [æ¨¡åž‹å‚æ•°](#æ¨¡åž‹å‚æ•°) + - [输入](#输入) + - [输出](#输出) + - [类型约æŸ](#类型约æŸ) + - [RoIAlign](#roialign) + - [æè¿°](#æè¿°-1) + - [æ¨¡åž‹å‚æ•°](#æ¨¡åž‹å‚æ•°-1) + - [输入](#输入-1) + - [输出](#输出-1) + - [类型约æŸ](#类型约æŸ-1) + - [NMS](#nms) + - [æè¿°](#æè¿°-2) + - [æ¨¡åž‹å‚æ•°](#æ¨¡åž‹å‚æ•°-2) + - [输入](#输入-2) + - [输出](#输出-2) + - [类型约æŸ](#类型约æŸ-2) + - [grid_sampler](#grid_sampler) + - [æè¿°](#æè¿°-3) + - [æ¨¡åž‹å‚æ•°](#æ¨¡åž‹å‚æ•°-3) + - [输入](#输入-3) + - [输出](#输出-3) + - [类型约æŸ](#类型约æŸ-3) + - [CornerPool](#cornerpool) + - [æè¿°](#æè¿°-4) + - [æ¨¡åž‹å‚æ•°](#æ¨¡åž‹å‚æ•°-4) + - [输入](#输入-4) + - 
[输出](#输出-4) + - [类型约æŸ](#类型约æŸ-4) + - [cummax](#cummax) + - [æè¿°](#æè¿°-5) + - [æ¨¡åž‹å‚æ•°](#æ¨¡åž‹å‚æ•°-5) + - [输入](#输入-5) + - [输出](#输出-5) + - [类型约æŸ](#类型约æŸ-5) + - [cummin](#cummin) + - [æè¿°](#æè¿°-6) + - [æ¨¡åž‹å‚æ•°](#æ¨¡åž‹å‚æ•°-6) + - [输入](#输入-6) + - [输出](#输出-6) + - [类型约æŸ](#类型约æŸ-6) + - [MMCVModulatedDeformConv2d](#mmcvmodulateddeformconv2d) + - [æè¿°](#æè¿°-7) + - [æ¨¡åž‹å‚æ•°](#æ¨¡åž‹å‚æ•°-7) + - [输入](#输入-7) + - [输出](#输出-7) + - [类型约æŸ](#类型约æŸ-7) + + + +### SoftNMS + +#### æè¿° + +æ ¹æ®`scores`计算`boxes`çš„soft NMS。 请阅读[Soft-NMS -- Improving Object Detection With One Line of Code](https://arxiv.org/abs/1704.04503)了解细节。 + +#### æ¨¡åž‹å‚æ•° + +| 类型 | 傿•°å | æè¿° | +| ------- | --------------- | ------------------------------------------------------- | +| `float` | `iou_threshold` | 用æ¥åˆ¤æ–­å€™é€‰æ¡†é‡åˆåº¦çš„阈值,å–值范围[0, 1]。默认值为0 | +| `float` | `sigma` | é«˜æ–¯æ–¹æ³•çš„è¶…å‚æ•° | +| `float` | `min_score` | NMSçš„score阈值 | +| `int` | `method` | NMS的计算方å¼, (0: `naive`, 1: `linear`, 2: `gaussian`) | +| `int` | `offset` | 用æ¥è®¡ç®—候选框的宽高(x2 - x1 + offset)。å¯é€‰å€¼0或1 | + +#### 输入 + +
+
boxes: T
+
输入候选框。形状为(N, 4)的二维张é‡ï¼ŒN为候选框数é‡ã€‚
+
scores: T
+
输入得分。形状为(N, )的一维张é‡ã€‚
+
+ +#### 输出 + +
+
dets: T
+
输出的检测框与得分。形状为(num_valid_boxes, 5)的二维张é‡ï¼Œå†…容为[[x1, y1, x2, y2, score], ...]。num_valid_boxesæ˜¯åˆæ³•的检测框数é‡ã€‚
+
indices: tensor(int64)
+
输出åºå·ã€‚形状为(num_valid_boxes, )的一维张é‡ã€‚
+
+ +#### ç±»åž‹çº¦æŸ + +- T:tensor(float32) + +### RoIAlign + +#### æè¿° + +在特å¾å›¾ä¸Šè®¡ç®—RoIAlign,通常在åŒé˜¶æ®µç›®æ ‡æ£€æµ‹æ¨¡åž‹çš„bbox_head中使用 + +#### æ¨¡åž‹å‚æ•° + +| 类型 | 傿•°å | æè¿° | +| ------- | ---------------- | ------------------------------------------------------- | +| `int` | `output_height` | roi特å¾çš„输出高度 | +| `int` | `output_width` | roi特å¾çš„输出宽度 | +| `float` | `spatial_scale` | 输入检测框的缩放系数 | +| `int` | `sampling_ratio` | 输出的采样率。`0`表示使用密集采样 | +| `str` | `mode` | 池化方å¼ã€‚ `avg`或`max` | +| `int` | `aligned` | 如果`aligned=1`,则åƒç´ ä¼šè¿›è¡Œ-0.5çš„åç§»ä»¥è¾¾åˆ°æ›´å¥½çš„å¯¹é½ | + +#### 输入 + +
+
input: T
+
输入特å¾å›¾ï¼›å½¢çŠ¶ä¸º(N, C, H, W)的四维张é‡ï¼Œå…¶ä¸­N为batch大å°ï¼ŒCä¸ºè¾“å…¥é€šé“æ•°ï¼ŒHå’ŒW为输入特å¾å›¾çš„高和宽。
+
rois: T
+
需è¦è¿›è¡Œæ± åŒ–的感兴趣区域;形状为(num_rois, 5)的二维张é‡ï¼Œå†…容为[[batch_index, x1, y1, x2, y2], ...]。roisçš„åæ ‡ä¸ºè¾“入特å¾å›¾çš„åæ ‡ç³»ã€‚
+
+ +#### 输出 + +
+
feat: T
+
池化的输出;形状为(num_rois, C, output_height, output_width)的四维张é‡ã€‚æ¯ä¸ªè¾“出特å¾feat[i]都与输入感兴趣区域rois[i]一一对应。
+
+ +#### ç±»åž‹çº¦æŸ + +- T:tensor(float32) + +### NMS + +#### æè¿° + +æ ¹æ®IoUé˜ˆå€¼å¯¹å€™é€‰æ¡†è¿›è¡Œéžæžå¤§å€¼æŠ‘制。 + +#### æ¨¡åž‹å‚æ•° + +| 类型 | 傿•°å | æè¿° | +| ------- | --------------- | ----------------------------------------------------- | +| `float` | `iou_threshold` | 用æ¥åˆ¤æ–­å€™é€‰æ¡†é‡åˆåº¦çš„阈值,å–值范围[0, 1]。默认值为0 | +| `int` | `offset` | 用æ¥è®¡ç®—候选框的宽高(x2 - x1 + offset)。å¯é€‰å€¼0或1 | + +#### 输入 + +
+
boxes: T
+
输入候选框。形状为(N, 4)的二维张é‡ï¼ŒN为候选框数é‡ã€‚
+
scores: T
+
输入得分。形状为(N, )的一维张é‡ã€‚
+
+ +#### 输出 + +
+
indices: tensor(int32, Linear)
+
被选中的候选框索引。形状为(num_valid_boxes, )的一维张é‡ï¼Œnum_valid_boxes表示被选上的候选框数é‡ã€‚
+
+ +#### ç±»åž‹çº¦æŸ + +- T:tensor(float32) + +### grid_sampler + +#### æè¿° + +æ ¹æ®`grid`çš„åƒç´ ä½ç½®å¯¹`input`进行网格采样。 + +#### æ¨¡åž‹å‚æ•° + +| 类型 | 傿•°å | æè¿° | +| ----- | -------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------- | +| `int` | `interpolation_mode` | 计算输出使用的æ’值模å¼ã€‚(0: `bilinear` , 1: `nearest`) | +| `int` | `padding_mode` | 边缘填充模å¼ã€‚(0: `zeros`, 1: `border`, 2: `reflection`) | +| `int` | `align_corners` | 如果`align_corners=1`,则æžå€¼(`-1`å’Œ`1`)会被当åšè¾“入边缘åƒç´ çš„中心点。如果`align_corners=0`ï¼Œåˆ™å®ƒä»¬ä¼šè¢«çœ‹åšæ˜¯è¾¹ç¼˜åƒç´ çš„边缘点,å‡å°åˆ†è¾¨çŽ‡å¯¹é‡‡æ ·çš„å½±å“ | + +#### 输入 + +
+
input: T
+
输入特å¾ï¼›å½¢çŠ¶ä¸º(N, C, inH, inW)的四维张é‡ï¼Œå…¶ä¸­N为batch大å°ï¼ŒCä¸ºè¾“å…¥é€šé“æ•°ï¼ŒinHå’ŒinW为输入特å¾å›¾çš„高和宽。
+
grid: T
+
输入网格;形状为(N, outH, outW, 2)的四维张é‡ï¼ŒoutHå’ŒoutW为输出的高和宽。
+
+ +#### 输出 + +
+
output: T
+
输出特å¾ï¼›å½¢çŠ¶ä¸º(N, C, outH, outW)的四维张é‡ã€‚
+
+ +#### ç±»åž‹çº¦æŸ + +- T:tensor(float32, Linear) + +### CornerPool + +#### æè¿° + +对`input`计算CornerPool。请阅读[CornerNet -- Detecting Objects as Paired Keypoints](https://arxiv.org/abs/1808.01244)了解更多细节。 + +#### æ¨¡åž‹å‚æ•° + +| 类型 | 傿•°å | æè¿° | +| ----- | ------ | -------------------------------------------------------- | +| `int` | `mode` | 池化模å¼ã€‚(0: `top`, 1: `bottom`, 2: `left`, 3: `right`) | + +#### 输入 + +
+
input: T
+
输入特å¾ï¼›å½¢çŠ¶ä¸º(N, C, H, W)的四维张é‡ï¼Œå…¶ä¸­N为batch大å°ï¼ŒCä¸ºè¾“å…¥é€šé“æ•°ï¼ŒHå’ŒW为输入特å¾å›¾çš„高和宽。
+
+ +#### 输出 + +
+
output: T
+
输出特å¾ï¼›å½¢çŠ¶ä¸º(N, C, H, W)的四维张é‡ã€‚
+
+ +#### ç±»åž‹çº¦æŸ + +- T:tensor(float32) + +### cummax + +#### æè¿° + +返回一个元组(`values`, `indices`),其中`values`为`input`第`dim`维的累计最大值,`indices`为第`dim`维最大值ä½ç½®ã€‚请阅读[torch.cummax](https://pytorch.org/docs/stable/generated/torch.cummax.html)了解更多细节。 + +#### æ¨¡åž‹å‚æ•° + +| 类型 | 傿•°å | æè¿° | +| ----- | ------ | ------------------ | +| `int` | `dim` | 进行累计计算的维度 | + +#### 输入 + +
+
input: T
+
输入张é‡ï¼›å¯ä»¥ä½¿ä»»æ„形状;也支æŒç©ºTensor
+
+ +#### 输出 + +
+
output: T
+
`input`第`dim`维的累计最大值,形状与`input`相åŒã€‚类型和`input`一致
+
indices: tensor(int64)
+
第`dim`维最大值ä½ç½®ï¼Œå½¢çŠ¶ä¸Ž`input`相åŒã€‚
+
+ +#### ç±»åž‹çº¦æŸ + +- T:tensor(float32) + +### cummin + +#### æè¿° + +返回一个元组(`values`, `indices`),其中`values`为`input`第`dim`维的累计最å°å€¼ï¼Œ`indices`为第`dim`维最å°å€¼ä½ç½®ã€‚请阅读[torch.cummin](https://pytorch.org/docs/stable/generated/torch.cummin.html)了解更多细节。 + +#### æ¨¡åž‹å‚æ•° + +| 类型 | 傿•°å | æè¿° | +| ----- | ------ | ------------------ | +| `int` | `dim` | 进行累计计算的维度 | + +#### 输入 + +
+
input: T
+
输入张é‡ï¼›å¯ä»¥æ˜¯ä»»æ„形状;也支æŒç©ºTensor
+
+ +#### 输出 + +
+
output: T
+
`input`第`dim`维的累计最å°å€¼ï¼Œå½¢çŠ¶ä¸Ž`input`相åŒã€‚类型和`input`一致
+
indices: tensor(int64)
+
第`dim`维最å°å€¼ä½ç½®ï¼Œå½¢çŠ¶ä¸Ž`input`相åŒã€‚
+
+ +#### ç±»åž‹çº¦æŸ + +- T:tensor(float32) + +### MMCVModulatedDeformConv2d + +#### æè¿° + +在输入特å¾ä¸Šè®¡ç®—Modulated Deformable Convolution,请阅读[Deformable ConvNets v2: More Deformable, Better Results](https://arxiv.org/abs/1811.11168?from=timeline)了解更多细节。 + +#### æ¨¡åž‹å‚æ•° + +| 类型 | 傿•°å | æè¿° | +| -------------- | ------------------- | ------------------------------------------------------------- | +| `list of ints` | `stride` | å·ç§¯çš„æ­¥é•¿ (sH, sW) | +| `list of ints` | `padding` | 输入特å¾å¡«å……å¤§å° (padH, padW) | +| `list of ints` | `dilation` | å·ç§¯æ ¸å„元素间隔 (dH, dW) | +| `int` | `deformable_groups` | å¯å˜åç§»é‡çš„分组,通常置ä½1å³å¯ | +| `int` | `groups` | å·ç§¯åˆ†ç»„数,`input_channel`会根æ®è¿™ä¸ªå€¼è¢«åˆ†ä¸ºæ•°ä¸ªåˆ†ç»„进行计算 | + +#### 输入 + +
+
inputs[0]: T
+
输入特å¾ï¼›å½¢çŠ¶ä¸º(N, C, inH, inW)的四维张é‡ï¼Œå…¶ä¸­N为batch大å°ï¼ŒCä¸ºè¾“å…¥é€šé“æ•°ï¼ŒinHå’ŒinW为输入特å¾å›¾çš„高和宽。
+
inputs[1]: T
+
输入åç§»é‡ï¼›å½¢çŠ¶ä¸º(N, deformable_group* 2* kH* kW, outH, outW)的四维张é‡ï¼ŒkHå’ŒkW为输入特å¾å›¾çš„高和宽,outHå’ŒoutW为输入特å¾å›¾çš„高和宽。
+
inputs[2]: T
+
输入掩ç ï¼›å½¢çŠ¶ä¸º(N, deformable_group* kH* kW, outH, outW)的四维张é‡ã€‚
+
inputs[3]: T
+
输入æƒé‡ï¼›å½¢çŠ¶ä¸º(output_channel, input_channel, kH, kW)的四维张é‡ã€‚
+
inputs[4]: T, optional
+
输入åç§»é‡ï¼›å½¢çŠ¶ä¸º(output_channel)的一维张é‡ã€‚
+
+ +#### 输出 + +
+
outputs[0]: T
+
输出特å¾ï¼›å½¢çŠ¶ä¸º(N, output_channel, outH, outW)的四维张é‡ã€‚
+
+ +#### ç±»åž‹çº¦æŸ + +- T:tensor(float32, Linear) diff --git a/docs_zh_CN/deployment/onnxruntime_op.md b/docs_zh_CN/deployment/onnxruntime_op.md new file mode 100644 index 0000000..3898aa1 --- /dev/null +++ b/docs_zh_CN/deployment/onnxruntime_op.md @@ -0,0 +1,127 @@ +## MMCV中的ONNX Runtimeè‡ªå®šä¹‰ç®—å­ + +### ONNX Runtimeä»‹ç» + +**ONNX Runtime**是一个跨平å°çš„æŽ¨ç†ä¸Žè®­ç»ƒåŠ é€Ÿå™¨ï¼Œé€‚é…许多常用的机器学习/深度神ç»ç½‘络框架。请访问[github](https://github.com/microsoft/onnxruntime)了解更多信æ¯ã€‚ + +### ONNXä»‹ç» + +**ONNX**是**Open Neural Network Exchange**的缩写,是许多机器学习/深度神ç»ç½‘络框架使用的*中间表示(IR)*。请访问[github](https://github.com/onnx/onnx)了解更多信æ¯ã€‚ + +### 为什么è¦åœ¨MMCV中添加ONNX自定义算å­ï¼Ÿ + +- 为了验è¯ONNX模型在ONNX Runtime下的推ç†çš„æ­£ç¡®æ€§ã€‚ +- 为了方便使用了`mmcv.ops`自定义算å­çš„æ¨¡åž‹çš„部署工作。 + +### MMCV已支æŒçš„ç®—å­ + +| ç®—å­ | CPU | GPU | MMCV版本 | +| :------------------------------------------------------------------------------: | :---: | :---: | :------: | +| [SoftNMS](onnxruntime_custom_ops.md#softnms) | Y | N | 1.2.3 | +| [RoIAlign](onnxruntime_custom_ops.md#roialign) | Y | N | 1.2.5 | +| [NMS](onnxruntime_custom_ops.md#nms) | Y | N | 1.2.7 | +| [grid_sampler](onnxruntime_custom_ops.md#grid_sampler) | Y | N | 1.3.1 | +| [CornerPool](onnxruntime_custom_ops.md#cornerpool) | Y | N | 1.3.4 | +| [cummax](onnxruntime_custom_ops.md#cummax) | Y | N | 1.3.4 | +| [cummin](onnxruntime_custom_ops.md#cummin) | Y | N | 1.3.4 | +| [MMCVModulatedDeformConv2d](onnxruntime_custom_ops.md#mmcvmodulateddeformconv2d) | Y | N | 1.3.12 | + +### 如何编译ONNX Runtime自定义算å­ï¼Ÿ + +*è¯·æ³¨æ„æˆ‘们仅在**onnxruntime>=1.8.1**çš„Linux x86-64 cpuå¹³å°ä¸Šè¿›è¡Œè¿‡æµ‹è¯•* + +#### 准备工作 + +- 克隆代ç ä»“库 + +```bash +git clone https://github.com/open-mmlab/mmcv.git +``` + +- 从ONNX Runtime下载`onnxruntime-linux`:[releases](https://github.com/microsoft/onnxruntime/releases/tag/v1.8.1),解压缩,根æ®è·¯å¾„创建å˜é‡`ONNXRUNTIME_DIR`并把路径下的lib目录添加到`LD_LIBRARY_PATH`,步骤如下: + +```bash +wget 
https://github.com/microsoft/onnxruntime/releases/download/v1.8.1/onnxruntime-linux-x64-1.8.1.tgz + +tar -zxvf onnxruntime-linux-x64-1.8.1.tgz +cd onnxruntime-linux-x64-1.8.1 +export ONNXRUNTIME_DIR=$(pwd) +export LD_LIBRARY_PATH=$ONNXRUNTIME_DIR/lib:$LD_LIBRARY_PATH +``` + +#### Linux系统下编译 + +```bash +cd mmcv ## to MMCV root directory +MMCV_WITH_OPS=1 MMCV_WITH_ORT=1 python setup.py develop +``` + +### 如何在python下使用ONNX Runtime对导出的ONNX模型åšç¼–译 + +使用`pip`安装ONNX Runtime + +```bash +pip install onnxruntime==1.8.1 +``` + +推ç†èŒƒä¾‹ + +```python +import os + +import numpy as np +import onnxruntime as ort + +from mmcv.ops import get_onnxruntime_op_path + +ort_custom_op_path = get_onnxruntime_op_path() +assert os.path.exists(ort_custom_op_path) +session_options = ort.SessionOptions() +session_options.register_custom_ops_library(ort_custom_op_path) +## exported ONNX model with custom operators +onnx_file = 'sample.onnx' +input_data = np.random.randn(1, 3, 224, 224).astype(np.float32) +sess = ort.InferenceSession(onnx_file, session_options) +onnx_results = sess.run(None, {'input' : input_data}) +``` + +### 如何为MMCV添加ONNX Runtimeçš„è‡ªå®šä¹‰ç®—å­ + +#### å¼€å‘剿醒 + +- 该算å­çš„ONNX Runtime实现尚未在MMCV中支æŒ[已实现算å­åˆ—表](https://github.com/microsoft/onnxruntime/blob/master/docs/OperatorKernels.md)。 +- ç¡®ä¿è¯¥è‡ªå®šä¹‰ç®—å­å¯ä»¥è¢«ONNX导出。 + +#### 添加方法 + +以`soft_nms`为例: + +1. 在ONNX Runtime头文件目录`mmcv/ops/csrc/onnxruntime/`下添加头文件`soft_nms.h` +2. 在ONNX Runtimeæºç ç›®å½•`mmcv/ops/csrc/onnxruntime/cpu/`下添加算å­å®žçް`soft_nms.cpp` +3. 在[onnxruntime_register.cpp](../../mmcv/ops/csrc/onnxruntime/cpu/onnxruntime_register.cpp)中注册实现的算å­`soft_nms` + + ```c++ + #include "soft_nms.h" + + SoftNmsOp c_SoftNmsOp; + + if (auto status = ortApi->CustomOpDomain_Add(domain, &c_SoftNmsOp)) { + return status; + } + ``` + +4. 
在`tests/test_ops/test_onnx.py`添加å•元测试, + å¯ä»¥å‚考[here](../../tests/test_ops/test_onnx.py)。 + +**最åŽï¼Œæ¬¢è¿Žä¸ºMMCV添加ONNX Runtime自定义算å­** :nerd_face: + +### 已知问题 + +- "RuntimeError: tuple appears in op that does not forward tuples, unsupported kind: `prim::PythonOp`." + 1. 请注æ„`cummax`å’Œ`cummin`ç®—å­æ˜¯åœ¨torch >= 1.5.0被添加的。但他们需è¦åœ¨torch version >= 1.7.0æ‰èƒ½æ­£ç¡®å¯¼å‡ºã€‚å¦åˆ™ä¼šåœ¨å¯¼å‡ºæ—¶å‘生上é¢çš„错误。 + 2. 解决方法:å‡çº§PyTorch到1.7.0以上版本 + +### 引用 + +- [How to export Pytorch model with custom op to ONNX and run it in ONNX Runtime](https://github.com/onnx/tutorials/blob/master/PyTorchCustomOperator/README.md) +- [How to add a custom operator/kernel in ONNX Runtime](https://github.com/microsoft/onnxruntime/blob/master/docs/AddingCustomOp.md) diff --git a/docs_zh_CN/deployment/tensorrt_custom_ops.md b/docs_zh_CN/deployment/tensorrt_custom_ops.md new file mode 100644 index 0000000..123f288 --- /dev/null +++ b/docs_zh_CN/deployment/tensorrt_custom_ops.md @@ -0,0 +1,391 @@ +## TensorRTè‡ªå®šä¹‰ç®—å­ + + + +- [TensorRT自定义算å­](#tensorrt自定义算å­) + - [MMCVRoIAlign](#mmcvroialign) + - [æè¿°](#æè¿°) + - [æ¨¡åž‹å‚æ•°](#æ¨¡åž‹å‚æ•°) + - [输入](#输入) + - [输出](#输出) + - [类型约æŸ](#类型约æŸ) + - [ScatterND](#scatternd) + - [æè¿°](#æè¿°-1) + - [æ¨¡åž‹å‚æ•°](#æ¨¡åž‹å‚æ•°-1) + - [输入](#输入-1) + - [输出](#输出-1) + - [类型约æŸ](#类型约æŸ-1) + - [NonMaxSuppression](#nonmaxsuppression) + - [æè¿°](#æè¿°-2) + - [æ¨¡åž‹å‚æ•°](#æ¨¡åž‹å‚æ•°-2) + - [输入](#输入-2) + - [输出](#输出-2) + - [类型约æŸ](#类型约æŸ-2) + - [MMCVDeformConv2d](#mmcvdeformconv2d) + - [æè¿°](#æè¿°-3) + - [æ¨¡åž‹å‚æ•°](#æ¨¡åž‹å‚æ•°-3) + - [输入](#输入-3) + - [输出](#输出-3) + - [类型约æŸ](#类型约æŸ-3) + - [grid_sampler](#grid_sampler) + - [æè¿°](#æè¿°-4) + - [æ¨¡åž‹å‚æ•°](#æ¨¡åž‹å‚æ•°-4) + - [输入](#输入-4) + - [输出](#输出-4) + - [类型约æŸ](#类型约æŸ-4) + - [cummax](#cummax) + - [æè¿°](#æè¿°-5) + - [æ¨¡åž‹å‚æ•°](#æ¨¡åž‹å‚æ•°-5) + - [输入](#输入-5) + - [输出](#输出-5) + - [类型约æŸ](#类型约æŸ-5) + - [cummin](#cummin) + - [æè¿°](#æè¿°-6) + - [æ¨¡åž‹å‚æ•°](#æ¨¡åž‹å‚æ•°-6) + - [输入](#输入-6) + - 
[输出](#输出-6) + - [类型约æŸ](#类型约æŸ-6) + - [MMCVInstanceNormalization](#mmcvinstancenormalization) + - [æè¿°](#æè¿°-7) + - [æ¨¡åž‹å‚æ•°](#æ¨¡åž‹å‚æ•°-7) + - [输入](#输入-7) + - [输出](#输出-7) + - [类型约æŸ](#类型约æŸ-7) + - [MMCVModulatedDeformConv2d](#mmcvmodulateddeformconv2d) + - [æè¿°](#æè¿°-8) + - [æ¨¡åž‹å‚æ•°](#æ¨¡åž‹å‚æ•°-8) + - [输入](#输入-8) + - [输出](#输出-8) + - [类型约æŸ](#类型约æŸ-8) + + + +### MMCVRoIAlign + +#### æè¿° + +在特å¾å›¾ä¸Šè®¡ç®—RoIAlign,在多数åŒé˜¶æ®µç›®æ ‡æ£€æµ‹æ¨¡åž‹çš„bbox_head中使用 + +#### æ¨¡åž‹å‚æ•° + +| 类型 | 傿•°å | æè¿° | +| ------- | ---------------- | ------------------------------------------------------- | +| `int` | `output_height` | roi特å¾çš„输出高度 | +| `int` | `output_width` | roi特å¾çš„输出宽度 | +| `float` | `spatial_scale` | 输入检测框的缩放系数 | +| `int` | `sampling_ratio` | 输出的采样率。`0`表示使用密集采样 | +| `str` | `mode` | 池化方å¼ã€‚ `avg`或`max` | +| `int` | `aligned` | 如果`aligned=1`,则åƒç´ ä¼šè¿›è¡Œ-0.5çš„åç§»ä»¥è¾¾åˆ°æ›´å¥½çš„å¯¹é½ | + +#### 输入 + +
+
inputs[0]: T
+
输入特å¾å›¾ï¼›å½¢çŠ¶ä¸º(N, C, H, W)的四维张é‡ï¼Œå…¶ä¸­N为batch大å°ï¼ŒCä¸ºè¾“å…¥é€šé“æ•°ï¼ŒHå’ŒW为输入特å¾å›¾çš„高和宽。
+
inputs[1]: T
+
需è¦è¿›è¡Œæ± åŒ–的感兴趣区域;形状为(num_rois, 5)的二维张é‡ï¼Œå†…容为[[batch_index, x1, y1, x2, y2], ...]。roisçš„åæ ‡ä¸ºè¾“入特å¾å›¾çš„åæ ‡ç³»ã€‚
+
+ +#### 输出 + +
+
outputs[0]: T
+
池化的输出;形状为(num_rois, C, output_height, output_width)的四维张é‡ã€‚æ¯ä¸ªè¾“出特å¾feat[i]都与输入感兴趣区域rois[i]一一对应。
+
+#### ç±»åž‹çº¦æŸ + +- T:tensor(float32, Linear) + +### ScatterND + +#### æè¿° + +ScatterND接收三个输入,分别为秩为r >= 1çš„`data`,秩为q >= 1çš„`indices`以åŠç§©ä¸º q + r - indices.shape[-1] -1 çš„`update`。输出的计算方å¼ä¸ºï¼šé¦–先创建一个`data`的拷è´ï¼Œç„¶åŽæ ¹æ®`indces`的值使用`update`对拷è´çš„`data`进行更新。注æ„`indices`中ä¸åº”该存在相åŒçš„æ¡ç›®ï¼Œä¹Ÿå°±æ˜¯è¯´å¯¹åŒä¸€ä¸ªä½ç½®è¿›è¡Œä¸€æ¬¡ä»¥ä¸Šçš„æ›´æ–°æ˜¯ä¸å…许的。 + +输出的计算方å¼å¯ä»¥å‚考如下代ç ï¼š + +```python + output = np.copy(data) + update_indices = indices.shape[:-1] + for idx in np.ndindex(update_indices): + output[indices[idx]] = updates[idx] +``` + +#### æ¨¡åž‹å‚æ•° + +æ—  + +#### 输入 + +
+
inputs[0]: T
+
秩为r >= 1的输入`data`
+ +
inputs[1]: tensor(int32, Linear)
+
秩为q >= 1的输入`update`
+ +
inputs[2]: T
+
秩为 q + r - indices.shape[-1] -1 的输入`update`
+
+ +#### 输出 + +
+
outputs[0]: T
+
秩为r >= 1的输出张é‡
+
+ +#### ç±»åž‹çº¦æŸ + +- T:tensor(float32, Linear), tensor(int32, Linear) + +### NonMaxSuppression + +#### æè¿° + +æ ¹æ®IoUé˜ˆå€¼å¯¹å€™é€‰æ¡†è¿›è¡Œéžæžå¤§å€¼æŠ‘制。 + +#### æ¨¡åž‹å‚æ•° + +| 类型 | 傿•°å | æè¿° | +| ------- | ---------------------------- | ---------------------------------------------------------------------------------------- | +| `int` | `center_point_box` | 0 - 候选框的格å¼ä¸º[y1, x1, y2, x2], 1-候选框的格å¼ä¸º[x_center, y_center, width, height] | +| `int` | `max_output_boxes_per_class` | æ¯ä¸€ç±»æœ€å¤§çš„输出检测框个数。默认为0,输出检测框个数等于输入候选框数 | +| `float` | `iou_threshold` | 用æ¥åˆ¤æ–­å€™é€‰æ¡†é‡åˆåº¦çš„阈值,å–值范围[0, 1]。默认值为0 | +| `float` | `score_threshold` | 用æ¥åˆ¤æ–­å€™é€‰æ¡†æ˜¯å¦åˆæ³•的阈值 | +| `int` | `offset` | 检测框长宽计算方å¼ä¸º(x2 - x1 + offset),å¯é€‰å€¼0或1 | + +#### 输入 + +
+
inputs[0]: T
+
输入候选框。形状为(num_batches, spatial_dimension, 4)的三维张é‡
+
inputs[1]: T
+
输入得分。形状为(num_batches, num_classes, spatial_dimension)的三维张é‡
+
+ +#### 输出 + +
+
outputs[0]: tensor(int32, Linear)
+
被选中的候选框索引。形状为(num_selected_indices, 3)的二维张é‡ã€‚æ¯ä¸€è¡Œå†…容为[batch_index, class_index, box_index]。
+
其中 num_selected_indices=num_batches* num_classes* min(max_output_boxes_per_class, spatial_dimension)。
+
所有未被选中的候选框索引都会被填充为-1
+
+ +#### ç±»åž‹çº¦æŸ + +- T:tensor(float32, Linear) + +### MMCVDeformConv2d + +#### æè¿° + +在输入特å¾ä¸Šè®¡ç®—Deformable Convolution,请阅读[Deformable Convolutional Network](https://arxiv.org/abs/1703.06211)了解更多细节。 + +#### æ¨¡åž‹å‚æ•° + +| 类型 | 傿•°å | æè¿° | +| -------------- | ------------------ | --------------------------------------------------------------------------------------------- | +| `list of ints` | `stride` | å·ç§¯çš„æ­¥é•¿ (sH, sW) | +| `list of ints` | `padding` | 输入特å¾å¡«å……å¤§å° (padH, padW) | +| `list of ints` | `dilation` | å·ç§¯æ ¸å„元素间隔 (dH, dW) | +| `int` | `deformable_group` | å¯å˜åç§»é‡çš„分组 | +| `int` | `group` | å·ç§¯åˆ†ç»„数,`input_channel`会根æ®è¿™ä¸ªå€¼è¢«åˆ†ä¸ºæ•°ä¸ªåˆ†ç»„进行计算 | +| `int` | `im2col_step` | å¯å˜å·ç§¯ä½¿ç”¨im2col计算å·ç§¯ã€‚输入与åç§»é‡ä¼šä»¥im2col_step为步长分å—计算,å‡å°‘临时空间的使用é‡ã€‚ | + +#### 输入 + +
+
inputs[0]: T
+
输入特å¾ï¼›å½¢çŠ¶ä¸º(N, C, inH, inW)的四维张é‡ï¼Œå…¶ä¸­N为batch大å°ï¼ŒCä¸ºè¾“å…¥é€šé“æ•°ï¼ŒinHå’ŒinW为输入特å¾å›¾çš„高和宽
+
inputs[1]: T
+
输入åç§»é‡ï¼›å½¢çŠ¶ä¸º(N, deformable_group* 2* kH* kW, outH, outW)的四维张é‡ï¼ŒkHå’ŒkW为输入特å¾å›¾çš„高和宽,outHå’ŒoutW为输入特å¾å›¾çš„高和宽
+
inputs[2]: T
+
输入æƒé‡ï¼›å½¢çŠ¶ä¸º(output_channel, input_channel, kH, kW)的四维张é‡
+
+ +#### 输出 + +
+
outputs[0]: T
+
输出特å¾ï¼›å½¢çŠ¶ä¸º(N, output_channel, outH, outW)的四维张é‡
+
+ +#### ç±»åž‹çº¦æŸ + +- T:tensor(float32, Linear) + +### grid_sampler + +#### æè¿° + +æ ¹æ®`grid`çš„åƒç´ ä½ç½®å¯¹`input`进行网格采样。 + +#### æ¨¡åž‹å‚æ•° + +| 类型 | 傿•°å | æè¿° | +| ----- | -------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------- | +| `int` | `interpolation_mode` | 计算输出使用的æ’值模å¼ã€‚(0: `bilinear` , 1: `nearest`) | +| `int` | `padding_mode` | 边缘填充模å¼ã€‚(0: `zeros`, 1: `border`, 2: `reflection`) | +| `int` | `align_corners` | 如果`align_corners=1`,则æžå€¼(`-1`å’Œ`1`)会被当åšè¾“入边缘åƒç´ çš„中心点。如果`align_corners=0`ï¼Œåˆ™å®ƒä»¬ä¼šè¢«çœ‹åšæ˜¯è¾¹ç¼˜åƒç´ çš„边缘点,å‡å°åˆ†è¾¨çŽ‡å¯¹é‡‡æ ·çš„å½±å“ | + +#### 输入 + +
+
inputs[0]: T
+
输入特å¾ï¼›å½¢çŠ¶ä¸º(N, C, inH, inW)的四维张é‡ï¼Œå…¶ä¸­N为batch大å°ï¼ŒCä¸ºè¾“å…¥é€šé“æ•°ï¼ŒinHå’ŒinW为输入特å¾å›¾çš„高和宽
+
inputs[1]: T
+
输入网格;形状为(N, outH, outW, 2)的四维张é‡ï¼ŒoutHå’ŒoutW为输出的高和宽
+
+ +#### 输出 + +
+
outputs[0]: T
+
输出特å¾ï¼›å½¢çŠ¶ä¸º(N, C, outH, outW)的四维张é‡
+
+ +#### ç±»åž‹çº¦æŸ + +- T:tensor(float32, Linear) + +### cummax + +#### æè¿° + +返回一个元组(`values`, `indices`),其中`values`为`input`第`dim`维的累计最大值,`indices`为第`dim`维最大值ä½ç½®ã€‚请阅读[torch.cummax](https://pytorch.org/docs/stable/generated/torch.cummax.html)了解更多细节。 + +#### æ¨¡åž‹å‚æ•° + +| 类型 | 傿•°å | æè¿° | +| ----- | ------ | ------------------ | +| `int` | `dim` | 进行累计计算的维度 | + +#### 输入 + +
+
inputs[0]: T
+
输入张é‡ï¼›å¯ä»¥ä½¿ä»»æ„形状
+
+ +#### 输出 + +
+
outputs[0]: T
+
`input`第`dim`维的累计最大值,形状与`input`相åŒã€‚类型和`input`一致
+
outputs[1]: (int32, Linear)
+
第`dim`维最大值ä½ç½®ï¼Œå½¢çŠ¶ä¸Ž`input`相åŒ
+
+ +#### ç±»åž‹çº¦æŸ + +- T:tensor(float32, Linear) + +### cummin + +#### æè¿° + +返回一个元组(`values`, `indices`),其中`values`为`input`第`dim`维的累计最å°å€¼ï¼Œ`indices`为第`dim`维最å°å€¼ä½ç½®ã€‚请阅读[torch.cummin](https://pytorch.org/docs/stable/generated/torch.cummin.html)了解更多细节。 + +#### æ¨¡åž‹å‚æ•° + +| 类型 | 傿•°å | æè¿° | +| ----- | ------ | ------------------ | +| `int` | `dim` | 进行累计计算的维度 | + +#### 输入 + +
+
inputs[0]: T
+
输入张é‡ï¼›å¯ä»¥ä½¿ä»»æ„形状
+
+ +#### 输出 + +
+
outputs[0]: T
+
`input`第`dim`维的累计最å°å€¼ï¼Œå½¢çŠ¶ä¸Ž`input`相åŒã€‚类型和`input`一致
+
outputs[1]: (int32, Linear)
+
第`dim`维最å°å€¼ä½ç½®ï¼Œå½¢çŠ¶ä¸Ž`input`相åŒ
+
+ +#### ç±»åž‹çº¦æŸ + +- T:tensor(float32, Linear) + +### MMCVInstanceNormalization + +#### æè¿° + +对特å¾è®¡ç®—instance normalization,请阅读[Instance Normalization: The Missing Ingredient for Fast Stylization](https://arxiv.org/abs/1607.08022)了解更多详细信æ¯ã€‚ + +#### æ¨¡åž‹å‚æ•° + +| 类型 | 傿•°å | æè¿° | +| ------- | --------- | ---------------------------- | +| `float` | `epsilon` | 用æ¥é¿å…除0错误。默认为1e-05 | + +#### 输入 + +
+
inputs[0]: T
+
输入特å¾ã€‚形状为(N, C, H, W)的四维张é‡ï¼Œå…¶ä¸­N为batch大å°ï¼ŒCä¸ºè¾“å…¥é€šé“æ•°ï¼ŒHå’ŒW为输入特å¾å›¾çš„高和宽
+
inputs[1]: T
+
输入缩放系数。形状为(C,)的一维张é‡
+
inputs[2]: T
+
输入åç§»é‡ã€‚形状为(C,)的一维张é‡
+
+ +#### 输出 + +
+
outputs[0]: T
+
输出特å¾ã€‚形状为(N, C, H, W)的四维张é‡
+
+ +#### ç±»åž‹çº¦æŸ + +- T:tensor(float32, Linear) + +### MMCVModulatedDeformConv2d + +#### æè¿° + +在输入特å¾ä¸Šè®¡ç®—Modulated Deformable Convolution,请阅读[Deformable ConvNets v2: More Deformable, Better Results](https://arxiv.org/abs/1811.11168?from=timeline)了解更多细节。 + +#### æ¨¡åž‹å‚æ•° + +| 类型 | 傿•°å | æè¿° | +| -------------- | ------------------- | ------------------------------------------------------------- | +| `list of ints` | `stride` | å·ç§¯çš„æ­¥é•¿ (sH, sW) | +| `list of ints` | `padding` | 输入特å¾å¡«å……å¤§å° (padH, padW) | +| `list of ints` | `dilation` | å·ç§¯æ ¸å„元素间隔 (dH, dW) | +| `int` | `deformable_groups` | å¯å˜åç§»é‡çš„分组,通常置ä½1å³å¯ | +| `int` | `groups` | å·ç§¯åˆ†ç»„数,`input_channel`会根æ®è¿™ä¸ªå€¼è¢«åˆ†ä¸ºæ•°ä¸ªåˆ†ç»„进行计算 | + +#### 输入 + +
+
inputs[0]: T
+
输入特å¾ï¼›å½¢çŠ¶ä¸º(N, C, inH, inW)的四维张é‡ï¼Œå…¶ä¸­N为batch大å°ï¼ŒCä¸ºè¾“å…¥é€šé“æ•°ï¼ŒinHå’ŒinW为输入特å¾å›¾çš„高和宽
+
inputs[1]: T
+
输入åç§»é‡ï¼›å½¢çŠ¶ä¸º(N, deformable_group* 2* kH* kW, outH, outW)的四维张é‡ï¼ŒkHå’ŒkW为输入特å¾å›¾çš„高和宽,outHå’ŒoutW为输入特å¾å›¾çš„高和宽
+
inputs[2]: T
+
输入掩ç ï¼›å½¢çŠ¶ä¸º(N, deformable_group* kH* kW, outH, outW)的四维张é‡
+
inputs[3]: T
+
输入æƒé‡ï¼›å½¢çŠ¶ä¸º(output_channel, input_channel, kH, kW)的四维张é‡
+
inputs[4]: T, optional
+
输入åç§»é‡ï¼›å½¢çŠ¶ä¸º(output_channel)的一维张é‡
+
+ +#### 输出 + +
+
outputs[0]: T
+
输出特å¾ï¼›å½¢çŠ¶ä¸º(N, output_channel, outH, outW)的四维张é‡
+
+ +#### ç±»åž‹çº¦æŸ + +- T:tensor(float32, Linear) diff --git a/docs_zh_CN/deployment/tensorrt_plugin.md b/docs_zh_CN/deployment/tensorrt_plugin.md new file mode 100644 index 0000000..0f385b8 --- /dev/null +++ b/docs_zh_CN/deployment/tensorrt_plugin.md @@ -0,0 +1,177 @@ +## MMCV中的TensorRTè‡ªå®šä¹‰ç®—å­ (实验性) + + + +- [MMCV中的TensorRTè‡ªå®šä¹‰ç®—å­ (实验性)](#mmcv中的tensorrt自定义算å­-实验性) + - [介ç»](#介ç») + - [MMCV中的TensorRTæ’件列表](#mmcv中的tensorrtæ’件列表) + - [如何编译MMCV中的TensorRTæ’ä»¶](#如何编译mmcv中的tensorrtæ’ä»¶) + - [准备](#准备) + - [在Linux上编译](#在linux上编译) + - [创建TensorRT推ç†å¼•擎并在python下进行推ç†](#创建tensorrt推ç†å¼•擎并在python下进行推ç†) + - [如何在MMCV中添加新的TensorRT自定义算å­](#如何在mmcv中添加新的tensorrt自定义算å­) + - [ä¸»è¦æµç¨‹](#ä¸»è¦æµç¨‹) + - [注æ„](#注æ„) + - [已知问题](#已知问题) + - [引用](#引用) + + + +### ä»‹ç» + +**NVIDIA TensorRT**是一个为深度学习模型高性能推ç†å‡†å¤‡çš„软件开å‘工具(SDK)。它包括深度学习推ç†ä¼˜åŒ–器和è¿è¡Œæ—¶ï¼Œå¯ä¸ºæ·±åº¦å­¦ä¹ æŽ¨ç†åº”用æä¾›ä½Žå»¶è¿Ÿå’Œé«˜åžåé‡ã€‚请访问[developer's website](https://developer.nvidia.com/tensorrt)了解更多信æ¯ã€‚ +为了简化TensorRT部署带有MMCV自定义算å­çš„æ¨¡åž‹çš„æµç¨‹ï¼ŒMMCV中添加了一系列TensorRTæ’件。 + +### MMCV中的TensorRTæ’件列表 + +| ONNXç®—å­ | TensorRTæ’ä»¶ | MMCV版本 | +| :-----------------------: | :-----------------------------------------------------------------------------: | :------: | +| MMCVRoiAlign | [MMCVRoiAlign](./tensorrt_custom_ops.md#mmcvroialign) | 1.2.6 | +| ScatterND | [ScatterND](./tensorrt_custom_ops.md#scatternd) | 1.2.6 | +| NonMaxSuppression | [NonMaxSuppression](./tensorrt_custom_ops.md#nonmaxsuppression) | 1.3.0 | +| MMCVDeformConv2d | [MMCVDeformConv2d](./tensorrt_custom_ops.md#mmcvdeformconv2d) | 1.3.0 | +| grid_sampler | [grid_sampler](./tensorrt_custom_ops.md#grid-sampler) | 1.3.1 | +| cummax | [cummax](./tensorrt_custom_ops.md#cummax) | 1.3.5 | +| cummin | [cummin](./tensorrt_custom_ops.md#cummin) | 1.3.5 | +| MMCVInstanceNormalization | [MMCVInstanceNormalization](./tensorrt_custom_ops.md#mmcvinstancenormalization) | 1.3.5 | +| MMCVModulatedDeformConv2d | 
[MMCVModulatedDeformConv2d](./tensorrt_custom_ops.md#mmcvmodulateddeformconv2d) | master | + +æ³¨æ„ + +- 以上所有算å­å‡åœ¨ TensorRT-7.2.1.6.Ubuntu-16.04.x86_64-gnu.cuda-10.2.cudnn8.0 环境下开å‘。 + +### 如何编译MMCV中的TensorRTæ’ä»¶ + +#### 准备 + +- 克隆代ç ä»“库 + +```bash +git clone https://github.com/open-mmlab/mmcv.git +``` + +- 安装TensorRT + +从 [NVIDIA Developer Zone](https://developer.nvidia.com/nvidia-tensorrt-download) 下载åˆé€‚çš„TensorRT版本。 + +比如,对安装了cuda-10.2çš„x86-64çš„Ubuntu 16.04,下载文件为`TensorRT-7.2.1.6.Ubuntu-16.04.x86_64-gnu.cuda-10.2.cudnn8.0.tar.gz`. + +ç„¶åŽä½¿ç”¨ä¸‹é¢æ–¹å¼å®‰è£…å¹¶é…置环境 + +```bash +cd ~/Downloads +tar -xvzf TensorRT-7.2.1.6.Ubuntu-16.04.x86_64-gnu.cuda-10.2.cudnn8.0.tar.gz +export TENSORRT_DIR=`pwd`/TensorRT-7.2.1.6 +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$TENSORRT_DIR/lib +``` + +安装pythonä¾èµ–: tensorrt, graphsurgeon, onnx-graphsurgeon + +```bash +pip install $TENSORRT_DIR/python/tensorrt-7.2.1.6-cp37-none-linux_x86_64.whl +pip install $TENSORRT_DIR/onnx_graphsurgeon/onnx_graphsurgeon-0.2.6-py2.py3-none-any.whl +pip install $TENSORRT_DIR/graphsurgeon/graphsurgeon-0.4.5-py2.py3-none-any.whl +``` + +想了解更多通过tar包安装TensorRT,请访问[Nvidia' website](https://docs.nvidia.com/deeplearning/tensorrt/archives/tensorrt-721/install-guide/index.html#installing-tar). + +#### 在Linux上编译 + +```bash +cd mmcv ## to MMCV root directory +MMCV_WITH_OPS=1 MMCV_WITH_TRT=1 pip install -e . 
+``` + +### 创建TensorRT推ç†å¼•擎并在pythonä¸‹è¿›è¡ŒæŽ¨ç† + +范例如下: + +```python +import torch +import onnx + +from mmcv.tensorrt import (TRTWrapper, onnx2trt, save_trt_engine, + is_tensorrt_plugin_loaded) + +assert is_tensorrt_plugin_loaded(), 'Requires to complie TensorRT plugins in mmcv' + +onnx_file = 'sample.onnx' +trt_file = 'sample.trt' +onnx_model = onnx.load(onnx_file) + +## Model input +inputs = torch.rand(1, 3, 224, 224).cuda() +## Model input shape info +opt_shape_dict = { + 'input': [list(inputs.shape), + list(inputs.shape), + list(inputs.shape)] +} + +## Create TensorRT engine +max_workspace_size = 1 << 30 +trt_engine = onnx2trt( + onnx_model, + opt_shape_dict, + max_workspace_size=max_workspace_size) + +## Save TensorRT engine +save_trt_engine(trt_engine, trt_file) + +## Run inference with TensorRT +trt_model = TRTWrapper(trt_file, ['input'], ['output']) + +with torch.no_grad(): + trt_outputs = trt_model({'input': inputs}) + output = trt_outputs['output'] + +``` + +### 如何在MMCV中添加新的TensorRTè‡ªå®šä¹‰ç®—å­ + +#### ä¸»è¦æµç¨‹ + +䏋颿˜¯ä¸»è¦çš„æ­¥éª¤ï¼š + +1. 添加c++头文件 +2. 添加c++æºæ–‡ä»¶ +3. 添加cuda kernel文件 +4. 在`trt_plugin.cpp`中注册æ’ä»¶ +5. 在`tests/test_ops/test_tensorrt.py`中添加å•元测试 + +**以RoIAlignç®—å­æ’ä»¶`roi_align`举例。** + +1. 在TensorRT包å«ç›®å½•`mmcv/ops/csrc/tensorrt/`中添加头文件`trt_roi_align.hpp` +2. 在TensorRTæºç ç›®å½•`mmcv/ops/csrc/tensorrt/plugins/`中添加头文件`trt_roi_align.cpp` +3. 在TensorRTæºç ç›®å½•`mmcv/ops/csrc/tensorrt/plugins/`中添加cuda kernel文件`trt_roi_align_kernel.cu` +4. 在[trt_plugin.cpp](https://github.com/open-mmlab/mmcv/blob/master/mmcv/ops/csrc/tensorrt/plugins/trt_plugin.cpp)中注册`roi_align`æ’ä»¶ + + ```c++ + #include "trt_plugin.hpp" + + #include "trt_roi_align.hpp" + + REGISTER_TENSORRT_PLUGIN(RoIAlignPluginDynamicCreator); + + extern "C" { + bool initLibMMCVInferPlugins() { return true; } + } // extern "C" + ``` + +5. 
在`tests/test_ops/test_tensorrt.py`中添加å•元测试 + +#### æ³¨æ„ + +- 部分MMCV中的自定义算å­å­˜åœ¨å¯¹åº”çš„cuda实现,在进行TensorRTæ’ä»¶å¼€å‘的时候å¯ä»¥å‚考。 + +### 已知问题 + +- æ—  + +### 引用 + +- [Developer guide of Nvidia TensorRT](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html) +- [TensorRT Open Source Software](https://github.com/NVIDIA/TensorRT) +- [onnx-tensorrt](https://github.com/onnx/onnx-tensorrt) +- [TensorRT python API](https://docs.nvidia.com/deeplearning/tensorrt/api/python_api/index.html) +- [TensorRT c++ plugin API](https://docs.nvidia.com/deeplearning/tensorrt/api/c_api/classnvinfer1_1_1_i_plugin.html) diff --git a/docs_zh_CN/faq.md b/docs_zh_CN/faq.md new file mode 100644 index 0000000..e5d6395 --- /dev/null +++ b/docs_zh_CN/faq.md @@ -0,0 +1,37 @@ +## 常è§é—®é¢˜ + +在这里我们列出了用户ç»å¸¸é‡åˆ°çš„问题以åŠå¯¹åº”的解决方法。如果您é‡åˆ°äº†å…¶ä»–常è§çš„问题,并且知é“å¯ä»¥å¸®åˆ°å¤§å®¶çš„解决办法, +æ¬¢è¿Žéšæ—¶ä¸°å¯Œè¿™ä¸ªåˆ—表。 + +- MMCV å’Œ MMDetection 的兼容性问题;"ConvWS is already registered in conv layer" + + 请按照上述说明为您的 MMDetection 版本安装正确版本的 MMCV。 + +- "No module named 'mmcv.ops'"; "No module named 'mmcv._ext'" + + 1. 使用 `pip uninstall mmcv` å¸è½½æ‚¨çŽ¯å¢ƒä¸­çš„ mmcv + 2. 按照上述说明安装 mmcv-full + +- "invalid device function" 或者 "no kernel image is available for execution" + + 1. 检查 GPU çš„ CUDA 计算能力 + 2. è¿è¡Œ `python mmdet/utils/collect_env.py` æ¥æ£€æŸ¥ PyTorchã€torchvision å’Œ MMCV æ˜¯å¦æ˜¯é’ˆå¯¹æ­£ç¡®çš„ GPU 架构构建的 + 您å¯èƒ½éœ€è¦åŽ»è®¾ç½® `TORCH_CUDA_ARCH_LIST` æ¥é‡æ–°å®‰è£… MMCV + 兼容性问题的å¯èƒ½ä¼šå‡ºçŽ°åœ¨ä½¿ç”¨æ—§ç‰ˆçš„ GPUs,如:colab 上的 Tesla K80 (3.7) + 3. 检查è¿è¡ŒçŽ¯å¢ƒæ˜¯å¦å’Œ mmcv/mmdet 编译时的环境相åŒã€‚例如,您å¯èƒ½ä½¿ç”¨ CUDA 10.0 编译 mmcv,但在 CUDA 9.0 的环境中è¿è¡Œå®ƒ + +- "undefined symbol" 或者 "cannot open xxx.so"。 + + 1. 如果符å·å’Œ CUDA/C++ 相关(例如:libcudart.so 或者 GLIBCXX),请检查 CUDA/GCC è¿è¡Œæ—¶çš„版本是å¦å’Œç¼–译 mmcv 的一致 + 2. 如果符å·å’Œ PyTorch 相关(例如:符å·åŒ…å« caffeã€aten å’Œ TH),请检查 PyTorch è¿è¡Œæ—¶çš„版本是å¦å’Œç¼–译 mmcv 的一致 + 3. 
è¿è¡Œ `python mmdet/utils/collect_env.py` 以检查 PyTorchã€torchvision å’Œ MMCV 构建和è¿è¡Œçš„环境是å¦ç›¸åŒ + +- "RuntimeError: CUDA error: invalid configuration argument"。 + + 这个错误å¯èƒ½æ˜¯ç”±äºŽæ‚¨çš„ GPU 性能ä¸ä½³é€ æˆçš„。å°è¯•é™ä½Ž[THREADS_PER_BLOCK](https://github.com/open-mmlab/mmcv/blob/cac22f8cf5a904477e3b5461b1cc36856c2793da/mmcv/ops/csrc/common_cuda_helper.hpp#L10) + çš„å€¼å¹¶é‡æ–°ç¼–译 mmcv。 + +- "RuntimeError: nms is not compiled with GPU support"。 + + 这个错误是由于您的 CUDA 环境没有正确安装。 + 您å¯ä»¥å°è¯•釿–°å®‰è£…您的 CUDA 环境,然åŽåˆ é™¤ mmcv/build æ–‡ä»¶å¤¹å¹¶é‡æ–°ç¼–译 mmcv。 diff --git a/docs_zh_CN/get_started/build.md b/docs_zh_CN/get_started/build.md new file mode 100644 index 0000000..77fb86e --- /dev/null +++ b/docs_zh_CN/get_started/build.md @@ -0,0 +1,222 @@ +## 从æºç ç¼–译 MMCV + +### 在 Linux 或者 macOS 上编译 MMCV + +克隆算法库 + +```bash +git clone https://github.com/open-mmlab/mmcv.git +cd mmcv +``` + +ä½ å¯ä»¥å®‰è£… lite 版本 + +```bash +pip install -e . +``` + +也å¯ä»¥å®‰è£… full 版本 + +```bash +MMCV_WITH_OPS=1 pip install -e . +``` + +如果是在 macOS 上编译,则需è¦åœ¨å®‰è£…命令剿·»åŠ ä¸€äº›çŽ¯å¢ƒå˜é‡ + +```bash +CC=clang CXX=clang++ CFLAGS='-stdlib=libc++' +``` + +例如 + +```bash +CC=clang CXX=clang++ CFLAGS='-stdlib=libc++' MMCV_WITH_OPS=1 pip install -e . 
+``` + +```{note} +如果你打算使用 `opencv-python-headless` è€Œä¸æ˜¯ `opencv-python`,例如在一个很å°çš„容器环境或者没有图形用户界é¢çš„æœåŠ¡å™¨ä¸­ï¼Œä½ å¯ä»¥å…ˆå®‰è£… `opencv-python-headless`,这样在安装 mmcv ä¾èµ–的过程中会跳过 `opencv-python` +``` +### 在 Windows 上编译 MMCV + +在 Windows 上编译 MMCV 比 Linux 夿‚,本节将一步步介ç»å¦‚何在 Windows 上编译 MMCV。 + +#### ä¾èµ–项 + +请首先安装以下的ä¾èµ–项: + +- [Git](https://git-scm.com/download/win):安装期间,请选择 **add git to Path** +- [Visual Studio Community 2019](https://visualstudio.microsoft.com):用于编译 C++ å’Œ CUDA ä»£ç  +- [Miniconda](https://docs.conda.io/en/latest/miniconda.html):包管ç†å·¥å…· +- [CUDA 10.2](https://developer.nvidia.com/cuda-10.2-download-archive):如果åªéœ€è¦ CPU 版本å¯ä»¥ä¸å®‰è£… CUDA,安装CUDAæ—¶ï¼Œå¯æ ¹æ®éœ€è¦è¿›è¡Œè‡ªå®šä¹‰å®‰è£…。如果已ç»å®‰è£…新版本的显å¡é©±åŠ¨ï¼Œå»ºè®®å–æ¶ˆé©±åŠ¨ç¨‹åºçš„安装 + +```{note} +您需è¦çŸ¥é“如何在 Windows 上设置å˜é‡çŽ¯å¢ƒï¼Œå°¤å…¶æ˜¯ "PATH" 的设置,以下安装过程都会用到。 +``` + +#### 设置 Python 环境 + +1. 从 Windows èœå•å¯åЍ Anaconda 命令行 + +```{note} +如 Miniconda 安装程åºå»ºè®®ï¼Œä¸è¦ä½¿ç”¨åŽŸå§‹çš„ `cmd.exe` 或是 `powershell.exe`。命令行有两个版本,一个基于 PowerShell,一个基于传统的 `cmd.exe`。请注æ„以下说明都是使用的基于 PowerShell +``` + +2. 创建一个新的 Conda 环境 + + ```shell + conda create --name mmcv python=3.7 # ç»æµ‹è¯•,3.6, 3.7, 3.8 也能通过 + conda activate mmcv # ç¡®ä¿åšä»»ä½•æ“作å‰å…ˆæ¿€æ´»çŽ¯å¢ƒ + ``` + +3. 安装 PyTorch 时,å¯ä»¥æ ¹æ®éœ€è¦å®‰è£…æ”¯æŒ CUDA æˆ–ä¸æ”¯æŒ CUDA 的版本 + + ```shell + # CUDA version + conda install pytorch torchvision cudatoolkit=10.2 -c pytorch + # CPU version + conda install pytorch torchvision cpuonly -c pytorch + ``` + +4. 准备 MMCV æºä»£ç  + + ```shell + git clone https://github.com/open-mmlab/mmcv.git + cd mmcv + ``` + +5. 安装所需 Python ä¾èµ–包 + + ```shell + pip3 install -r requirements.txt + ``` + +#### 编译与安装 MMCV + +MMCV 有三ç§å®‰è£…的模å¼ï¼š + +1. Lite 版本(ä¸åŒ…å«ç®—å­ï¼‰ + + è¿™ç§æ–¹å¼ä¸‹ï¼Œæ²¡æœ‰ç®—å­è¢«ç¼–è¯‘ï¼Œè¿™ç§æ¨¡å¼çš„ mmcv 是原生的 python 包 + +2. Full 版本(åªåŒ…å« CPU ç®—å­ï¼‰ + + 编译 CPU ç®—å­ï¼Œä½†åªæœ‰ x86 将会被编译,并且编译版本åªèƒ½åœ¨ CPU only 情况下è¿è¡Œ + +3. 
Full ç‰ˆæœ¬ï¼ˆæ—¢åŒ…å« CPU ç®—å­ï¼ŒåˆåŒ…å« CUDA ç®—å­ï¼‰ + + åŒæ—¶ç¼–译 CPU å’Œ CUDA ç®—å­ï¼Œ`ops` 模å—çš„ x86 与 CUDA 的代ç éƒ½å¯ä»¥è¢«ç¼–è¯‘ã€‚åŒæ—¶ç¼–译的版本å¯ä»¥åœ¨ CUDA 上调用 GPU + +##### 通用步骤 + +1. 设置 MSVC 编译器 + + 设置环境å˜é‡ã€‚添加 `C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Tools\MSVC\14.27.29110\bin\Hostx86\x64` 到 `PATH`,则 `cl.exe` å¯ä»¥åœ¨å‘½ä»¤è¡Œä¸­è¿è¡Œï¼Œå¦‚下所示。 + + ```none + (base) PS C:\Users\xxx> cl + Microsoft (R) C/C++ Optimizing Compiler Version 19.27.29111 for x64 + Copyright (C) Microsoft Corporation. All rights reserved. + + usage: cl [ option... ] filename... [ / link linkoption... ] + ``` + + 为了兼容性,我们使用 x86-hosted ä»¥åŠ x64-targeted 版本,å³è·¯å¾„中的 `Hostx86\x64` 。 + + 因为 PyTorch å°†è§£æž `cl.exe` çš„è¾“å‡ºä»¥æ£€æŸ¥å…¶ç‰ˆæœ¬ï¼Œåªæœ‰ utf-8 将会被识别,你å¯èƒ½éœ€è¦å°†ç³»ç»Ÿè¯­è¨€æ›´æ”¹ä¸ºè‹±è¯­ã€‚æŽ§åˆ¶é¢æ¿ -> 地区-> 管ç†-> éž Unicode æ¥è¿›è¡Œè¯­è¨€è½¬æ¢ã€‚ + +##### 安装方å¼ä¸€ï¼šLite version(ä¸åŒ…å«ç®—å­ï¼‰ + +在完æˆä¸Šè¿°çš„公共步骤åŽï¼Œä»Žèœå•打开 Anaconda 命令框,输入以下命令 + +```shell +# 激活环境 +conda activate mmcv +# 切æ¢åˆ° mmcv 根目录 +cd mmcv +# 安装 +python setup.py develop +# 检查是å¦å®‰è£…æˆåŠŸ +pip list +``` + +##### 安装方å¼äºŒï¼šFull version(åªç¼–译 CPU ç®—å­ï¼‰ + +1. 完æˆä¸Šè¿°çš„公共步骤 + +2. 设置环境å˜é‡ + + ```shell + $env:MMCV_WITH_OPS = 1 + $env:MAX_JOBS = 8 # æ ¹æ®ä½ å¯ç”¨CPU以åŠå†…å­˜é‡è¿›è¡Œè®¾ç½® + ``` + +3. 编译安装 + + ```shell + conda activate mmcv # 激活环境 + cd mmcv # 改å˜è·¯å¾„ + python setup.py build_ext # 如果æˆåŠŸ, cl 将被å¯åŠ¨ç”¨äºŽç¼–è¯‘ç®—å­ + python setup.py develop # 安装 + pip list # 检查是å¦å®‰è£…æˆåŠŸ + ``` + +##### 安装方å¼ä¸‰ï¼šFull version(既编译 CPU ç®—å­åˆç¼–译 CUDA ç®—å­ï¼‰ + +1. 完æˆä¸Šè¿°çš„公共步骤 + +2. 设置环境å˜é‡ + + ```shell + $env:MMCV_WITH_OPS = 1 + $env:MAX_JOBS = 8 # æ ¹æ®ä½ å¯ç”¨CPU以åŠå†…å­˜é‡è¿›è¡Œè®¾ç½® + ``` + +3. 
检查 `CUDA_PATH` 或者 `CUDA_HOME` 环境å˜é‡å·²ç»å­˜åœ¨åœ¨ `envs` 之中 + + ```none + (base) PS C:\Users\WRH> ls env: + + Name Value + ---- ----- + CUDA_PATH C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2 + CUDA_PATH_V10_1 C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1 + CUDA_PATH_V10_2 C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2 + ``` + + 如果没有,你å¯ä»¥æŒ‰ç…§ä¸‹é¢çš„æ­¥éª¤è®¾ç½® + + ```shell + $env:CUDA_HOME = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2" + # 或者 + $env:CUDA_HOME = $env:CUDA_PATH_V10_2 # CUDA_PATH_V10_2 å·²ç»åœ¨çŽ¯å¢ƒå˜é‡ä¸­ + ``` + +4. 设置 CUDA 的目标架构 + + ```shell + $env:TORCH_CUDA_ARCH_LIST="6.1" # æ”¯æŒ GTX 1080 + # 或者用所有支æŒçš„版本,但å¯èƒ½ä¼šå˜å¾—很慢 + $env:TORCH_CUDA_ARCH_LIST="3.5 3.7 5.0 5.2 6.0 6.1 7.0 7.5" + ``` + +```{note} +我们å¯ä»¥åœ¨ [here](https://developer.nvidia.com/cuda-gpus) 查看 GPU 的计算能力 +``` + +5. 编译安装 + + ```shell + $env:MMCV_WITH_OPS = 1 + $env:MAX_JOBS = 8 # æ ¹æ®ä½ å¯ç”¨CPU以åŠå†…å­˜é‡è¿›è¡Œè®¾ç½® + conda activate mmcv # 激活环境 + cd mmcv # 改å˜è·¯å¾„ + python setup.py build_ext # 如果æˆåŠŸ, cl 将被å¯åŠ¨ç”¨äºŽç¼–è¯‘ç®—å­ + python setup.py develop # 安装 + pip list # 检查是å¦å®‰è£…æˆåŠŸ + ``` + +```{note} +如果你的 PyTorch 版本是 1.6.0,你å¯èƒ½ä¼šé‡åˆ°ä¸€äº›è¿™ä¸ª [issue](https://github.com/pytorch/pytorch/issues/42467) æåˆ°çš„错误,则å¯ä»¥å‚考这个 [pull request](https://github.com/pytorch/pytorch/pull/43380/files) 修改 本地环境的 PyTorch æºä»£ç  +``` + +如果编译安装 mmcv 的过程中é‡åˆ°äº†é—®é¢˜ï¼Œä½ ä¹Ÿè®¸å¯ä»¥åœ¨ [Frequently Asked Question](../faq.html) 找到解决方法 diff --git a/docs_zh_CN/get_started/installation.md b/docs_zh_CN/get_started/installation.md new file mode 100644 index 0000000..20e8cd5 --- /dev/null +++ b/docs_zh_CN/get_started/installation.md @@ -0,0 +1,158 @@ +## 安装 MMCV + +MMCV 有两个版本: + +- **mmcv-full**: å®Œæ•´ç‰ˆï¼ŒåŒ…å«æ‰€æœ‰çš„特性以åŠä¸°å¯Œçš„开箱å³ç”¨çš„ CUDA ç®—å­ã€‚注æ„完整版本å¯èƒ½éœ€è¦æ›´é•¿æ—¶é—´æ¥ç¼–译。 +- **mmcv**: 精简版,ä¸åŒ…å« CUDA ç®—å­ä½†åŒ…å«å…¶ä½™æ‰€æœ‰ç‰¹æ€§å’ŒåŠŸèƒ½ï¼Œç±»ä¼¼ MMCV 1.0 之å‰çš„版本。如果你ä¸éœ€è¦ä½¿ç”¨ CUDA 
ç®—å­çš„è¯ï¼Œç²¾ç®€ç‰ˆå¯ä»¥ä½œä¸ºä¸€ä¸ªè€ƒè™‘选项。 + +```{warning} +请ä¸è¦åœ¨åŒä¸€ä¸ªçŽ¯å¢ƒä¸­å®‰è£…ä¸¤ä¸ªç‰ˆæœ¬ï¼Œå¦åˆ™å¯èƒ½ä¼šé‡åˆ°ç±»ä¼¼ `ModuleNotFound` 的错误。在安装一个版本之å‰ï¼Œéœ€è¦å…ˆå¸è½½å¦ä¸€ä¸ªã€‚`如果CUDAå¯ç”¨ï¼Œå¼ºçƒˆæŽ¨è安装mmcv-full`。 +``` + +a. 安装完整版 + +在安装 mmcv-full 之å‰ï¼Œè¯·ç¡®ä¿ PyTorch å·²ç»æˆåŠŸå®‰è£…åœ¨çŽ¯å¢ƒä¸­ï¼Œå¯ä»¥å‚考 PyTorch 官方[文档](https://pytorch.org/)。 + +我们æä¾›äº†ä¸åŒ PyTorch å’Œ CUDA 版本的 mmcv-full 预编译包,å¯ä»¥å¤§å¤§ç®€åŒ–用户安装编译过程。强烈推è通过预编译包æ¥å®‰è£…。å¦å¤–,安装完æˆåŽå¯ä»¥è¿è¡Œ [check_installation.py](https://github.com/open-mmlab/mmcv/.dev_scripts/check_installation.py) 脚本检查 mmcv-full 是å¦å®‰è£…æˆåŠŸã€‚ + +i. 安装最新版本 + +如下是安装最新版 ``mmcv-full`` 的命令 + +```shell +pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/{cu_version}/{torch_version}/index.html +``` + +请将链接中的 ``{cu_version}`` å’Œ ``{torch_version}`` æ ¹æ®è‡ªèº«éœ€æ±‚æ›¿æ¢æˆå®žé™…的版本å·ï¼Œä¾‹å¦‚想安装和 ``CUDA 11.1``ã€``PyTorch 1.9.0`` 兼容的最新版 ``mmcv-full``,使用如下替æ¢è¿‡çš„命令 + +```shell +pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html +``` + +```{note} +PyTorch 在 1.x.0 å’Œ 1.x.1 之间通常是兼容的,故 mmcv-full åªæä¾› 1.x.0 的编译包。如果你 +çš„ PyTorch 版本是 1.x.1,你å¯ä»¥æ”¾å¿ƒåœ°å®‰è£…在 1.x.0 版本编译的 mmcv-full。例如,如果你的 +PyTorch 版本是 1.8.1ã€CUDA 版本是 11.1,你å¯ä»¥ä½¿ç”¨ä»¥ä¸‹å‘½ä»¤å®‰è£… mmcv-full。 + +`pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.8.0/index.html` +``` + +å¦‚æžœæƒ³çŸ¥é“æ›´å¤š CUDA å’Œ PyTorch 版本的命令,å¯ä»¥å‚考下é¢çš„表格,将链接中的 ``=={mmcv_version}`` 删去å³å¯ã€‚ + +ii. 
安装特定的版本 + +如下是安装特定版本 ``mmcv-full`` 的命令 + +```shell +pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/{cu_version}/{torch_version}/index.html +``` + +首先请å‚考版本å‘å¸ƒä¿¡æ¯æ‰¾åˆ°æƒ³è¦å®‰è£…的版本å·ï¼Œå°† ``{mmcv_version}`` æ›¿æ¢æˆè¯¥ç‰ˆæœ¬å·ï¼Œä¾‹å¦‚ ``1.3.9``。 +ç„¶åŽå°†é“¾æŽ¥ä¸­çš„ ``{cu_version}`` å’Œ ``{torch_version}`` æ ¹æ®è‡ªèº«éœ€æ±‚æ›¿æ¢æˆå®žé™…的版本å·ï¼Œä¾‹å¦‚想安装和 ``CUDA 11.1``ã€``PyTorch 1.9.0`` 兼容的 ``mmcv-full`` 1.3.9 版本,使用如下替æ¢è¿‡çš„命令 + +```shell +pip install mmcv-full==1.3.9 -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html +``` + +对于更多的 PyTorch å’Œ CUDA 版本组åˆï¼Œè¯·å‚考下表: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CUDA torch 1.10torch 1.9torch 1.8torch 1.7torch 1.6torch 1.5
11.3
安装
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu113/torch1.10.0/index.html
11.1
安装
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.10.0/index.html
安装
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html
安装
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.8.0/index.html
11.0
安装
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu110/torch1.7.0/index.html
10.2
安装
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.10.0/index.html
安装
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.9.0/index.html
安装
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/index.html
安装
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.7.0/index.html
安装
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.6.0/index.html
安装
pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu102/torch1.5.0/index.html
10.1
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.8.0/index.html
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.7.0/index.html
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.6.0/index.html
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.5.0/index.html
9.2
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu92/torch1.7.0/index.html
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu92/torch1.6.0/index.html
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cu92/torch1.5.0/index.html
cpu
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.10.0/index.html
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.9.0/index.html
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.8.0/index.html
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.7.0/index.html
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.6.0/index.html
安装
 pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.5.0/index.html
+ +```{note} +以上æä¾›çš„预编译包并ä¸å›Šæ‹¬æ‰€æœ‰çš„ mmcv-full 版本,我们å¯ä»¥ç‚¹å‡»å¯¹åº”链接查看支æŒçš„版本。例如,点击 [cu102-torch1.8.0](https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/index.html),å¯ä»¥çœ‹åˆ° `cu102-torch1.8.0` åªæä¾›äº† 1.3.0 åŠä»¥ä¸Šçš„ mmcv-full 版本。å¦å¤–,从 `mmcv v1.3.17` 开始,我们ä¸å†æä¾›`PyTorch 1.3 & 1.4` 对应的 mmcv-full 预编译包。你å¯ä»¥åœ¨ [è¿™](./previous_versions.md) 找到 `PyTorch 1.3 & 1.4` 对应的预编译包。虽然我们ä¸å†æä¾› `PyTorch 1.3 & 1.4` 对应的预编译包,但是我们ä¾ç„¶åœ¨ CI 中ä¿è¯å¯¹å®ƒä»¬çš„兼容æŒç»­åˆ°ä¸‹ä¸€å¹´ã€‚ +``` + +除了使用预编译包之外,å¦ä¸€ç§æ–¹å¼æ˜¯åœ¨æœ¬åœ°è¿›è¡Œç¼–译,直接è¿è¡Œä¸‹è¿°å‘½ä»¤ + +```python +pip install mmcv-full +``` + +ä½†æ³¨æ„æœ¬åœ°ç¼–译å¯èƒ½ä¼šè€—æ—¶ 10 分钟以上。 + +b. 安装精简版 + +```python +pip install mmcv +``` + +c. 安装完整版并且编译 onnxruntime çš„è‡ªå®šä¹‰ç®—å­ + +- 详细的指å—请查看 [这里](https://mmcv.readthedocs.io/zh_CN/latest/deployment/onnxruntime_custom_ops.html)。 + +如果想从æºç ç¼–译 MMCV,请å‚考[该文档](https://mmcv.readthedocs.io/zh_CN/latest/get_started/build.html)。 diff --git a/docs_zh_CN/get_started/introduction.md b/docs_zh_CN/get_started/introduction.md new file mode 100644 index 0000000..0082ae8 --- /dev/null +++ b/docs_zh_CN/get_started/introduction.md @@ -0,0 +1,30 @@ +## ä»‹ç» MMCV + +MMCV 是一个é¢å‘计算机视觉的基础库,它支æŒäº†å¾ˆå¤šå¼€æºé¡¹ç›®ï¼Œä¾‹å¦‚: + +- [MMClassification](https://github.com/open-mmlab/mmclassification): OpenMMLab 图åƒåˆ†ç±»å·¥å…·ç®± +- [MMDetection](https://github.com/open-mmlab/mmdetection): OpenMMLab 目标检测工具箱 +- [MMDetection3D](https://github.com/open-mmlab/mmdetection3d): OpenMMLab 新一代通用 3D ç›®æ ‡æ£€æµ‹å¹³å° +- [MMSegmentation](https://github.com/open-mmlab/mmsegmentation): OpenMMLab 语义分割工具箱 +- [MMAction2](https://github.com/open-mmlab/mmaction2): OpenMMLab 新一代视频ç†è§£å·¥å…·ç®± +- [MMTracking](https://github.com/open-mmlab/mmtracking): OpenMMLab ä¸€ä½“åŒ–è§†é¢‘ç›®æ ‡æ„ŸçŸ¥å¹³å° +- [MMPose](https://github.com/open-mmlab/mmpose): OpenMMLab å§¿æ€ä¼°è®¡å·¥å…·ç®± +- [MMEditing](https://github.com/open-mmlab/mmediting): OpenMMLab 图åƒè§†é¢‘编辑工具箱 +- 
[MMOCR](https://github.com/open-mmlab/mmocr): OpenMMLab å…¨æµç¨‹æ–‡å­—检测识别ç†è§£å·¥å…·åŒ… +- [MMGeneration](https://github.com/open-mmlab/mmgeneration): OpenMMLab å›¾ç‰‡è§†é¢‘ç”Ÿæˆæ¨¡åž‹å·¥å…·ç®± + +MMCV æä¾›äº†å¦‚下众多功能: + +- 通用的 IO æŽ¥å£ +- 图åƒå’Œè§†é¢‘å¤„ç† +- 图åƒå’Œæ ‡æ³¨ç»“æžœå¯è§†åŒ– +- 常用å°å·¥å…·ï¼ˆè¿›åº¦æ¡ï¼Œè®¡æ—¶å™¨ç­‰ï¼‰ +- 基于 PyTorch 的通用训练框架 +- å¤šç§ CNN 网络结构 +- 高质é‡å®žçŽ°çš„å¸¸è§ CUDA ç®—å­ + +如想了解更多特性和使用,请å‚考[文档](https://mmcv.readthedocs.io/zh_CN/latest)。 + +```{note} +MMCV éœ€è¦ Python 3.6 以上版本。 +``` diff --git a/docs/zh_cn/get_started/previous_versions.md b/docs_zh_CN/get_started/previous_versions.md similarity index 93% rename from docs/zh_cn/get_started/previous_versions.md rename to docs_zh_CN/get_started/previous_versions.md index d543818..56679d4 100644 --- a/docs/zh_cn/get_started/previous_versions.md +++ b/docs_zh_CN/get_started/previous_versions.md @@ -1,10 +1,11 @@ + ## 其他版本的 PyTorch 我们ä¸å†æä¾›åœ¨è¾ƒä½Žçš„ `PyTorch` 版本下编译的 `mmcv-full` 包,但为了您的方便,您å¯ä»¥åœ¨ä¸‹é¢æ‰¾åˆ°å®ƒä»¬ã€‚ ### PyTorch 1.4 -| 1.0.0 \<= mmcv_version \<= 1.2.1 +| 1.0.0 <= mmcv_version <= 1.2.1 #### CUDA 10.1 @@ -26,7 +27,7 @@ pip install mmcv-full=={mmcv_version} -f https://download.openmmlab.com/mmcv/dis ### PyTorch v1.3 -| 1.0.0 \<= mmcv_version \<= 1.3.16 +| 1.0.0 <= mmcv_version <= 1.3.16 #### CUDA 10.1 diff --git a/docs/zh_cn/index.rst b/docs_zh_CN/index.rst similarity index 65% rename from docs/zh_cn/index.rst rename to docs_zh_CN/index.rst index 98cf088..b6d00a5 100644 --- a/docs/zh_cn/index.rst +++ b/docs_zh_CN/index.rst @@ -10,22 +10,30 @@ get_started/introduction.md get_started/installation.md get_started/build.md - get_started/article.md .. toctree:: :maxdepth: 2 :caption: 深入ç†è§£ MMCV + understand_mmcv/config.md + understand_mmcv/registry.md + understand_mmcv/runner.md + understand_mmcv/io.md understand_mmcv/data_process.md - understand_mmcv/data_transform.md understand_mmcv/visualization.md understand_mmcv/cnn.md understand_mmcv/ops.md + understand_mmcv/utils.md .. 
toctree:: - :caption: è¯­è¨€åˆ‡æ¢ + :maxdepth: 2 + :caption: 部署 - switch_language.md + deployment/onnx.md + deployment/onnxruntime_op.md + deployment/onnxruntime_custom_ops.md + deployment/tensorrt_plugin.md + deployment/tensorrt_custom_ops.md .. toctree:: :maxdepth: 2 @@ -34,6 +42,8 @@ compatibility.md .. toctree:: + :maxdepth: 2 + :caption: 常è§é—®é¢˜ faq.md @@ -43,20 +53,12 @@ community/contributing.md community/pr.md - community/code_style.md .. toctree:: - :maxdepth: 1 + :maxdepth: 2 :caption: API 文档 - mmcv.image - mmcv.video - mmcv.visualization - mmcv.cnn - mmcv.ops - mmcv.transforms - mmcv.arraymisc - mmcv.utils + api.rst Indices and tables diff --git a/docs/zh_cn/make.bat b/docs_zh_CN/make.bat similarity index 100% rename from docs/zh_cn/make.bat rename to docs_zh_CN/make.bat diff --git a/docs/zh_cn/mmcv-logo.png b/docs_zh_CN/mmcv-logo.png similarity index 100% rename from docs/zh_cn/mmcv-logo.png rename to docs_zh_CN/mmcv-logo.png diff --git a/docs_zh_CN/understand_mmcv/cnn.md b/docs_zh_CN/understand_mmcv/cnn.md new file mode 100644 index 0000000..9027cf3 --- /dev/null +++ b/docs_zh_CN/understand_mmcv/cnn.md @@ -0,0 +1,525 @@ +## å·ç§¯ç¥žç»ç½‘络 + +我们为å·ç§¯ç¥žç»ç½‘络æä¾›äº†ä¸€äº›æž„建模å—ï¼ŒåŒ…æ‹¬å±‚æž„å»ºã€æ¨¡å—组件和æƒé‡åˆå§‹åŒ–。 + +### 网络层的构建 + +在è¿è¡Œå®žéªŒæ—¶ï¼Œæˆ‘们å¯èƒ½éœ€è¦å°è¯•åŒå±žä¸€ç§ç±»åž‹ä½†ä¸åŒé…置的层,但åˆä¸å¸Œæœ›æ¯æ¬¡éƒ½ä¿®æ”¹ä»£ç ã€‚于是我们æä¾›ä¸€äº›å±‚构建方法,å¯ä»¥ä»Žå­—典构建层,字典å¯ä»¥åœ¨é…置文件中é…置,也å¯ä»¥é€šè¿‡å‘½ä»¤è¡Œå‚数指定。 + +#### 用法 + +一个简å•的例å­ï¼š + +```python +cfg = dict(type='Conv3d') +layer = build_conv_layer(cfg, in_channels=3, out_channels=8, kernel_size=3) +``` + +- `build_conv_layer`: 支æŒçš„类型包括 Conv1dã€Conv2dã€Conv3dã€Conv (Conv是Conv2d的别å) +- `build_norm_layer`: 支æŒçš„类型包括 BN1dã€BN2dã€BN3dã€BN (alias for BN2d)ã€SyncBNã€GNã€LNã€IN1dã€IN2dã€IN3dã€IN(IN是IN2d的别å) +- `build_activation_layer`:支æŒçš„类型包括 ReLUã€LeakyReLUã€PReLUã€RReLUã€ReLU6ã€ELUã€Sigmoidã€Tanhã€GELU +- `build_upsample_layer`: 支æŒçš„类型包括 nearestã€bilinearã€deconvã€pixel_shuffle +- 
`build_padding_layer`: 支æŒçš„类型包括 zeroã€reflectã€replicate + +#### 拓展 + +我们还å…è®¸è‡ªå®šä¹‰å±‚å’Œç®—å­æ¥æ‰©å±•构建方法。 + +1. 编写和注册自己的模å—: + + ```python + from mmcv.cnn import UPSAMPLE_LAYERS + + @UPSAMPLE_LAYERS.register_module() + class MyUpsample: + + def __init__(self, scale_factor): + pass + + def forward(self, x): + pass + ``` + +2. 在æŸå¤„导入 `MyUpsample` (例如 `__init__.py` )然åŽä½¿ç”¨å®ƒï¼š + + ```python + cfg = dict(type='MyUpsample', scale_factor=2) + layer = build_upsample_layer(cfg) + ``` + +### 模å—组件 + +我们还æä¾›äº†å¸¸ç”¨çš„æ¨¡å—组件,以方便网络构建。 +å·ç§¯ç»„ä»¶ `ConvModule` ç”± convolutionã€normalization以åŠactivation layers 组æˆï¼Œæ›´å¤šç»†èŠ‚è¯·å‚考 [ConvModule api](api.html#mmcv.cnn.ConvModule)。 + +```python +# conv + bn + relu +conv = ConvModule(3, 8, 2, norm_cfg=dict(type='BN')) +# conv + gn + relu +conv = ConvModule(3, 8, 2, norm_cfg=dict(type='GN', num_groups=2)) +# conv + relu +conv = ConvModule(3, 8, 2) +# conv +conv = ConvModule(3, 8, 2, act_cfg=None) +# conv + leaky relu +conv = ConvModule(3, 8, 3, padding=1, act_cfg=dict(type='LeakyReLU')) +# bn + conv + relu +conv = ConvModule( + 3, 8, 2, norm_cfg=dict(type='BN'), order=('norm', 'conv', 'act')) +``` + +### Weight initialization + +> 实现细节å¯ä»¥åœ¨ [mmcv/cnn/utils/weight_init.py](../../mmcv/cnn/utils/weight_init.py)中找到 + +在训练过程中,适当的åˆå§‹åŒ–策略有利于加快训练速度或者获得更高的性能。 在MMCV中,我们æä¾›äº†ä¸€äº›å¸¸ç”¨çš„æ–¹æ³•æ¥åˆå§‹åŒ–模å—,比如 `nn.Conv2d` 模å—。当然,我们也æä¾›äº†ä¸€äº›é«˜çº§API,å¯ç”¨äºŽåˆå§‹åŒ–包å«ä¸€ä¸ªæˆ–多个模å—的模型。 + +#### Initialization functions + +以函数的方å¼åˆå§‹åŒ– `nn.Module` ,例如 `nn.Conv2d` 〠`nn.Linear` 等。 + +我们æä¾›ä»¥ä¸‹åˆå§‹åŒ–方法, + +- constant_init + + 使用给定常é‡å€¼åˆå§‹åŒ–æ¨¡åž‹å‚æ•° + + ```python + >>> import torch.nn as nn + >>> from mmcv.cnn import constant_init + >>> conv1 = nn.Conv2d(3, 3, 1) + >>> # constant_init(module, val, bias=0) + >>> constant_init(conv1, 1, 0) + >>> conv1.weight + ``` + +- xavier_init + + 按照 [Understanding the difficulty of training deep feedforward neural networks - Glorot, X. & Bengio, Y. 
(2010)](http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf) æè¿°çš„æ–¹æ³•åˆå§‹åŒ–æ¨¡åž‹å‚æ•° + + ```python + >>> import torch.nn as nn + >>> from mmcv.cnn import xavier_init + >>> conv1 = nn.Conv2d(3, 3, 1) + >>> # xavier_init(module, gain=1, bias=0, distribution='normal') + >>> xavier_init(conv1, distribution='normal') + ``` + +- normal_init + + 使用正æ€åˆ†å¸ƒï¼ˆé«˜æ–¯åˆ†å¸ƒï¼‰åˆå§‹åŒ–æ¨¡åž‹å‚æ•° + + ```python + >>> import torch.nn as nn + >>> from mmcv.cnn import normal_init + >>> conv1 = nn.Conv2d(3, 3, 1) + >>> # normal_init(module, mean=0, std=1, bias=0) + >>> normal_init(conv1, std=0.01, bias=0) + ``` + +- uniform_init + + 使用å‡åŒ€åˆ†å¸ƒåˆå§‹åŒ–æ¨¡åž‹å‚æ•° + + ```python + >>> import torch.nn as nn + >>> from mmcv.cnn import uniform_init + >>> conv1 = nn.Conv2d(3, 3, 1) + >>> # uniform_init(module, a=0, b=1, bias=0) + >>> uniform_init(conv1, a=0, b=1) + ``` + +- kaiming_init + + 按照 [Delving deep into rectifiers: Surpassing human-level performance on ImageNet classification - He, K. et al. 
(2015)](https://www.cv-foundation.org/openaccess/content_iccv_2015/papers/He_Delving_Deep_into_ICCV_2015_paper.pdf) æè¿°çš„æ–¹æ³•æ¥åˆå§‹åŒ–æ¨¡åž‹å‚æ•°ã€‚ + + ```python + >>> import torch.nn as nn + >>> from mmcv.cnn import kaiming_init + >>> conv1 = nn.Conv2d(3, 3, 1) + >>> # kaiming_init(module, a=0, mode='fan_out', nonlinearity='relu', bias=0, distribution='normal') + >>> kaiming_init(conv1) + ``` + +- caffe2_xavier_init + + caffe2中实现的 `xavier initialization`,对应于 PyTorch中的 `kaiming_uniform_` + + ```python + >>> import torch.nn as nn + >>> from mmcv.cnn import caffe2_xavier_init + >>> conv1 = nn.Conv2d(3, 3, 1) + >>> # caffe2_xavier_init(module, bias=0) + >>> caffe2_xavier_init(conv1) + ``` + +- bias_init_with_prob + + æ ¹æ®ç»™å®šçš„æ¦‚率åˆå§‹åŒ– `conv/fc`, 这在 [Focal Loss for Dense Object Detection](https://arxiv.org/pdf/1708.02002.pdf) æå‡ºã€‚ + + ```python + >>> from mmcv.cnn import bias_init_with_prob + >>> # bias_init_with_prob is proposed in Focal Loss + >>> bias = bias_init_with_prob(0.01) + >>> bias + -4.59511985013459 + ``` + +#### Initializers and configs + +在åˆå§‹åŒ–方法的基础上,我们定义了相应的åˆå§‹åŒ–类,并将它们注册到 `INITIALIZERS` 中,这样我们就å¯ä»¥ä½¿ç”¨ `config` é…ç½®æ¥åˆå§‹åŒ–模型了。 + +我们æä¾›ä»¥ä¸‹åˆå§‹åŒ–类: + +- ConstantInit +- XavierInit +- NormalInit +- UniformInit +- KaimingInit +- Caffe2XavierInit +- PretrainedInit + +接下æ¥è¯¦ç»†ä»‹ç» `initialize` 的使用方法 + +1. 
通过关键字 `layer` æ¥åˆå§‹åŒ–模型 + + 如果我们åªå®šä¹‰äº†å…³é”®å­— `layer` ,那么åªåˆå§‹åŒ– `layer` 中包å«çš„层。 + + 注æ„: 关键字 `layer` 支æŒçš„æ¨¡å—是带有 weights å’Œ bias 属性的 PyTorch 模å—ï¼Œæ‰€ä»¥ä¸æ”¯æŒ `MultiheadAttention layer` + +- 定义关键字 `layer` 列表并使用相åŒé…ç½®åˆå§‹åŒ–æ¨¡å— + + ```python + import torch.nn as nn + from mmcv.cnn import initialize + + class FooNet(nn.Module): + def __init__(self): + super().__init__() + self.feat = nn.Conv1d(3, 1, 3) + self.reg = nn.Conv2d(3, 3, 3) + self.cls = nn.Linear(1, 2) + + model = FooNet() + init_cfg = dict(type='Constant', layer=['Conv1d', 'Conv2d', 'Linear'], val=1) + # 使用相åŒçš„é…ç½®åˆå§‹åŒ–æ•´ä¸ªæ¨¡å— + initialize(model, init_cfg) + # model.feat.weight + # Parameter containing: + # tensor([[[1., 1., 1.], + # [1., 1., 1.], + # [1., 1., 1.]]], requires_grad=True) + ``` + +- 定义关键字 `layer` 用于åˆå§‹åŒ–ä¸åŒé…置的层 + + ```python + import torch.nn as nn + from mmcv.cnn.utils import initialize + + class FooNet(nn.Module): + def __init__(self): + super().__init__() + self.feat = nn.Conv1d(3, 1, 3) + self.reg = nn.Conv2d(3, 3, 3) + self.cls = nn.Linear(1,2) + + model = FooNet() + init_cfg = [dict(type='Constant', layer='Conv1d', val=1), + dict(type='Constant', layer='Conv2d', val=2), + dict(type='Constant', layer='Linear', val=3)] + # nn.Conv1d 使用 dict(type='Constant', val=1) åˆå§‹åŒ– + # nn.Conv2d 使用 dict(type='Constant', val=2) åˆå§‹åŒ– + # nn.Linear 使用 dict(type='Constant', val=3) åˆå§‹åŒ– + initialize(model, init_cfg) + # model.reg.weight + # Parameter containing: + # tensor([[[[2., 2., 2.], + # [2., 2., 2.], + # [2., 2., 2.]], + # ..., + # [[2., 2., 2.], + # [2., 2., 2.], + # [2., 2., 2.]]]], requires_grad=True) + ``` + +2. 
定义关键字`override`åˆå§‹åŒ–模型 + +- 当用属性ååˆå§‹åŒ–æŸä¸ªç‰¹å®šéƒ¨åˆ†æ—¶, 我们å¯ä»¥ä½¿ç”¨å…³é”®å­— `override`, 关键字 `override` 对应的Value会替代init_cfg中相应的值 + + ```python + import torch.nn as nn + from mmcv.cnn import initialize + + class FooNet(nn.Module): + def __init__(self): + super().__init__() + self.feat = nn.Conv1d(3, 1, 3) + self.reg = nn.Conv2d(3, 3, 3) + self.cls = nn.Sequential(nn.Conv1d(3, 1, 3), nn.Linear(1,2)) + + # 如果我们想将模型的æƒé‡åˆå§‹åŒ–为 1,将åå·®åˆå§‹åŒ–为 2 + # 但希望 `cls` 中的æƒé‡ä¸º 3,å差为 4,则我们å¯ä»¥ä½¿ç”¨å…³é”®å­—override + + model = FooNet() + init_cfg = dict(type='Constant', layer=['Conv1d','Conv2d'], val=1, bias=2, + override=dict(type='Constant', name='reg', val=3, bias=4)) + # 使用 dict(type='Constant', val=1, bias=2)æ¥åˆå§‹åŒ– self.feat and self.cls + # 使用dict(type='Constant', val=3, bias=4)æ¥åˆå§‹åŒ–‘reg’模å—。 + initialize(model, init_cfg) + # model.reg.weight + # Parameter containing: + # tensor([[[[3., 3., 3.], + # [3., 3., 3.], + # [3., 3., 3.]], + # ..., + # [[3., 3., 3.], + # [3., 3., 3.], + # [3., 3., 3.]]]], requires_grad=True) + ``` + +- 如果 init_cfg 中的关键字`layer`为None,则åªåˆå§‹åŒ–在关键字overrideä¸­çš„å­æ¨¡å—,并且çœç•¥override中的 type å’Œå…¶ä»–å‚æ•° + + ```python + model = FooNet() + init_cfg = dict(type='Constant', val=1, bias=2, override=dict(name='reg')) + # self.feat å’Œ self.cls 使用pyTorch默认的åˆå§‹åŒ– + # 将使用 dict(type='Constant', val=1, bias=2) åˆå§‹åŒ–å为 'reg' çš„æ¨¡å— + initialize(model, init_cfg) + # model.reg.weight + # Parameter containing: + # tensor([[[[1., 1., 1.], + # [1., 1., 1.], + # [1., 1., 1.]], + # ..., + # [[1., 1., 1.], + # [1., 1., 1.], + # [1., 1., 1.]]]], requires_grad=True) + ``` + +- 如果我们没有定义关键字`layer`或`override` , å°†ä¸ä¼šåˆå§‹åŒ–任何东西 + +- 关键字`override`的无效用法 + + ```python + # 没有é‡å†™ä»»ä½•å­æ¨¡å— + init_cfg = dict(type='Constant', layer=['Conv1d','Conv2d'], + val=1, bias=2, + override=dict(type='Constant', val=3, bias=4)) + + # 没有指定type,å³ä¾¿æœ‰å…¶ä»–傿•°ï¼Œä¹Ÿæ˜¯æ— æ•ˆçš„。 + init_cfg = dict(type='Constant', layer=['Conv1d','Conv2d'], + 
val=1, bias=2, + override=dict(name='reg', val=3, bias=4)) + ``` + +3. 用预训练模型åˆå§‹åŒ– + + ```python + import torch.nn as nn + import torchvision.models as models + from mmcv.cnn import initialize + + # 使用预训练模型æ¥åˆå§‹åŒ– + model = models.resnet50() + # model.conv1.weight + # Parameter containing: + # tensor([[[[-6.7435e-03, -2.3531e-02, -9.0143e-03, ..., -2.1245e-03, + # -1.8077e-03, 3.0338e-03], + # [-1.2603e-02, -2.7831e-02, 2.3187e-02, ..., -1.5793e-02, + # 1.1655e-02, 4.5889e-03], + # [-3.7916e-02, 1.2014e-02, 1.3815e-02, ..., -4.2651e-03, + # 1.7314e-02, -9.9998e-03], + # ..., + + init_cfg = dict(type='Pretrained', + checkpoint='torchvision://resnet50') + initialize(model, init_cfg) + # model.conv1.weight + # Parameter containing: + # tensor([[[[ 1.3335e-02, 1.4664e-02, -1.5351e-02, ..., -4.0896e-02, + # -4.3034e-02, -7.0755e-02], + # [ 4.1205e-03, 5.8477e-03, 1.4948e-02, ..., 2.2060e-03, + # -2.0912e-02, -3.8517e-02], + # [ 2.2331e-02, 2.3595e-02, 1.6120e-02, ..., 1.0281e-01, + # 6.2641e-02, 5.1977e-02], + # ..., + + # 使用关键字'prefix'用预训练模型的特定部分æ¥åˆå§‹åŒ–å­æ¨¡å—æƒé‡ + model = models.resnet50() + url = 'http://download.openmmlab.com/mmdetection/v2.0/retinanet/'\ + 'retinanet_r50_fpn_1x_coco/'\ + 'retinanet_r50_fpn_1x_coco_20200130-c2398f9e.pth' + init_cfg = dict(type='Pretrained', + checkpoint=url, prefix='backbone.') + initialize(model, init_cfg) + ``` + +4. 
åˆå§‹åŒ–继承自BaseModuleã€Sequentialã€ModuleList的模型 + + `BaseModule` 继承自 `torch.nn.Module`, 它们之间唯一的ä¸åŒæ˜¯ `BaseModule` 实现了 `init_weight` + + `Sequential` 继承自 `BaseModule` å’Œ `torch.nn.Sequential` + + `ModuleList` 继承自 `BaseModule` å’Œ `torch.nn.ModuleList` + + `````python + import torch.nn as nn + from mmcv.runner import BaseModule, Sequential, ModuleList + + class FooConv1d(BaseModule): + + def __init__(self, init_cfg=None): + super().__init__(init_cfg) + self.conv1d = nn.Conv1d(4, 1, 4) + + def forward(self, x): + return self.conv1d(x) + + class FooConv2d(BaseModule): + + def __init__(self, init_cfg=None): + super().__init__(init_cfg) + self.conv2d = nn.Conv2d(3, 1, 3) + + def forward(self, x): + return self.conv2d(x) + + # BaseModule + init_cfg = dict(type='Constant', layer='Conv1d', val=0., bias=1.) + model = FooConv1d(init_cfg) + model.init_weights() + # model.conv1d.weight + # Parameter containing: + # tensor([[[0., 0., 0., 0.], + # [0., 0., 0., 0.], + # [0., 0., 0., 0.], + # [0., 0., 0., 0.]]], requires_grad=True) + + # Sequential + init_cfg1 = dict(type='Constant', layer='Conv1d', val=0., bias=1.) + init_cfg2 = dict(type='Constant', layer='Conv2d', val=2., bias=3.) + model1 = FooConv1d(init_cfg1) + model2 = FooConv2d(init_cfg2) + seq_model = Sequential(model1, model2) + seq_model.init_weights() + # seq_model[0].conv1d.weight + # Parameter containing: + # tensor([[[0., 0., 0., 0.], + # [0., 0., 0., 0.], + # [0., 0., 0., 0.], + # [0., 0., 0., 0.]]], requires_grad=True) + # seq_model[1].conv2d.weight + # Parameter containing: + # tensor([[[[2., 2., 2.], + # [2., 2., 2.], + # [2., 2., 2.]], + # ..., + # [[2., 2., 2.], + # [2., 2., 2.], + # [2., 2., 2.]]]], requires_grad=True) + + # inner init_cfg has higher priority + model1 = FooConv1d(init_cfg1) + model2 = FooConv2d(init_cfg2) + init_cfg = dict(type='Constant', layer=['Conv1d', 'Conv2d'], val=4., bias=5.) 
+ seq_model = Sequential(model1, model2, init_cfg=init_cfg) + seq_model.init_weights() + # seq_model[0].conv1d.weight + # Parameter containing: + # tensor([[[0., 0., 0., 0.], + # [0., 0., 0., 0.], + # [0., 0., 0., 0.], + # [0., 0., 0., 0.]]], requires_grad=True) + # seq_model[1].conv2d.weight + # Parameter containing: + # tensor([[[[2., 2., 2.], + # [2., 2., 2.], + # [2., 2., 2.]], + # ..., + # [[2., 2., 2.], + # [2., 2., 2.], + # [2., 2., 2.]]]], requires_grad=True) + + # ModuleList + model1 = FooConv1d(init_cfg1) + model2 = FooConv2d(init_cfg2) + modellist = ModuleList([model1, model2]) + modellist.init_weights() + # modellist[0].conv1d.weight + # Parameter containing: + # tensor([[[0., 0., 0., 0.], + # [0., 0., 0., 0.], + # [0., 0., 0., 0.], + # [0., 0., 0., 0.]]], requires_grad=True) + # modellist[1].conv2d.weight + # Parameter containing: + # tensor([[[[2., 2., 2.], + # [2., 2., 2.], + # [2., 2., 2.]], + # ..., + # [[2., 2., 2.], + # [2., 2., 2.], + # [2., 2., 2.]]]], requires_grad=True) + + # inner init_cfg has higher priority + model1 = FooConv1d(init_cfg1) + model2 = FooConv2d(init_cfg2) + init_cfg = dict(type='Constant', layer=['Conv1d', 'Conv2d'], val=4., bias=5.) 
+ modellist = ModuleList([model1, model2], init_cfg=init_cfg) + modellist.init_weights() + # modellist[0].conv1d.weight + # Parameter containing: + # tensor([[[0., 0., 0., 0.], + # [0., 0., 0., 0.], + # [0., 0., 0., 0.], + # [0., 0., 0., 0.]]], requires_grad=True) + # modellist[1].conv2d.weight + # Parameter containing: + # tensor([[[[2., 2., 2.], + # [2., 2., 2.], + # [2., 2., 2.]], + # ..., + # [[2., 2., 2.], + # [2., 2., 2.], + # [2., 2., 2.]]]], requires_grad=True) + ````` + +### Model Zoo + +除了`torchvision`的预训练模型,我们还æä¾›ä»¥ä¸‹ CNN 的预训练模型: + +- VGG Caffe +- ResNet Caffe +- ResNeXt +- ResNet with Group Normalization +- ResNet with Group Normalization and Weight Standardization +- HRNetV2 +- Res2Net +- RegNet + +#### Model URLs in JSON + +MMCV中的Model Zoo Link ç”± JSON 文件管ç†ã€‚ json 文件由模型åç§°åŠå…¶url或path的键值对组æˆ,一个json文件å¯èƒ½ç±»ä¼¼äºŽ: + +```json +{ + "model_a": "https://example.com/models/model_a_9e5bac.pth", + "model_b": "pretrain/model_b_ab3ef2c.pth" +} +``` + +å¯ä»¥åœ¨[此处](https://github.com/open-mmlab/mmcv/blob/master/mmcv/model_zoo/open_mmlab.json)找到托管在 OpenMMLab AWS 上的预训练模型的默认链接。 + +ä½ å¯ä»¥é€šè¿‡å°† `open-mmlab.json` 放在 `MMCV_HOME`下æ¥è¦†ç›–默认链接,如果在环境中找ä¸åˆ°`MMCV_HOME`,则默认使用 `~/.cache/mmcv`。当然你也å¯ä»¥ä½¿ç”¨å‘½ä»¤ `export MMCV_HOME=/your/path`æ¥è®¾ç½®è‡ªå·±çš„路径。 + +外部的json文件将被åˆå¹¶ä¸ºé»˜è®¤æ–‡ä»¶ï¼Œå¦‚果相åŒçš„键出现在外部`json`和默认`json`中,则将使用外部`json`。 + +#### Load Checkpoint + +`mmcv.load_checkpoint()`çš„å‚æ•°`filename`支æŒä»¥ä¸‹ç±»åž‹ï¼š + +- filepath: `checkpoint`路径 +- `http://xxx` and `https://xxx`: 下载checkpoint的链接,文件å中必需包å«`SHA256`åŽç¼€ +- `torchvision://xxx`: `torchvision.models`中的模型链接,更多细节å‚考 [torchvision](https://pytorch.org/docs/stable/torchvision/models.html) +- `open-mmlab://xxx`: 默认和其他 json 文件中æä¾›çš„æ¨¡åž‹é“¾æŽ¥æˆ–文件路径 diff --git a/docs_zh_CN/understand_mmcv/config.md b/docs_zh_CN/understand_mmcv/config.md new file mode 100644 index 0000000..c6da308 --- /dev/null +++ b/docs_zh_CN/understand_mmcv/config.md @@ -0,0 +1,176 @@ +## é…ç½® + +`Config` 
类用于æ“作é…置文件,它支æŒä»Žå¤šç§æ–‡ä»¶æ ¼å¼ä¸­åŠ è½½é…置,包括 **python**, **json** å’Œ **yaml**。 +它æä¾›äº†ç±»ä¼¼å­—å…¸å¯¹è±¡çš„æŽ¥å£æ¥èŽ·å–和设置值。 + +以é…置文件 `test.py` 为例 + +```python +a = 1 +b = dict(b1=[0, 1, 2], b2=None) +c = (1, 2) +d = 'string' +``` + +加载与使用é…置文件 + +```python +>>> cfg = Config.fromfile('test.py') +>>> print(cfg) +>>> dict(a=1, +... b=dict(b1=[0, 1, 2], b2=None), +... c=(1, 2), +... d='string') +``` + +对于所有格å¼çš„é…置文件,都支æŒä¸€äº›é¢„定义å˜é‡ã€‚它会将 `{{ var }}` 替æ¢ä¸ºå®žé™…值。 + +ç›®å‰æ”¯æŒä»¥ä¸‹å››ä¸ªé¢„定义å˜é‡ï¼š + +`{{ fileDirname }}` - 当剿‰“开文件的目录å,例如 /home/your-username/your-project/folder + +`{{ fileBasename }}` - 当剿‰“开文件的文件å,例如 file.ext + +`{{ fileBasenameNoExtension }}` - 当剿‰“开文件ä¸åŒ…嫿‰©å±•å的文件å,例如 file + +`{{ fileExtname }}` - 当剿‰“开文件的扩展å,例如 .ext + +这些å˜é‡å引用自 [VS Code](https://code.visualstudio.com/docs/editor/variables-reference)。 + +这里是一个带有预定义å˜é‡çš„é…置文件的例å­ã€‚ + +`config_a.py` +```python +a = 1 +b = './work_dir/{{ fileBasenameNoExtension }}' +c = '{{ fileExtname }}' +``` + +```python +>>> cfg = Config.fromfile('./config_a.py') +>>> print(cfg) +>>> dict(a=1, +... b='./work_dir/config_a', +... c='.py') +``` + +对于所有格å¼çš„é…置文件, 都支æŒç»§æ‰¿ã€‚为了é‡ç”¨å…¶ä»–é…置文件的字段, +éœ€è¦æŒ‡å®š `_base_='./config_a.py'` 或者一个包å«é…置文件的列表 `_base_=['./config_a.py', './config_b.py']`。 + +这里有 4 个é…置继承关系的例å­ã€‚ + +`config_a.py` 作为基类é…置文件 + +```python +a = 1 +b = dict(b1=[0, 1, 2], b2=None) +``` +### ä¸å«é‡å¤é”®å€¼å¯¹ä»ŽåŸºç±»é…置文件继承 + +`config_b.py` + +```python +_base_ = './config_a.py' +c = (1, 2) +d = 'string' +``` + +```python +>>> cfg = Config.fromfile('./config_b.py') +>>> print(cfg) +>>> dict(a=1, +... b=dict(b1=[0, 1, 2], b2=None), +... c=(1, 2), +... d='string') +``` +在`config_b.py`里的新字段与在`config_a.py`里的旧字段拼接 + +### å«é‡å¤é”®å€¼å¯¹ä»ŽåŸºç±»é…置文件继承 + +`config_c.py` + +```python +_base_ = './config_a.py' +b = dict(b2=1) +c = (1, 2) +``` + +```python +>>> cfg = Config.fromfile('./config_c.py') +>>> print(cfg) +>>> dict(a=1, +... b=dict(b1=[0, 1, 2], b2=1), +... 
c=(1, 2)) +``` + +在基类é…置文件:`config_a` 里的 `b.b2=None`被é…置文件:`config_c.py`里的 `b.b2=1`替代。 + +### 从具有忽略字段的é…置文件继承 + +`config_d.py` + +```python +_base_ = './config_a.py' +b = dict(_delete_=True, b2=None, b3=0.1) +c = (1, 2) +``` + +```python +>>> cfg = Config.fromfile('./config_d.py') +>>> print(cfg) +>>> dict(a=1, +... b=dict(b2=None, b3=0.1), +... c=(1, 2)) +``` + +您还å¯ä»¥è®¾ç½® `_delete_=True`忽略基类é…置文件中的æŸäº›å­—段。所有在`b`中的旧键 `b1, b2, b3` 将会被新键 `b2, b3` 所å–代。 + +### 从多个基类é…置文件继承(基类é…置文件ä¸åº”包å«ç›¸åŒçš„键) + +`config_e.py` + +```python +c = (1, 2) +d = 'string' +``` + +`config_f.py` + +```python +_base_ = ['./config_a.py', './config_e.py'] +``` + +```python +>>> cfg = Config.fromfile('./config_f.py') +>>> print(cfg) +>>> dict(a=1, +... b=dict(b1=[0, 1, 2], b2=None), +... c=(1, 2), +... d='string') +``` + +### 从基类引用å˜é‡ + +您å¯ä»¥ä½¿ç”¨ä»¥ä¸‹è¯­æ³•引用在基类中定义的å˜é‡ã€‚ + +`base.py` + +```python +item1 = 'a' +item2 = dict(item3 = 'b') +``` + +`config_g.py` + +```python +_base_ = ['./base.py'] +item = dict(a = {{ _base_.item1 }}, b = {{ _base_.item2.item3 }}) +``` + +```python +>>> cfg = Config.fromfile('./config_g.py') +>>> print(cfg.pretty_text) +item1 = 'a' +item2 = dict(item3='b') +item = dict(a='a', b='b') +``` diff --git a/docs/zh_cn/understand_mmcv/data_process.md b/docs_zh_CN/understand_mmcv/data_process.md similarity index 93% rename from docs/zh_cn/understand_mmcv/data_process.md rename to docs_zh_CN/understand_mmcv/data_process.md index 7e0afd1..0885fe0 100644 --- a/docs/zh_cn/understand_mmcv/data_process.md +++ b/docs_zh_CN/understand_mmcv/data_process.md @@ -130,7 +130,7 @@ bboxes = np.array([[10, 10, 100, 120], [0, 0, 50, 50]]) patches = mmcv.imcrop(img, bboxes) # è£å‰ªä¸¤ä¸ªåŒºåŸŸå¹¶ä¸”缩放区域1.2å€ -patches = mmcv.imcrop(img, bboxes, scale=1.2) +patches = mmcv.imcrop(img, bboxes, scale_ratio=1.2) ``` #### å¡«å…… @@ -144,13 +144,13 @@ img = mmcv.imread('tests/data/color.jpg') img_ = mmcv.impad(img, shape=(1000, 1200), pad_val=0) # 用给定值分别填充图åƒçš„3个通é“至 (1000, 1200) 
-img_ = mmcv.impad(img, shape=(1000, 1200), pad_val=(100, 50, 200)) +img_ = mmcv.impad(img, shape=(1000, 1200), pad_val=[100, 50, 200]) # 用给定值填充图åƒçš„å·¦ã€å³ã€ä¸Šã€ä¸‹å››æ¡è¾¹ img_ = mmcv.impad(img, padding=(10, 20, 30, 40), pad_val=0) # 用3个值分别填充图åƒçš„å·¦ã€å³ã€ä¸Šã€ä¸‹å››æ¡è¾¹çš„3ä¸ªé€šé“ -img_ = mmcv.impad(img, padding=(10, 20, 30, 40), pad_val=(100, 50, 200)) +img_ = mmcv.impad(img, padding=(10, 20, 30, 40), pad_val=[100, 50, 200]) # 将图åƒçš„å››æ¡è¾¹å¡«å……至能够被给定值整除 img_ = mmcv.impad_to_multiple(img, 32) @@ -252,24 +252,24 @@ flow = mmcv.flowread('compressed.jpg', quantize=True, concat_axis=1) mmcv.flowshow(flow) ``` -![progress](../../en/_static/flow_visualization.png) +![progress](../../docs/_static/flow_visualization.png) -1. æµå˜æ¢ +3. æµå˜æ¢ ```python img1 = mmcv.imread('img1.jpg') flow = mmcv.flowread('flow.flo') -warped_img2 = mmcv.flow_warp(img1, flow) +warpped_img2 = mmcv.flow_warp(img1, flow) ``` img1 (å·¦) and img2 (å³) -![raw images](../../en/_static/flow_raw_images.png) +![raw images](../../docs/_static/flow_raw_images.png) å…‰æµ (img2 -> img1) -![optical flow](../../en/_static/flow_img2toimg1.png) +![optical flow](../../docs/_static/flow_img2toimg1.png) å˜æ¢åŽçš„图åƒå’ŒçœŸå®žå›¾åƒçš„差异 -![warped image](../../en/_static/flow_warp_diff.png) +![warpped image](../../docs/_static/flow_warp_diff.png) diff --git a/docs_zh_CN/understand_mmcv/io.md b/docs_zh_CN/understand_mmcv/io.md new file mode 100644 index 0000000..0e5002f --- /dev/null +++ b/docs_zh_CN/understand_mmcv/io.md @@ -0,0 +1,240 @@ +## 文件输入输出 + +æ–‡ä»¶è¾“å…¥è¾“å‡ºæ¨¡å—æä¾›äº†ä¸¤ä¸ªé€šç”¨çš„ API 接å£ç”¨äºŽè¯»å–å’Œä¿å­˜ä¸åŒæ ¼å¼çš„æ–‡ä»¶ã€‚ + +```{note} +在 v1.3.16 åŠä¹‹åŽçš„版本中,IO æ¨¡å—æ”¯æŒä»Žä¸åŒåŽç«¯è¯»å–æ•°æ®å¹¶æ”¯æŒå°†æ•°æ®è‡³ä¸åŒåŽç«¯ã€‚更多细节请访问 PR [#1330](https://github.com/open-mmlab/mmcv/pull/1330)。 +``` + +### 读å–å’Œä¿å­˜æ•°æ® + +`mmcv` æä¾›äº†ä¸€ä¸ªé€šç”¨çš„ api 用于读å–å’Œä¿å­˜æ•°æ®ï¼Œç›®å‰æ”¯æŒçš„æ ¼å¼æœ‰ jsonã€yaml å’Œ pickle。 + +#### ä»Žç¡¬ç›˜è¯»å–æ•°æ®æˆ–者将数æ®ä¿å­˜è‡³ç¡¬ç›˜ + +```python 
+import mmcv + +# ä»Žæ–‡ä»¶ä¸­è¯»å–æ•°æ® +data = mmcv.load('test.json') +data = mmcv.load('test.yaml') +data = mmcv.load('test.pkl') +# ä»Žæ–‡ä»¶å¯¹è±¡ä¸­è¯»å–æ•°æ® +with open('test.json', 'r') as f: + data = mmcv.load(f, file_format='json') + +# 将数æ®åºåˆ—化为字符串 +json_str = mmcv.dump(data, file_format='json') + +# 将数æ®ä¿å­˜è‡³æ–‡ä»¶ (æ ¹æ®æ–‡ä»¶ååŽç¼€å推文件类型) +mmcv.dump(data, 'out.pkl') + +# 将数æ®ä¿å­˜è‡³æ–‡ä»¶å¯¹è±¡ +with open('test.yaml', 'w') as f: + data = mmcv.dump(data, f, file_format='yaml') +``` + +#### 从其他åŽç«¯åŠ è½½æˆ–è€…ä¿å­˜è‡³å…¶ä»–åŽç«¯ + +```python +import mmcv + +# 从 s3 æ–‡ä»¶è¯»å–æ•°æ® +data = mmcv.load('s3://bucket-name/test.json') +data = mmcv.load('s3://bucket-name/test.yaml') +data = mmcv.load('s3://bucket-name/test.pkl') + +# 将数æ®ä¿å­˜è‡³ s3 文件 (æ ¹æ®æ–‡ä»¶ååŽç¼€å推文件类型) +mmcv.dump(data, 's3://bucket-name/out.pkl') +``` + +我们æä¾›äº†æ˜“于拓展的方å¼ä»¥æ”¯æŒæ›´å¤šçš„æ–‡ä»¶æ ¼å¼ã€‚我们åªéœ€è¦åˆ›å»ºä¸€ä¸ªç»§æ‰¿è‡ª `BaseFileHandler` çš„ +æ–‡ä»¶å¥æŸ„类并将其注册到 `mmcv` 中å³å¯ã€‚奿Ÿ„类至少需è¦é‡å†™ä¸‰ä¸ªæ–¹æ³•。 + +```python +import mmcv + +# 支æŒä¸ºæ–‡ä»¶å¥æŸ„ç±»æ³¨å†Œå¤šä¸ªæ–‡ä»¶æ ¼å¼ +# @mmcv.register_handler(['txt', 'log']) +@mmcv.register_handler('txt') +class TxtHandler1(mmcv.BaseFileHandler): + + def load_from_fileobj(self, file): + return file.read() + + def dump_to_fileobj(self, obj, file): + file.write(str(obj)) + + def dump_to_str(self, obj, **kwargs): + return str(obj) +``` + +以 `PickleHandler` 为例 + +```python +import pickle + +class PickleHandler(mmcv.BaseFileHandler): + + def load_from_fileobj(self, file, **kwargs): + return pickle.load(file, **kwargs) + + def load_from_path(self, filepath, **kwargs): + return super(PickleHandler, self).load_from_path( + filepath, mode='rb', **kwargs) + + def dump_to_str(self, obj, **kwargs): + kwargs.setdefault('protocol', 2) + return pickle.dumps(obj, **kwargs) + + def dump_to_fileobj(self, obj, file, **kwargs): + kwargs.setdefault('protocol', 2) + pickle.dump(obj, file, **kwargs) + + def dump_to_path(self, obj, filepath, 
**kwargs): + super(PickleHandler, self).dump_to_path( + obj, filepath, mode='wb', **kwargs) +``` + +### è¯»å–æ–‡ä»¶å¹¶è¿”回列表或字典 + +例如, `a.txt` 是文本文件,一共有5行内容。 + +``` +a +b +c +d +e +``` +#### ä»Žç¡¬ç›˜è¯»å– + +使用 `list_from_file` è¯»å– `a.txt` + +```python +>>> mmcv.list_from_file('a.txt') +['a', 'b', 'c', 'd', 'e'] +>>> mmcv.list_from_file('a.txt', offset=2) +['c', 'd', 'e'] +>>> mmcv.list_from_file('a.txt', max_num=2) +['a', 'b'] +>>> mmcv.list_from_file('a.txt', prefix='/mnt/') +['/mnt/a', '/mnt/b', '/mnt/c', '/mnt/d', '/mnt/e'] +``` + +åŒæ ·ï¼Œ `b.txt` 也是文本文件,一共有3行内容 + +``` +1 cat +2 dog cow +3 panda +``` + +使用 `dict_from_file` è¯»å– `b.txt` + +```python +>>> mmcv.dict_from_file('b.txt') +{'1': 'cat', '2': ['dog', 'cow'], '3': 'panda'} +>>> mmcv.dict_from_file('b.txt', key_type=int) +{1: 'cat', 2: ['dog', 'cow'], 3: 'panda'} +``` + +#### 从其他åŽç«¯è¯»å– + +使用 `list_from_file` è¯»å– `s3://bucket-name/a.txt` + +```python +>>> mmcv.list_from_file('s3://bucket-name/a.txt') +['a', 'b', 'c', 'd', 'e'] +>>> mmcv.list_from_file('s3://bucket-name/a.txt', offset=2) +['c', 'd', 'e'] +>>> mmcv.list_from_file('s3://bucket-name/a.txt', max_num=2) +['a', 'b'] +>>> mmcv.list_from_file('s3://bucket-name/a.txt', prefix='/mnt/') +['/mnt/a', '/mnt/b', '/mnt/c', '/mnt/d', '/mnt/e'] +``` + +使用 `dict_from_file` è¯»å– `b.txt` + +```python +>>> mmcv.dict_from_file('s3://bucket-name/b.txt') +{'1': 'cat', '2': ['dog', 'cow'], '3': 'panda'} +>>> mmcv.dict_from_file('s3://bucket-name/b.txt', key_type=int) +{1: 'cat', 2: ['dog', 'cow'], 3: 'panda'} +``` + +### 读å–å’Œä¿å­˜æƒé‡æ–‡ä»¶ + +#### ä»Žç¡¬ç›˜è¯»å–æƒé‡æ–‡ä»¶æˆ–者将æƒé‡æ–‡ä»¶ä¿å­˜è‡³ç¡¬ç›˜ + +我们å¯ä»¥é€šè¿‡ä¸‹é¢çš„æ–¹å¼ä»Žç£ç›˜è¯»å–æƒé‡æ–‡ä»¶æˆ–者将æƒé‡æ–‡ä»¶ä¿å­˜è‡³ç£ç›˜ + +```python +import torch + +filepath1 = '/path/of/your/checkpoint1.pth' +filepath2 = '/path/of/your/checkpoint2.pth' +# 从 filepath1 è¯»å–æƒé‡æ–‡ä»¶ +checkpoint = torch.load(filepath1) +# å°†æƒé‡æ–‡ä»¶ä¿å­˜è‡³ filepath2 +torch.save(checkpoint, filepath2) +``` + +MMCV 
æä¾›äº†å¾ˆå¤šåŽç«¯ï¼Œ`HardDiskBackend` 是其中一个,我们å¯ä»¥é€šè¿‡å®ƒæ¥è¯»å–或者ä¿å­˜æƒé‡æ–‡ä»¶ã€‚ + +```python +import io +from mmcv.fileio.file_client import HardDiskBackend + +disk_backend = HardDiskBackend() +with io.BytesIO(disk_backend.get(filepath1)) as buffer: + checkpoint = torch.load(buffer) +with io.BytesIO() as buffer: + torch.save(checkpoint, f) + disk_backend.put(f.getvalue(), filepath2) +``` + +如果我们想在接å£ä¸­å®žçŽ°æ ¹æ®æ–‡ä»¶è·¯å¾„自动选择对应的åŽç«¯ï¼Œæˆ‘们å¯ä»¥ä½¿ç”¨ `FileClient`。 +ä¾‹å¦‚ï¼Œæˆ‘ä»¬æƒ³å®žçŽ°ä¸¤ä¸ªæ–¹æ³•ï¼Œåˆ†åˆ«æ˜¯è¯»å–æƒé‡ä»¥åŠä¿å­˜æƒé‡ï¼Œå®ƒä»¬éœ€æ”¯æŒä¸åŒç±»åž‹çš„æ–‡ä»¶è·¯å¾„,å¯ä»¥æ˜¯ç£ç›˜è·¯å¾„,也å¯ä»¥æ˜¯ç½‘络路径或者其他路径。 + +```python +from mmcv.fileio.file_client import FileClient + +def load_checkpoint(path): + file_client = FileClient.infer(uri=path) + with io.BytesIO(file_client.get(path)) as buffer: + checkpoint = torch.load(buffer) + return checkpoint + +def save_checkpoint(checkpoint, path): + with io.BytesIO() as buffer: + torch.save(checkpoint, buffer) + file_client.put(buffer.getvalue(), path) + +file_client = FileClient.infer_client(uri=filepath1) +checkpoint = load_checkpoint(filepath1) +save_checkpoint(checkpoint, filepath2) +``` + +#### ä»Žç½‘ç»œè¿œç«¯è¯»å–æƒé‡æ–‡ä»¶ + +```{note} +ç›®å‰åªæ”¯æŒä»Žç½‘ç»œè¿œç«¯è¯»å–æƒé‡æ–‡ä»¶ï¼Œæš‚䏿”¯æŒå°†æƒé‡æ–‡ä»¶å†™å…¥ç½‘络远端 +``` + +```python +import io +import torch +from mmcv.fileio.file_client import HTTPBackend, FileClient + +filepath = 'http://path/of/your/checkpoint.pth' +checkpoint = torch.utils.model_zoo.load_url(filepath) + +http_backend = HTTPBackend() +with io.BytesIO(http_backend.get(filepath)) as buffer: + checkpoint = torch.load(buffer) + +file_client = FileClient.infer_client(uri=filepath) +with io.BytesIO(file_client.get(filepath)) as buffer: + checkpoint = torch.load(buffer) +``` diff --git a/docs_zh_CN/understand_mmcv/ops.md b/docs_zh_CN/understand_mmcv/ops.md new file mode 100644 index 0000000..a45bb14 --- /dev/null +++ b/docs_zh_CN/understand_mmcv/ops.md @@ -0,0 +1,36 @@ +## CUDA ç®—å­ + 
+MMCV æä¾›äº†æ£€æµ‹ã€åˆ†å‰²ç­‰ä»»åŠ¡ä¸­å¸¸ç”¨çš„ CUDA ç®—å­ + +- AssignScoreWithK +- BallQuery +- BBoxOverlaps +- CARAFE +- CrissCrossAttention +- ContextBlock +- CornerPool +- Deformable Convolution v1/v2 +- Deformable RoIPool +- DynamicScatter +- GatherPoints +- FurthestPointSample +- FurthestPointSampleWithDist +- GeneralizedAttention +- KNN +- MaskedConv +- NMS +- PSAMask +- RoIPointPool3d +- RoIPool +- RoIAlign +- RoIAwarePool3d +- SimpleRoIAlign +- SigmoidFocalLoss +- SoftmaxFocalLoss +- SoftNMS +- Synchronized BatchNorm +- Voxelization +- ThreeInterpolate +- ThreeNN +- Weight standardization +- Correlation diff --git a/docs_zh_CN/understand_mmcv/registry.md b/docs_zh_CN/understand_mmcv/registry.md new file mode 100644 index 0000000..3afd0ab --- /dev/null +++ b/docs_zh_CN/understand_mmcv/registry.md @@ -0,0 +1,149 @@ +## 注册器 +MMCV 使用 [注册器](https://github.com/open-mmlab/mmcv/blob/master/mmcv/utils/registry.py) æ¥ç®¡ç†å…·æœ‰ç›¸ä¼¼åŠŸèƒ½çš„ä¸åŒæ¨¡å—, 例如, 检测器中的主干网络ã€å¤´éƒ¨ã€å’Œæ¨¡åž‹é¢ˆéƒ¨ã€‚ +在 OpenMMLab å®¶æ—中的ç»å¤§éƒ¨åˆ†å¼€æºé¡¹ç›®ä½¿ç”¨æ³¨å†Œå™¨åŽ»ç®¡ç†æ•°æ®é›†å’Œæ¨¡åž‹çš„æ¨¡å—,例如 [MMDetection](https://github.com/open-mmlab/mmdetection), [MMDetection3D](https://github.com/open-mmlab/mmdetection3d), [MMClassification](https://github.com/open-mmlab/mmclassification), [MMEditing](https://github.com/open-mmlab/mmediting) 等。 + +### 什么是注册器 +在MMCV中,注册器å¯ä»¥çœ‹ä½œç±»åˆ°å­—符串的映射。 +一个注册器中的类通常有相似的接å£ï¼Œä½†æ˜¯å¯ä»¥å®žçްä¸åŒçš„算法或支æŒä¸åŒçš„æ•°æ®é›†ã€‚ +借助注册器,用户å¯ä»¥é€šè¿‡ä½¿ç”¨ç›¸åº”的字符串查找并实例化该类,并根æ®ä»–们的需è¦å®žä¾‹åŒ–对应模å—。 +一个典型的案例是,OpenMMLab 中的大部分开æºé¡¹ç›®çš„é…置系统,这些系统通过é…置文件æ¥ä½¿ç”¨æ³¨å†Œå™¨åˆ›å»ºé’©å­ã€æ‰§è¡Œå™¨ã€æ¨¡åž‹å’Œæ•°æ®é›†ã€‚ +å¯ä»¥åœ¨[这里](https://mmcv.readthedocs.io/en/latest/api.html?highlight=registry#mmcv.utils.Registry)找到注册器接å£ä½¿ç”¨æ–‡æ¡£ã€‚ + +使用 `registry`(注册器)管ç†ä»£ç åº“中的模型,需è¦ä»¥ä¸‹ä¸‰ä¸ªæ­¥éª¤ã€‚ + +1. 创建一个构建方法(å¯é€‰ï¼Œåœ¨å¤§å¤šæ•°æƒ…况下您å¯ä»¥åªä½¿ç”¨é»˜è®¤æ–¹æ³•) +2. 创建注册器 +3. 
使用此注册器æ¥ç®¡ç†æ¨¡å— + +`Registry`ï¼ˆæ³¨å†Œå™¨ï¼‰çš„å‚æ•° `build_func`(构建函数) 用æ¥è‡ªå®šä»¥å¦‚何实例化类的实例,默认使用 [这里](https://mmcv.readthedocs.io/en/latest/api.html?highlight=registry#mmcv.utils.build_from_cfg)实现的`build_from_cfg`。 + +### 一个简å•çš„ä¾‹å­ + +这里是一个使用注册器管ç†åŒ…中模å—的简å•示例。您å¯ä»¥åœ¨ OpenMMLab å¼€æºé¡¹ç›®ä¸­æ‰¾åˆ°æ›´å¤šå®žä¾‹ã€‚ + +å‡è®¾æˆ‘们è¦å®žçŽ°ä¸€ç³»åˆ—æ•°æ®é›†è½¬æ¢å™¨ï¼ˆDataset Converter),用于将ä¸åŒæ ¼å¼çš„æ•°æ®è½¬æ¢ä¸ºæ ‡å‡†æ•°æ®æ ¼å¼ã€‚我们先创建一个å为converters的目录作为包,在包中我们创建一个文件æ¥å®žçŽ°æž„å»ºå™¨ï¼ˆbuilder),命å为converters/builder.py,如下 + +```python +from mmcv.utils import Registry +# 创建转æ¢å™¨ï¼ˆconverter)的注册器(registry) +CONVERTERS = Registry('converter') +``` + +ç„¶åŽæˆ‘们在包中å¯ä»¥å®žçްä¸åŒçš„转æ¢å™¨ï¼ˆconverter)。例如,在 `converters/converter1.py` 中实现 `Converter1`。 + +```python +from .builder import CONVERTERS + +# ä½¿ç”¨æ³¨å†Œå™¨ç®¡ç†æ¨¡å— +@CONVERTERS.register_module() +class Converter1(object): + def __init__(self, a, b): + self.a = a + self.b = b +``` +ä½¿ç”¨æ³¨å†Œå™¨ç®¡ç†æ¨¡å—çš„å…³é”®æ­¥éª¤æ˜¯ï¼Œå°†å®žçŽ°çš„æ¨¡å—æ³¨å†Œåˆ°æ³¨å†Œè¡¨ `CONVERTERS` 中。通过 `@CONVERTERS.register_module()` 装饰所实现的模å—,字符串和类之间的映射就å¯ä»¥ç”± `CONVERTERS` 构建和维护,如下所示: + +é€šè¿‡è¿™ç§æ–¹å¼ï¼Œå°±å¯ä»¥é€šè¿‡ `CONVERTERS` 建立字符串与类之间的映射,如下所示: + +```python +'Converter1' -> +``` + +如果模å—被æˆåŠŸæ³¨å†Œäº†ï¼Œä½ å¯ä»¥é€šè¿‡é…置文件使用这个转æ¢å™¨ï¼ˆconverter),如下所示: + +```python +converter_cfg = dict(type='Converter1', a=a_value, b=b_value) +converter = CONVERTERS.build(converter_cfg) +``` + +### 自定义构建函数 + +å‡è®¾æˆ‘们想自定义 `converters` 的构建æµç¨‹ï¼Œæˆ‘们å¯ä»¥å®žçŽ°ä¸€ä¸ªè‡ªå®šä¹‰çš„ `build_func` (构建函数)并将其传递到注册器中。 + +```python +from mmcv.utils import Registry + +# 创建一个构建函数 +def build_converter(cfg, registry, *args, **kwargs): + cfg_ = cfg.copy() + converter_type = cfg_.pop('type') + if converter_type not in registry: + raise KeyError(f'Unrecognized converter type {converter_type}') + else: + converter_cls = registry.get(converter_type) + + converter = converter_cls(*args, **kwargs, **cfg_) + return converter + +# 
创建一个用于转æ¢å™¨ï¼ˆconverters)的注册器,并传递(registry)``build_converter`` 函数 +CONVERTERS = Registry('converter', build_func=build_converter) +``` + +```{note} +注:在这个例å­ä¸­ï¼Œæˆ‘ä»¬æ¼”ç¤ºäº†å¦‚ä½•ä½¿ç”¨å‚æ•°ï¼š`build_func` 自定义构建类的实例的方法。 +该功能类似于默认的`build_from_cfg`。在大多数情况下,默认就足够了。 +``` + +`build_model_from_cfg`也实现了在`nn.Sequentail`中构建PyTorch模å—,你å¯ä»¥ç›´æŽ¥ä½¿ç”¨å®ƒä»¬ã€‚ + +### 注册器层结构 + +你也å¯ä»¥ä»Žå¤šä¸ª OpenMMLab å¼€æºæ¡†æž¶ä¸­æž„建模å—,例如,你å¯ä»¥æŠŠæ‰€æœ‰ [MMClassification](https://github.com/open-mmlab/mmclassification) 中的主干网络(backbone)用到 [MMDetection](https://github.com/open-mmlab/mmdetection) 的目标检测中,你也å¯ä»¥èžåˆ [MMDetection](https://github.com/open-mmlab/mmdetection) 中的目标检测模型 å’Œ [MMSegmentation](https://github.com/open-mmlab/mmsegmentation) 语义分割模型。 + +下游代ç åº“中所有 `MODELS` 注册器都是MMCV `MODELS` æ³¨å†Œå™¨çš„å­æ³¨å†Œå™¨ã€‚åŸºæœ¬ä¸Šï¼Œä½¿ç”¨ä»¥ä¸‹ä¸¤ç§æ–¹æ³•ä»Žå­æ³¨å†Œå™¨æˆ–相邻兄弟注册器构建模å—。 + +1. ä»Žå­æ³¨å†Œå™¨ä¸­æž„建 + + 例如: + + 我们在 MMDetection 中定义: + + ```python + from mmcv.utils import Registry + from mmcv.cnn import MODELS as MMCV_MODELS + MODELS = Registry('model', parent=MMCV_MODELS) + + @MODELS.register_module() + class NetA(nn.Module): + def forward(self, x): + return x + ``` + + 我们在 MMClassification 中定义: + + ```python + from mmcv.utils import Registry + from mmcv.cnn import MODELS as MMCV_MODELS + MODELS = Registry('model', parent=MMCV_MODELS) + + @MODELS.register_module() + class NetB(nn.Module): + def forward(self, x): + return x + 1 + ``` + + 我们å¯ä»¥é€šè¿‡ä»¥ä¸‹ä»£ç åœ¨ MMDetection 或 MMClassification 中构建两个网络: + + ```python + from mmdet.models import MODELS + net_a = MODELS.build(cfg=dict(type='NetA')) + net_b = MODELS.build(cfg=dict(type='mmcls.NetB')) + ``` + + 或 + + ```python + from mmcls.models import MODELS + net_a = MODELS.build(cfg=dict(type='mmdet.NetA')) + net_b = MODELS.build(cfg=dict(type='NetB')) + ``` + +2. 
从父注册器中构建 + + MMCV中的共享`MODELS`注册器是所有下游代ç åº“的父注册器(根注册器): + + ```python + from mmcv.cnn import MODELS as MMCV_MODELS + net_a = MMCV_MODELS.build(cfg=dict(type='mmdet.NetA')) + net_b = MMCV_MODELS.build(cfg=dict(type='mmcls.NetB')) + ``` diff --git a/docs_zh_CN/understand_mmcv/runner.md b/docs_zh_CN/understand_mmcv/runner.md new file mode 100644 index 0000000..203a5dc --- /dev/null +++ b/docs_zh_CN/understand_mmcv/runner.md @@ -0,0 +1,155 @@ +## 执行器 + +执行器模å—负责模型训练过程调度,主è¦ç›®çš„æ˜¯è®©ç”¨æˆ·ä½¿ç”¨æ›´å°‘的代ç ä»¥åŠçµæ´»å¯é…置方å¼å¼€å¯è®­ç»ƒã€‚其具备如下核心特性: + +- 支æŒä»¥ `EpochBasedRunner` å’Œ `IterBasedRunner` 为å•ä½çš„迭代模å¼ä»¥æ»¡è¶³ä¸åŒåœºæ™¯ +- 支æŒå®šåˆ¶å·¥ä½œæµä»¥æ»¡è¶³è®­ç»ƒè¿‡ç¨‹ä¸­å„状æ€è‡ªç”±åˆ‡æ¢ï¼Œç›®å‰æ”¯æŒè®­ç»ƒå’ŒéªŒè¯ä¸¤ä¸ªå·¥ä½œæµã€‚工作æµå¯ä»¥ç®€å•ç†è§£ä¸ºä¸€ä¸ªå®Œæˆçš„训练和验è¯è¿­ä»£è¿‡ç¨‹ã€‚ +- é…åˆå„类默认和自定义 Hook,对外æä¾›äº†çµæ´»æ‰©å±•能力 + +### EpochBasedRunner + +é¡¾åæ€ä¹‰ï¼Œ`EpochBasedRunner` 是指以 epoch 为周期的工作æµï¼Œä¾‹å¦‚设置 workflow = [('train', 2), ('val', 1)] 表示循环迭代地训练 2 个 epoch,然åŽéªŒè¯ 1 个 epoch。MMDetection 目标检测框架默认采用的是 `EpochBasedRunner`。 + +其抽象逻辑如下所示: + +```python +# 训练终止æ¡ä»¶ +while curr_epoch < max_epochs: + # é历用户设置的工作æµï¼Œä¾‹å¦‚ workflow = [('train', 2),('val', 1)] + for i, flow in enumerate(workflow): + # mode 是工作æµå‡½æ•°ï¼Œä¾‹å¦‚ train, epochs 是迭代次数 + mode, epochs = flow + # è¦ä¹ˆè°ƒç”¨ self.train(),è¦ä¹ˆè°ƒç”¨ self.val() + epoch_runner = getattr(self, mode) + # è¿è¡Œå¯¹åº”工作æµå‡½æ•° + for _ in range(epochs): + epoch_runner(data_loaders[i], **kwargs) +``` +ç›®å‰æ”¯æŒè®­ç»ƒå’ŒéªŒè¯ä¸¤ä¸ªå·¥ä½œæµï¼Œä»¥è®­ç»ƒå‡½æ•°ä¸ºä¾‹ï¼Œå…¶æŠ½è±¡é€»è¾‘是: + +```python +# epoch_runner ç›®å‰å¯ä»¥æ˜¯ train 或者 val +def train(self, data_loader, **kwargs): + # é历 dataset,共返回一个 epoch çš„ batch æ•°æ® + for i, data_batch in enumerate(data_loader): + self.call_hook('before_train_iter') + # éªŒè¯æ—¶å€™ train_mode=False + self.run_iter(data_batch, train_mode=True, **kwargs) + self.call_hook('after_train_iter') + self.call_hook('after_train_epoch') +``` + +### IterBasedRunner +ä¸åŒäºŽ 
`EpochBasedRunner`,`IterBasedRunner` 是指以 iter 为周期的工作æµï¼Œä¾‹å¦‚设置 workflow = [('train', 2), ('val', 1)] 表示循环迭代的训练 2 个 iter,然åŽéªŒè¯ 1 个 iter,MMSegmentation 语义分割框架默认采用的是 `EpochBasedRunner`。 + +其抽象逻辑如下所示: + +```python +# 虽然是 iter å•ä½ï¼Œä½†æ˜¯æŸäº›åœºåˆéœ€è¦ epoch ä¿¡æ¯ï¼Œç”± IterLoader æä¾› +iter_loaders = [IterLoader(x) for x in data_loaders] +# 训练终止æ¡ä»¶ +while curr_iter < max_iters: + # é历用户设置的工作æµï¼Œä¾‹å¦‚ workflow = [('train', 2), ('val', 1)] + for i, flow in enumerate(workflow): + # mode 是工作æµå‡½æ•°ï¼Œä¾‹å¦‚ train, iters 是迭代次数 + mode, iters = flow + # è¦ä¹ˆè°ƒç”¨ self.train(),è¦ä¹ˆè°ƒç”¨ self.val() + iter_runner = getattr(self, mode) + # è¿è¡Œå¯¹åº”工作æµå‡½æ•° + for _ in range(iters): + iter_runner(iter_loaders[i], **kwargs) +``` +ç›®å‰æ”¯æŒè®­ç»ƒå’ŒéªŒè¯ä¸¤ä¸ªå·¥ä½œæµï¼Œä»¥éªŒè¯å‡½æ•°ä¸ºä¾‹ï¼Œå…¶æŠ½è±¡é€»è¾‘是: + +```python +# iter_runner ç›®å‰å¯ä»¥æ˜¯ train 或者 val +def val(self, data_loader, **kwargs): + # èŽ·å– batch æ•°æ®ï¼Œç”¨äºŽä¸€æ¬¡è¿­ä»£ + data_batch = next(data_loader) + self.call_hook('before_val_iter') + outputs = self.model.val_step(data_batch, self.optimizer, **kwargs) + self.outputs = outputs + self.call_hook('after_val_iter') +``` + +除了上述基础功能外,`EpochBasedRunner` å’Œ `IterBasedRunner` 还æä¾›äº† resume 〠save_checkpoint 和注册 hook 功能。 + +### 一个简å•ä¾‹å­ +以最常用的分类任务为例详细说明 `runner` 的使用方法。 å¼€å¯ä»»ä½•一个训练任务,都需è¦åŒ…括如下步骤: + +**(1) dataloaderã€model 和优化器等类åˆå§‹åŒ–** + +```python +# 模型类åˆå§‹åŒ– +model=... +# 优化器类åˆå§‹åŒ–,典型值 cfg.optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001) +optimizer = build_optimizer(model, cfg.optimizer) +# 工作æµå¯¹åº”çš„ dataloader åˆå§‹åŒ– +data_loaders = [ + build_dataloader( + ds, + cfg.data.samples_per_gpu, + cfg.data.workers_per_gpu, + ...) 
for ds in dataset + ] +``` + +**(2) runner ç±»åˆå§‹åŒ–** + +```python +runner = build_runner( + # cfg.runner 典型é…置为 + # runner = dict(type='EpochBasedRunner', max_epochs=200) + cfg.runner, + default_args=dict( + model=model, + batch_processor=None, + optimizer=optimizer, + logger=logger)) +``` + +**(3) 注册默认训练所必须的 hook,和用户自定义 hook** + +```python +# 注册定制必需的 hook +runner.register_training_hooks( + # lr相关é…置,典型为 + # lr_config = dict(policy='step', step=[100, 150]) + cfg.lr_config, + # 优化相关é…置,例如 grad_clip ç­‰ + optimizer_config, + # æƒé‡ä¿å­˜ç›¸å…³é…置,典型为 + # checkpoint_config = dict(interval=1),æ¯ä¸ªå•ä½éƒ½ä¿å­˜æƒé‡ + cfg.checkpoint_config, + # 日志相关é…ç½® + cfg.log_config, + ...) + +# 注册用户自定义 hook +# 例如想使用 ema 功能,则å¯ä»¥è®¾ç½® custom_hooks=[dict(type='EMAHook')] +if cfg.get('custom_hooks', None): + custom_hooks = cfg.custom_hooks + for hook_cfg in cfg.custom_hooks: + hook_cfg = hook_cfg.copy() + priority = hook_cfg.pop('priority', 'NORMAL') + hook = build_from_cfg(hook_cfg, HOOKS) + runner.register_hook(hook, priority=priority) +``` + +ç„¶åŽå¯ä»¥è¿›è¡Œ resume 或者 load_checkpoint 对æƒé‡è¿›è¡ŒåŠ è½½ã€‚ + +**(4) å¼€å¯è®­ç»ƒæµ** + +```python +# workflow 典型为 workflow = [('train', 1)] +# 此时就真正开å¯äº†è®­ç»ƒ +runner.run(data_loaders, cfg.workflow) +``` + +关于 workflow 设置,以 `EpochBasedRunner` 为例,详情如下: + +- å‡è®¾åªæƒ³è¿è¡Œè®­ç»ƒå·¥ä½œæµï¼Œåˆ™å¯ä»¥è®¾ç½® workflow = [('train', 1)],表示åªè¿›è¡Œè¿­ä»£è®­ç»ƒ +- å‡è®¾æƒ³è¿è¡Œè®­ç»ƒå’ŒéªŒè¯å·¥ä½œæµï¼Œåˆ™å¯ä»¥è®¾ç½® workflow = [('train', 3), ('val', 1)],表示先训练 3 个 epoch ,然åŽåˆ‡æ¢åˆ° val 工作æµï¼Œè¿è¡Œ 1 个 epoch,然åŽå¾ªçŽ¯ï¼Œç›´åˆ°è®­ç»ƒ epoch 次数达到指定值 +- 工作æµè®¾ç½®è¿˜è‡ªç”±å®šåˆ¶ï¼Œä¾‹å¦‚ä½ å¯ä»¥å…ˆéªŒè¯å†è®­ç»ƒ workflow = [('val', 1), ('train', 1)] + +上述代ç éƒ½å·²ç»å°è£…到了å„个代ç åº“çš„ train.py 中,用户åªéœ€è¦è®¾ç½®ç›¸åº”çš„é…ç½®å³å¯ï¼Œä¸Šè¿°æµç¨‹ä¼šè‡ªåЍè¿è¡Œã€‚ diff --git a/docs_zh_CN/understand_mmcv/utils.md b/docs_zh_CN/understand_mmcv/utils.md new file mode 100644 index 0000000..746c560 --- /dev/null +++ b/docs_zh_CN/understand_mmcv/utils.md @@ 
-0,0 +1,69 @@ +## 辅助函数 + +### è¿›åº¦æ¡ + +如果你想跟踪函数批处ç†ä»»åŠ¡çš„è¿›åº¦ï¼Œå¯ä»¥ä½¿ç”¨ `track_progress` 。它能以进度æ¡çš„å½¢å¼å±•ç¤ºä»»åŠ¡çš„å®Œæˆæƒ…况以åŠå‰©ä½™ä»»åŠ¡æ‰€éœ€çš„æ—¶é—´ï¼ˆå†…éƒ¨å®žçŽ°ä¸ºfor循环)。 + +```python +import mmcv + +def func(item): + # 执行相关æ“作 + pass + +tasks = [item_1, item_2, ..., item_n] + +mmcv.track_progress(func, tasks) +``` + +效果如下 +![progress](../../docs/_static/progress.*) + +如果你想å¯è§†åŒ–多进程任务的进度,你å¯ä»¥ä½¿ç”¨ `track_parallel_progress` 。 + +```python +mmcv.track_parallel_progress(func, tasks, 8) # 8 workers +``` + +![progress](../../docs/_static/parallel_progress.*) + +如果你想è¦è¿­ä»£æˆ–枚举数æ®åˆ—表并å¯è§†åŒ–进度,ä½ å¯ä»¥ä½¿ç”¨ `track_iter_progress` 。 + +```python +import mmcv + +tasks = [item_1, item_2, ..., item_n] + +for task in mmcv.track_iter_progress(tasks): + # do something like print + print(task) + +for i, task in enumerate(mmcv.track_iter_progress(tasks)): + # do something like print + print(i) + print(task) +``` + +### 计时器 + +mmcvæä¾›çš„ `Timer` å¯ä»¥å¾ˆæ–¹ä¾¿åœ°è®¡ç®—代ç å—的执行时间。 + +```python +import time + +with mmcv.Timer(): + # simulate some code block + time.sleep(1) +``` + +你也å¯ä»¥ä½¿ç”¨ `since_start()` å’Œ `since_last_check()` 。å‰è€…返回计时器å¯åЍåŽçš„è¿è¡Œæ—¶é•¿ï¼ŒåŽè€…返回最近一次查看计时器åŽçš„è¿è¡Œæ—¶é•¿ã€‚ + + +```python +timer = mmcv.Timer() +# code block 1 here +print(timer.since_start()) +# code block 2 here +print(timer.since_last_check()) +print(timer.since_start()) +``` diff --git a/docs/zh_cn/understand_mmcv/visualization.md b/docs_zh_CN/understand_mmcv/visualization.md similarity index 100% rename from docs/zh_cn/understand_mmcv/visualization.md rename to docs_zh_CN/understand_mmcv/visualization.md diff --git a/examples/train.py b/examples/train.py new file mode 100644 index 0000000..2dbdfee --- /dev/null +++ b/examples/train.py @@ -0,0 +1,84 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.optim as optim +import torchvision.transforms as transforms +from torch.utils.data import DataLoader +from 
torchvision.datasets import CIFAR10 + +from mmcv.parallel import MMDataParallel +from mmcv.runner import EpochBasedRunner +from mmcv.utils import get_logger + + +class Model(nn.Module): + + def __init__(self): + super(Model, self).__init__() + self.conv1 = nn.Conv2d(3, 6, 5) + self.pool = nn.MaxPool2d(2, 2) + self.conv2 = nn.Conv2d(6, 16, 5) + self.fc1 = nn.Linear(16 * 5 * 5, 120) + self.fc2 = nn.Linear(120, 84) + self.fc3 = nn.Linear(84, 10) + self.loss_fn = nn.CrossEntropyLoss() + + def forward(self, x): + x = self.pool(F.relu(self.conv1(x))) + x = self.pool(F.relu(self.conv2(x))) + x = x.view(-1, 16 * 5 * 5) + x = F.relu(self.fc1(x)) + x = F.relu(self.fc2(x)) + x = self.fc3(x) + return x + + def train_step(self, data, optimizer): + images, labels = data + predicts = self(images) # -> self.__call__() -> self.forward() + loss = self.loss_fn(predicts, labels) + return {'loss': loss} + + +if __name__ == '__main__': + model = Model() + if torch.cuda.is_available(): + # only use gpu:0 to train + # Solved issue https://github.com/open-mmlab/mmcv/issues/1470 + model = MMDataParallel(model.cuda(), device_ids=[0]) + + # dataset and dataloader + transform = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) + ]) + trainset = CIFAR10( + root='data', train=True, download=True, transform=transform) + trainloader = DataLoader( + trainset, batch_size=128, shuffle=True, num_workers=2) + + optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9) + logger = get_logger('mmcv') + # runner is a scheduler to manage the training + runner = EpochBasedRunner( + model, + optimizer=optimizer, + work_dir='./work_dir', + logger=logger, + max_epochs=4) + + # learning rate scheduler config + lr_config = dict(policy='step', step=[2, 3]) + # configuration of optimizer + optimizer_config = dict(grad_clip=None) + # configuration of saving checkpoints periodically + checkpoint_config = dict(interval=1) + # save log periodically and 
multiple hooks can be used simultaneously + log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) + # register hooks to runner and those hooks will be invoked automatically + runner.register_training_hooks( + lr_config=lr_config, + optimizer_config=optimizer_config, + checkpoint_config=checkpoint_config, + log_config=log_config) + + runner.run([trainloader], [('train', 1)]) diff --git a/mmcv/__init__.py b/mmcv/__init__.py index 2410ea5..210a298 100644 --- a/mmcv/__init__.py +++ b/mmcv/__init__.py @@ -1,13 +1,15 @@ # Copyright (c) OpenMMLab. All rights reserved. # flake8: noqa from .arraymisc import * +from .fileio import * from .image import * -from .transforms import * +from .utils import * from .version import * from .video import * from .visualization import * # The following modules are not imported to this level, so mmcv may be used # without PyTorch. +# - runner +# - parallel # - op -# - utils diff --git a/mmcv/arraymisc/quantization.py b/mmcv/arraymisc/quantization.py index 6182710..8e47a35 100644 --- a/mmcv/arraymisc/quantization.py +++ b/mmcv/arraymisc/quantization.py @@ -1,20 +1,14 @@ # Copyright (c) OpenMMLab. All rights reserved. -from typing import Union - import numpy as np -def quantize(arr: np.ndarray, - min_val: Union[int, float], - max_val: Union[int, float], - levels: int, - dtype=np.int64) -> tuple: +def quantize(arr, min_val, max_val, levels, dtype=np.int64): """Quantize an array of (-inf, inf) to [0, levels-1]. Args: arr (ndarray): Input array. - min_val (int or float): Minimum value to be clipped. - max_val (int or float): Maximum value to be clipped. + min_val (scalar): Minimum value to be clipped. + max_val (scalar): Maximum value to be clipped. levels (int): Quantization levels. dtype (np.type): The type of the quantized array. 
@@ -35,17 +29,13 @@ def quantize(arr: np.ndarray, return quantized_arr -def dequantize(arr: np.ndarray, - min_val: Union[int, float], - max_val: Union[int, float], - levels: int, - dtype=np.float64) -> tuple: +def dequantize(arr, min_val, max_val, levels, dtype=np.float64): """Dequantize an array. Args: arr (ndarray): Input array. - min_val (int or float): Minimum value to be clipped. - max_val (int or float): Maximum value to be clipped. + min_val (scalar): Minimum value to be clipped. + max_val (scalar): Maximum value to be clipped. levels (int): Quantization levels. dtype (np.type): The type of the dequantized array. diff --git a/mmcv/cnn/__init__.py b/mmcv/cnn/__init__.py index 10e7e02..7246c89 100644 --- a/mmcv/cnn/__init__.py +++ b/mmcv/cnn/__init__.py @@ -1,7 +1,9 @@ # Copyright (c) OpenMMLab. All rights reserved. from .alexnet import AlexNet # yapf: disable -from .bricks import (ContextBlock, Conv2d, Conv3d, ConvAWS2d, ConvModule, +from .bricks import (ACTIVATION_LAYERS, CONV_LAYERS, NORM_LAYERS, + PADDING_LAYERS, PLUGIN_LAYERS, UPSAMPLE_LAYERS, + ContextBlock, Conv2d, Conv3d, ConvAWS2d, ConvModule, ConvTranspose2d, ConvTranspose3d, ConvWS2d, DepthwiseSeparableConvModule, GeneralizedAttention, HSigmoid, HSwish, Linear, MaxPool2d, MaxPool3d, @@ -9,20 +11,31 @@ from .bricks import (ContextBlock, Conv2d, Conv3d, ConvAWS2d, ConvModule, build_activation_layer, build_conv_layer, build_norm_layer, build_padding_layer, build_plugin_layer, build_upsample_layer, conv_ws_2d, is_norm) +from .builder import MODELS, build_model_from_cfg # yapf: enable from .resnet import ResNet, make_res_layer -from .rfsearch import Conv2dRFSearchOp, RFSearchHook -from .utils import fuse_conv_bn, get_model_complexity_info +from .utils import (INITIALIZERS, Caffe2XavierInit, ConstantInit, KaimingInit, + NormalInit, PretrainedInit, TruncNormalInit, UniformInit, + XavierInit, bias_init_with_prob, caffe2_xavier_init, + constant_init, fuse_conv_bn, get_model_complexity_info, + initialize, 
kaiming_init, normal_init, trunc_normal_init, + uniform_init, xavier_init) from .vgg import VGG, make_vgg_layer __all__ = [ 'AlexNet', 'VGG', 'make_vgg_layer', 'ResNet', 'make_res_layer', - 'ConvModule', 'build_activation_layer', 'build_conv_layer', - 'build_norm_layer', 'build_padding_layer', 'build_upsample_layer', - 'build_plugin_layer', 'is_norm', 'NonLocal1d', 'NonLocal2d', 'NonLocal3d', - 'ContextBlock', 'HSigmoid', 'Swish', 'HSwish', 'GeneralizedAttention', - 'Scale', 'conv_ws_2d', 'ConvAWS2d', 'ConvWS2d', - 'DepthwiseSeparableConvModule', 'Linear', 'Conv2d', 'ConvTranspose2d', - 'MaxPool2d', 'ConvTranspose3d', 'MaxPool3d', 'Conv3d', 'fuse_conv_bn', - 'get_model_complexity_info', 'Conv2dRFSearchOp', 'RFSearchHook' + 'constant_init', 'xavier_init', 'normal_init', 'trunc_normal_init', + 'uniform_init', 'kaiming_init', 'caffe2_xavier_init', + 'bias_init_with_prob', 'ConvModule', 'build_activation_layer', + 'build_conv_layer', 'build_norm_layer', 'build_padding_layer', + 'build_upsample_layer', 'build_plugin_layer', 'is_norm', 'NonLocal1d', + 'NonLocal2d', 'NonLocal3d', 'ContextBlock', 'HSigmoid', 'Swish', 'HSwish', + 'GeneralizedAttention', 'ACTIVATION_LAYERS', 'CONV_LAYERS', 'NORM_LAYERS', + 'PADDING_LAYERS', 'UPSAMPLE_LAYERS', 'PLUGIN_LAYERS', 'Scale', + 'get_model_complexity_info', 'conv_ws_2d', 'ConvAWS2d', 'ConvWS2d', + 'fuse_conv_bn', 'DepthwiseSeparableConvModule', 'Linear', 'Conv2d', + 'ConvTranspose2d', 'MaxPool2d', 'ConvTranspose3d', 'MaxPool3d', 'Conv3d', + 'initialize', 'INITIALIZERS', 'ConstantInit', 'XavierInit', 'NormalInit', + 'TruncNormalInit', 'UniformInit', 'KaimingInit', 'PretrainedInit', + 'Caffe2XavierInit', 'MODELS', 'build_model_from_cfg' ] diff --git a/mmcv/cnn/alexnet.py b/mmcv/cnn/alexnet.py index 309be24..89e36b8 100644 --- a/mmcv/cnn/alexnet.py +++ b/mmcv/cnn/alexnet.py @@ -1,10 +1,7 @@ # Copyright (c) OpenMMLab. All rights reserved. 
import logging -from typing import Optional -import torch import torch.nn as nn -from mmengine.runner import load_checkpoint class AlexNet(nn.Module): @@ -14,8 +11,8 @@ class AlexNet(nn.Module): num_classes (int): number of classes for classification. """ - def __init__(self, num_classes: int = -1): - super().__init__() + def __init__(self, num_classes=-1): + super(AlexNet, self).__init__() self.num_classes = num_classes self.features = nn.Sequential( nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2), @@ -43,9 +40,10 @@ class AlexNet(nn.Module): nn.Linear(4096, num_classes), ) - def init_weights(self, pretrained: Optional[str] = None) -> None: + def init_weights(self, pretrained=None): if isinstance(pretrained, str): logger = logging.getLogger() + from ..runner import load_checkpoint load_checkpoint(self, pretrained, strict=False, logger=logger) elif pretrained is None: # use default initializer @@ -53,7 +51,7 @@ class AlexNet(nn.Module): else: raise TypeError('pretrained must be a str or None') - def forward(self, x: torch.Tensor) -> torch.Tensor: + def forward(self, x): x = self.features(x) if self.num_classes > 0: diff --git a/mmcv/cnn/bricks/__init__.py b/mmcv/cnn/bricks/__init__.py index 6c74986..0f33124 100644 --- a/mmcv/cnn/bricks/__init__.py +++ b/mmcv/cnn/bricks/__init__.py @@ -14,7 +14,9 @@ from .non_local import NonLocal1d, NonLocal2d, NonLocal3d from .norm import build_norm_layer, is_norm from .padding import build_padding_layer from .plugin import build_plugin_layer -from .scale import LayerScale, Scale +from .registry import (ACTIVATION_LAYERS, CONV_LAYERS, NORM_LAYERS, + PADDING_LAYERS, PLUGIN_LAYERS, UPSAMPLE_LAYERS) +from .scale import Scale from .swish import Swish from .upsample import build_upsample_layer from .wrappers import (Conv2d, Conv3d, ConvTranspose2d, ConvTranspose3d, @@ -25,8 +27,9 @@ __all__ = [ 'build_norm_layer', 'build_padding_layer', 'build_upsample_layer', 'build_plugin_layer', 'is_norm', 'HSigmoid', 'HSwish', 'NonLocal1d', 
'NonLocal2d', 'NonLocal3d', 'ContextBlock', 'GeneralizedAttention', - 'Scale', 'ConvAWS2d', 'ConvWS2d', 'conv_ws_2d', - 'DepthwiseSeparableConvModule', 'Swish', 'Linear', 'Conv2dAdaptivePadding', - 'Conv2d', 'ConvTranspose2d', 'MaxPool2d', 'ConvTranspose3d', 'MaxPool3d', - 'Conv3d', 'Dropout', 'DropPath', 'LayerScale' + 'ACTIVATION_LAYERS', 'CONV_LAYERS', 'NORM_LAYERS', 'PADDING_LAYERS', + 'UPSAMPLE_LAYERS', 'PLUGIN_LAYERS', 'Scale', 'ConvAWS2d', 'ConvWS2d', + 'conv_ws_2d', 'DepthwiseSeparableConvModule', 'Swish', 'Linear', + 'Conv2dAdaptivePadding', 'Conv2d', 'ConvTranspose2d', 'MaxPool2d', + 'ConvTranspose3d', 'MaxPool3d', 'Conv3d', 'Dropout', 'DropPath' ] diff --git a/mmcv/cnn/bricks/activation.py b/mmcv/cnn/bricks/activation.py index ae99714..79f1988 100644 --- a/mmcv/cnn/bricks/activation.py +++ b/mmcv/cnn/bricks/activation.py @@ -1,41 +1,20 @@ # Copyright (c) OpenMMLab. All rights reserved. -from typing import Dict - import torch import torch.nn as nn import torch.nn.functional as F -from mmengine.registry import MODELS -from mmengine.utils import digit_version -from mmengine.utils.dl_utils import TORCH_VERSION + +from mmcv.utils import TORCH_VERSION, build_from_cfg, digit_version +from .registry import ACTIVATION_LAYERS for module in [ nn.ReLU, nn.LeakyReLU, nn.PReLU, nn.RReLU, nn.ReLU6, nn.ELU, nn.Sigmoid, nn.Tanh ]: - MODELS.register_module(module=module) - -if digit_version(torch.__version__) >= digit_version('1.7.0'): - MODELS.register_module(module=nn.SiLU, name='SiLU') -else: - - class SiLU(nn.Module): - """Sigmoid Weighted Liner Unit.""" + ACTIVATION_LAYERS.register_module(module=module) - def __init__(self, inplace=False): - super().__init__() - self.inplace = inplace - def forward(self, inputs) -> torch.Tensor: - if self.inplace: - return inputs.mul_(torch.sigmoid(inputs)) - else: - return inputs * torch.sigmoid(inputs) - - MODELS.register_module(module=SiLU, name='SiLU') - - -@MODELS.register_module(name='Clip') -@MODELS.register_module() 
+@ACTIVATION_LAYERS.register_module(name='Clip') +@ACTIVATION_LAYERS.register_module() class Clamp(nn.Module): """Clamp activation layer. @@ -49,12 +28,12 @@ class Clamp(nn.Module): Default to 1. """ - def __init__(self, min: float = -1., max: float = 1.): - super().__init__() + def __init__(self, min=-1., max=1.): + super(Clamp, self).__init__() self.min = min self.max = max - def forward(self, x) -> torch.Tensor: + def forward(self, x): """Forward function. Args: @@ -88,27 +67,26 @@ class GELU(nn.Module): >>> output = m(input) """ - def forward(self, input: torch.Tensor) -> torch.Tensor: + def forward(self, input): return F.gelu(input) if (TORCH_VERSION == 'parrots' or digit_version(TORCH_VERSION) < digit_version('1.4')): - MODELS.register_module(module=GELU) + ACTIVATION_LAYERS.register_module(module=GELU) else: - MODELS.register_module(module=nn.GELU) + ACTIVATION_LAYERS.register_module(module=nn.GELU) -def build_activation_layer(cfg: Dict) -> nn.Module: +def build_activation_layer(cfg): """Build activation layer. Args: cfg (dict): The activation layer config, which should contain: - - type (str): Layer type. - layer args: Args needed to instantiate an activation layer. Returns: nn.Module: Created activation layer. """ - return MODELS.build(cfg) + return build_from_cfg(cfg, ACTIVATION_LAYERS) diff --git a/mmcv/cnn/bricks/context_block.py b/mmcv/cnn/bricks/context_block.py index 1e78df8..d60fdb9 100644 --- a/mmcv/cnn/bricks/context_block.py +++ b/mmcv/cnn/bricks/context_block.py @@ -1,20 +1,19 @@ # Copyright (c) OpenMMLab. All rights reserved. 
-from typing import Union - import torch -from mmengine.model import constant_init, kaiming_init -from mmengine.registry import MODELS from torch import nn +from ..utils import constant_init, kaiming_init +from .registry import PLUGIN_LAYERS + -def last_zero_init(m: Union[nn.Module, nn.Sequential]) -> None: +def last_zero_init(m): if isinstance(m, nn.Sequential): constant_init(m[-1], val=0) else: constant_init(m, val=0) -@MODELS.register_module() +@PLUGIN_LAYERS.register_module() class ContextBlock(nn.Module): """ContextBlock module in GCNet. @@ -35,11 +34,11 @@ class ContextBlock(nn.Module): _abbr_ = 'context_block' def __init__(self, - in_channels: int, - ratio: float, - pooling_type: str = 'att', - fusion_types: tuple = ('channel_add', )): - super().__init__() + in_channels, + ratio, + pooling_type='att', + fusion_types=('channel_add', )): + super(ContextBlock, self).__init__() assert pooling_type in ['avg', 'att'] assert isinstance(fusion_types, (list, tuple)) valid_fusion_types = ['channel_add', 'channel_mul'] @@ -83,7 +82,7 @@ class ContextBlock(nn.Module): if self.channel_mul_conv is not None: last_zero_init(self.channel_mul_conv) - def spatial_pool(self, x: torch.Tensor) -> torch.Tensor: + def spatial_pool(self, x): batch, channel, height, width = x.size() if self.pooling_type == 'att': input_x = x @@ -109,7 +108,7 @@ class ContextBlock(nn.Module): return context - def forward(self, x: torch.Tensor) -> torch.Tensor: + def forward(self, x): # [N, C, 1, 1] context = self.spatial_pool(x) diff --git a/mmcv/cnn/bricks/conv.py b/mmcv/cnn/bricks/conv.py index ace744e..cf54491 100644 --- a/mmcv/cnn/bricks/conv.py +++ b/mmcv/cnn/bricks/conv.py @@ -1,16 +1,15 @@ # Copyright (c) OpenMMLab. All rights reserved. 
-from typing import Dict, Optional - -from mmengine.registry import MODELS from torch import nn -MODELS.register_module('Conv1d', module=nn.Conv1d) -MODELS.register_module('Conv2d', module=nn.Conv2d) -MODELS.register_module('Conv3d', module=nn.Conv3d) -MODELS.register_module('Conv', module=nn.Conv2d) +from .registry import CONV_LAYERS + +CONV_LAYERS.register_module('Conv1d', module=nn.Conv1d) +CONV_LAYERS.register_module('Conv2d', module=nn.Conv2d) +CONV_LAYERS.register_module('Conv3d', module=nn.Conv3d) +CONV_LAYERS.register_module('Conv', module=nn.Conv2d) -def build_conv_layer(cfg: Optional[Dict], *args, **kwargs) -> nn.Module: +def build_conv_layer(cfg, *args, **kwargs): """Build convolution layer. Args: @@ -35,15 +34,11 @@ def build_conv_layer(cfg: Optional[Dict], *args, **kwargs) -> nn.Module: cfg_ = cfg.copy() layer_type = cfg_.pop('type') + if layer_type not in CONV_LAYERS: + raise KeyError(f'Unrecognized norm type {layer_type}') + else: + conv_layer = CONV_LAYERS.get(layer_type) - # Switch registry to the target scope. If `conv_layer` cannot be found - # in the registry, fallback to search `conv_layer` in the - # mmengine.MODELS. - with MODELS.switch_scope_and_registry(None) as registry: - conv_layer = registry.get(layer_type) - if conv_layer is None: - raise KeyError(f'Cannot find {conv_layer} in registry under scope ' - f'name {registry.scope}') layer = conv_layer(*args, **kwargs, **cfg_) return layer diff --git a/mmcv/cnn/bricks/conv2d_adaptive_padding.py b/mmcv/cnn/bricks/conv2d_adaptive_padding.py index 0ac9949..b45e758 100644 --- a/mmcv/cnn/bricks/conv2d_adaptive_padding.py +++ b/mmcv/cnn/bricks/conv2d_adaptive_padding.py @@ -1,14 +1,13 @@ # Copyright (c) OpenMMLab. All rights reserved. 
import math -from typing import Tuple, Union -import torch -from mmengine.registry import MODELS from torch import nn from torch.nn import functional as F +from .registry import CONV_LAYERS -@MODELS.register_module() + +@CONV_LAYERS.register_module() class Conv2dAdaptivePadding(nn.Conv2d): """Implementation of 2D convolution in tensorflow with `padding` as "same", which applies padding to input (if needed) so that input image gets fully @@ -32,18 +31,18 @@ class Conv2dAdaptivePadding(nn.Conv2d): """ def __init__(self, - in_channels: int, - out_channels: int, - kernel_size: Union[int, Tuple[int, int]], - stride: Union[int, Tuple[int, int]] = 1, - padding: Union[int, Tuple[int, int]] = 0, - dilation: Union[int, Tuple[int, int]] = 1, - groups: int = 1, - bias: bool = True): + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + bias=True): super().__init__(in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias) - def forward(self, x: torch.Tensor) -> torch.Tensor: + def forward(self, x): img_h, img_w = x.size()[-2:] kernel_h, kernel_w = self.weight.size()[-2:] stride_h, stride_w = self.stride diff --git a/mmcv/cnn/bricks/conv_module.py b/mmcv/cnn/bricks/conv_module.py index 1f8e160..4f19f1d 100644 --- a/mmcv/cnn/bricks/conv_module.py +++ b/mmcv/cnn/bricks/conv_module.py @@ -1,20 +1,18 @@ # Copyright (c) OpenMMLab. All rights reserved. 
import warnings -from typing import Dict, Optional, Tuple, Union -import torch import torch.nn as nn -from mmengine.model import constant_init, kaiming_init -from mmengine.registry import MODELS -from mmengine.utils.dl_utils.parrots_wrapper import _BatchNorm, _InstanceNorm +from mmcv.utils import _BatchNorm, _InstanceNorm +from ..utils import constant_init, kaiming_init from .activation import build_activation_layer from .conv import build_conv_layer from .norm import build_norm_layer from .padding import build_padding_layer +from .registry import PLUGIN_LAYERS -@MODELS.register_module() +@PLUGIN_LAYERS.register_module() class ConvModule(nn.Module): """A conv block that bundles conv/norm/activation layers. @@ -70,22 +68,22 @@ class ConvModule(nn.Module): _abbr_ = 'conv_block' def __init__(self, - in_channels: int, - out_channels: int, - kernel_size: Union[int, Tuple[int, int]], - stride: Union[int, Tuple[int, int]] = 1, - padding: Union[int, Tuple[int, int]] = 0, - dilation: Union[int, Tuple[int, int]] = 1, - groups: int = 1, - bias: Union[bool, str] = 'auto', - conv_cfg: Optional[Dict] = None, - norm_cfg: Optional[Dict] = None, - act_cfg: Optional[Dict] = dict(type='ReLU'), - inplace: bool = True, - with_spectral_norm: bool = False, - padding_mode: str = 'zeros', - order: tuple = ('conv', 'norm', 'act')): - super().__init__() + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + bias='auto', + conv_cfg=None, + norm_cfg=None, + act_cfg=dict(type='ReLU'), + inplace=True, + with_spectral_norm=False, + padding_mode='zeros', + order=('conv', 'norm', 'act')): + super(ConvModule, self).__init__() assert conv_cfg is None or isinstance(conv_cfg, dict) assert norm_cfg is None or isinstance(norm_cfg, dict) assert act_cfg is None or isinstance(act_cfg, dict) @@ -98,7 +96,7 @@ class ConvModule(nn.Module): self.with_explicit_padding = padding_mode not in official_padding_mode self.order = order assert isinstance(self.order, tuple) 
and len(self.order) == 3 - assert set(order) == {'conv', 'norm', 'act'} + assert set(order) == set(['conv', 'norm', 'act']) self.with_norm = norm_cfg is not None self.with_activation = act_cfg is not None @@ -145,22 +143,21 @@ class ConvModule(nn.Module): norm_channels = out_channels else: norm_channels = in_channels - self.norm_name, norm = build_norm_layer( - norm_cfg, norm_channels) # type: ignore + self.norm_name, norm = build_norm_layer(norm_cfg, norm_channels) self.add_module(self.norm_name, norm) if self.with_bias: if isinstance(norm, (_BatchNorm, _InstanceNorm)): warnings.warn( 'Unnecessary conv bias before batch/instance norm') else: - self.norm_name = None # type: ignore + self.norm_name = None # build activation layer if self.with_activation: - act_cfg_ = act_cfg.copy() # type: ignore + act_cfg_ = act_cfg.copy() # nn.Tanh has no 'inplace' argument if act_cfg_['type'] not in [ - 'Tanh', 'PReLU', 'Sigmoid', 'HSigmoid', 'Swish', 'GELU' + 'Tanh', 'PReLU', 'Sigmoid', 'HSigmoid', 'Swish' ]: act_cfg_.setdefault('inplace', inplace) self.activate = build_activation_layer(act_cfg_) @@ -196,10 +193,7 @@ class ConvModule(nn.Module): if self.with_norm: constant_init(self.norm, 1, bias=0) - def forward(self, - x: torch.Tensor, - activate: bool = True, - norm: bool = True) -> torch.Tensor: + def forward(self, x, activate=True, norm=True): for layer in self.order: if layer == 'conv': if self.with_explicit_padding: diff --git a/mmcv/cnn/bricks/conv_ws.py b/mmcv/cnn/bricks/conv_ws.py index 261f5c1..a3941e2 100644 --- a/mmcv/cnn/bricks/conv_ws.py +++ b/mmcv/cnn/bricks/conv_ws.py @@ -1,21 +1,19 @@ # Copyright (c) OpenMMLab. All rights reserved. 
-from collections import OrderedDict -from typing import Dict, List, Optional, Tuple, Union - import torch import torch.nn as nn import torch.nn.functional as F -from mmengine.registry import MODELS + +from .registry import CONV_LAYERS -def conv_ws_2d(input: torch.Tensor, - weight: torch.Tensor, - bias: Optional[torch.Tensor] = None, - stride: Union[int, Tuple[int, int]] = 1, - padding: Union[int, Tuple[int, int]] = 0, - dilation: Union[int, Tuple[int, int]] = 1, - groups: int = 1, - eps: float = 1e-5) -> torch.Tensor: +def conv_ws_2d(input, + weight, + bias=None, + stride=1, + padding=0, + dilation=1, + groups=1, + eps=1e-5): c_in = weight.size(0) weight_flat = weight.view(c_in, -1) mean = weight_flat.mean(dim=1, keepdim=True).view(c_in, 1, 1, 1) @@ -24,20 +22,20 @@ def conv_ws_2d(input: torch.Tensor, return F.conv2d(input, weight, bias, stride, padding, dilation, groups) -@MODELS.register_module('ConvWS') +@CONV_LAYERS.register_module('ConvWS') class ConvWS2d(nn.Conv2d): def __init__(self, - in_channels: int, - out_channels: int, - kernel_size: Union[int, Tuple[int, int]], - stride: Union[int, Tuple[int, int]] = 1, - padding: Union[int, Tuple[int, int]] = 0, - dilation: Union[int, Tuple[int, int]] = 1, - groups: int = 1, - bias: bool = True, - eps: float = 1e-5): - super().__init__( + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + bias=True, + eps=1e-5): + super(ConvWS2d, self).__init__( in_channels, out_channels, kernel_size, @@ -48,12 +46,12 @@ class ConvWS2d(nn.Conv2d): bias=bias) self.eps = eps - def forward(self, x: torch.Tensor) -> torch.Tensor: + def forward(self, x): return conv_ws_2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups, self.eps) -@MODELS.register_module(name='ConvAWS') +@CONV_LAYERS.register_module(name='ConvAWS') class ConvAWS2d(nn.Conv2d): """AWS (Adaptive Weight Standardization) @@ -78,14 +76,14 @@ class ConvAWS2d(nn.Conv2d): """ def __init__(self, - 
in_channels: int, - out_channels: int, - kernel_size: Union[int, Tuple[int, int]], - stride: Union[int, Tuple[int, int]] = 1, - padding: Union[int, Tuple[int, int]] = 0, - dilation: Union[int, Tuple[int, int]] = 1, - groups: int = 1, - bias: bool = True): + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + bias=True): super().__init__( in_channels, out_channels, @@ -100,7 +98,7 @@ class ConvAWS2d(nn.Conv2d): self.register_buffer('weight_beta', torch.zeros(self.out_channels, 1, 1, 1)) - def _get_weight(self, weight: torch.Tensor) -> torch.Tensor: + def _get_weight(self, weight): weight_flat = weight.view(weight.size(0), -1) mean = weight_flat.mean(dim=1).view(-1, 1, 1, 1) std = torch.sqrt(weight_flat.var(dim=1) + 1e-5).view(-1, 1, 1, 1) @@ -108,16 +106,13 @@ class ConvAWS2d(nn.Conv2d): weight = self.weight_gamma * weight + self.weight_beta return weight - def forward(self, x: torch.Tensor) -> torch.Tensor: + def forward(self, x): weight = self._get_weight(self.weight) return F.conv2d(x, weight, self.bias, self.stride, self.padding, self.dilation, self.groups) - def _load_from_state_dict(self, state_dict: OrderedDict, prefix: str, - local_metadata: Dict, strict: bool, - missing_keys: List[str], - unexpected_keys: List[str], - error_msgs: List[str]) -> None: + def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, + missing_keys, unexpected_keys, error_msgs): """Override default load function. 
AWS overrides the function _load_from_state_dict to recover @@ -129,7 +124,7 @@ class ConvAWS2d(nn.Conv2d): """ self.weight_gamma.data.fill_(-1) - local_missing_keys: List = [] + local_missing_keys = [] super()._load_from_state_dict(state_dict, prefix, local_metadata, strict, local_missing_keys, unexpected_keys, error_msgs) diff --git a/mmcv/cnn/bricks/depthwise_separable_conv_module.py b/mmcv/cnn/bricks/depthwise_separable_conv_module.py index cf1fe4c..722d5d8 100644 --- a/mmcv/cnn/bricks/depthwise_separable_conv_module.py +++ b/mmcv/cnn/bricks/depthwise_separable_conv_module.py @@ -1,7 +1,4 @@ # Copyright (c) OpenMMLab. All rights reserved. -from typing import Dict, Optional, Tuple, Union - -import torch import torch.nn as nn from .conv_module import ConvModule @@ -49,27 +46,27 @@ class DepthwiseSeparableConvModule(nn.Module): """ def __init__(self, - in_channels: int, - out_channels: int, - kernel_size: Union[int, Tuple[int, int]], - stride: Union[int, Tuple[int, int]] = 1, - padding: Union[int, Tuple[int, int]] = 0, - dilation: Union[int, Tuple[int, int]] = 1, - norm_cfg: Optional[Dict] = None, - act_cfg: Dict = dict(type='ReLU'), - dw_norm_cfg: Union[Dict, str] = 'default', - dw_act_cfg: Union[Dict, str] = 'default', - pw_norm_cfg: Union[Dict, str] = 'default', - pw_act_cfg: Union[Dict, str] = 'default', + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + norm_cfg=None, + act_cfg=dict(type='ReLU'), + dw_norm_cfg='default', + dw_act_cfg='default', + pw_norm_cfg='default', + pw_act_cfg='default', **kwargs): - super().__init__() + super(DepthwiseSeparableConvModule, self).__init__() assert 'groups' not in kwargs, 'groups should not be specified' # if norm/activation config of depthwise/pointwise ConvModule is not # specified, use default config. 
- dw_norm_cfg = dw_norm_cfg if dw_norm_cfg != 'default' else norm_cfg # type: ignore # noqa E501 + dw_norm_cfg = dw_norm_cfg if dw_norm_cfg != 'default' else norm_cfg dw_act_cfg = dw_act_cfg if dw_act_cfg != 'default' else act_cfg - pw_norm_cfg = pw_norm_cfg if pw_norm_cfg != 'default' else norm_cfg # type: ignore # noqa E501 + pw_norm_cfg = pw_norm_cfg if pw_norm_cfg != 'default' else norm_cfg pw_act_cfg = pw_act_cfg if pw_act_cfg != 'default' else act_cfg # depthwise convolution @@ -81,19 +78,19 @@ class DepthwiseSeparableConvModule(nn.Module): padding=padding, dilation=dilation, groups=in_channels, - norm_cfg=dw_norm_cfg, # type: ignore - act_cfg=dw_act_cfg, # type: ignore + norm_cfg=dw_norm_cfg, + act_cfg=dw_act_cfg, **kwargs) self.pointwise_conv = ConvModule( in_channels, out_channels, 1, - norm_cfg=pw_norm_cfg, # type: ignore - act_cfg=pw_act_cfg, # type: ignore + norm_cfg=pw_norm_cfg, + act_cfg=pw_act_cfg, **kwargs) - def forward(self, x: torch.Tensor) -> torch.Tensor: + def forward(self, x): x = self.depthwise_conv(x) x = self.pointwise_conv(x) return x diff --git a/mmcv/cnn/bricks/drop.py b/mmcv/cnn/bricks/drop.py index fe82a25..b0a0266 100644 --- a/mmcv/cnn/bricks/drop.py +++ b/mmcv/cnn/bricks/drop.py @@ -1,14 +1,12 @@ # Copyright (c) OpenMMLab. All rights reserved. -from typing import Any, Dict, Optional - import torch import torch.nn as nn -from mmengine.registry import MODELS + +from mmcv import build_from_cfg +from .registry import DROPOUT_LAYERS -def drop_path(x: torch.Tensor, - drop_prob: float = 0., - training: bool = False) -> torch.Tensor: +def drop_path(x, drop_prob=0., training=False): """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). @@ -26,7 +24,7 @@ def drop_path(x: torch.Tensor, return output -@MODELS.register_module() +@DROPOUT_LAYERS.register_module() class DropPath(nn.Module): """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). 
@@ -38,15 +36,15 @@ class DropPath(nn.Module): drop_prob (float): Probability of the path to be zeroed. Default: 0.1 """ - def __init__(self, drop_prob: float = 0.1): - super().__init__() + def __init__(self, drop_prob=0.1): + super(DropPath, self).__init__() self.drop_prob = drop_prob - def forward(self, x: torch.Tensor) -> torch.Tensor: + def forward(self, x): return drop_path(x, self.drop_prob, self.training) -@MODELS.register_module() +@DROPOUT_LAYERS.register_module() class Dropout(nn.Dropout): """A wrapper for ``torch.nn.Dropout``, We rename the ``p`` of ``torch.nn.Dropout`` to ``drop_prob`` so as to be consistent with @@ -58,10 +56,10 @@ class Dropout(nn.Dropout): inplace (bool): Do the operation inplace or not. Default: False. """ - def __init__(self, drop_prob: float = 0.5, inplace: bool = False): + def __init__(self, drop_prob=0.5, inplace=False): super().__init__(p=drop_prob, inplace=inplace) -def build_dropout(cfg: Dict, default_args: Optional[Dict] = None) -> Any: +def build_dropout(cfg, default_args=None): """Builder for drop out layers.""" - return MODELS.build(cfg, default_args=default_args) + return build_from_cfg(cfg, DROPOUT_LAYERS, default_args) diff --git a/mmcv/cnn/bricks/generalized_attention.py b/mmcv/cnn/bricks/generalized_attention.py index ab20467..988d9ad 100644 --- a/mmcv/cnn/bricks/generalized_attention.py +++ b/mmcv/cnn/bricks/generalized_attention.py @@ -5,16 +5,17 @@ import numpy as np import torch import torch.nn as nn import torch.nn.functional as F -from mmengine.model import kaiming_init -from mmengine.registry import MODELS +from ..utils import kaiming_init +from .registry import PLUGIN_LAYERS -@MODELS.register_module() + +@PLUGIN_LAYERS.register_module() class GeneralizedAttention(nn.Module): """GeneralizedAttention module. See 'An Empirical Study of Spatial Attention Mechanisms in Deep Networks' - (https://arxiv.org/abs/1904.05873) for details. + (https://arxiv.org/abs/1711.07971) for details. 
Args: in_channels (int): Channels of the input feature map. @@ -44,16 +45,16 @@ class GeneralizedAttention(nn.Module): _abbr_ = 'gen_attention_block' def __init__(self, - in_channels: int, - spatial_range: int = -1, - num_heads: int = 9, - position_embedding_dim: int = -1, - position_magnitude: int = 1, - kv_stride: int = 2, - q_stride: int = 1, - attention_type: str = '1111'): + in_channels, + spatial_range=-1, + num_heads=9, + position_embedding_dim=-1, + position_magnitude=1, + kv_stride=2, + q_stride=1, + attention_type='1111'): - super().__init__() + super(GeneralizedAttention, self).__init__() # hard range means local range for non-local operation self.position_embedding_dim = ( @@ -130,7 +131,7 @@ class GeneralizedAttention(nn.Module): max_len_kv = int((max_len - 1.0) / self.kv_stride + 1) local_constraint_map = np.ones( - (max_len, max_len, max_len_kv, max_len_kv), dtype=int) + (max_len, max_len, max_len_kv, max_len_kv), dtype=np.int) for iy in range(max_len): for ix in range(max_len): local_constraint_map[ @@ -212,7 +213,7 @@ class GeneralizedAttention(nn.Module): return embedding_x, embedding_y - def forward(self, x_input: torch.Tensor) -> torch.Tensor: + def forward(self, x_input): num_heads = self.num_heads # use empirical_attention @@ -350,7 +351,7 @@ class GeneralizedAttention(nn.Module): repeat(n, 1, 1, 1) position_feat_x_reshape = position_feat_x.\ - view(n, num_heads, w * w_kv, self.qk_embed_dim) + view(n, num_heads, w*w_kv, self.qk_embed_dim) position_feat_y_reshape = position_feat_y.\ view(n, num_heads, h * h_kv, self.qk_embed_dim) diff --git a/mmcv/cnn/bricks/hsigmoid.py b/mmcv/cnn/bricks/hsigmoid.py index 423e0aa..30b1a3d 100644 --- a/mmcv/cnn/bricks/hsigmoid.py +++ b/mmcv/cnn/bricks/hsigmoid.py @@ -1,24 +1,18 @@ # Copyright (c) OpenMMLab. All rights reserved. 
-import warnings - -import torch import torch.nn as nn -from mmengine.registry import MODELS + +from .registry import ACTIVATION_LAYERS -@MODELS.register_module() +@ACTIVATION_LAYERS.register_module() class HSigmoid(nn.Module): """Hard Sigmoid Module. Apply the hard sigmoid function: Hsigmoid(x) = min(max((x + bias) / divisor, min_value), max_value) - Default: Hsigmoid(x) = min(max((x + 3) / 6, 0), 1) - - Note: - In MMCV v1.4.4, we modified the default value of args to align with - PyTorch official. + Default: Hsigmoid(x) = min(max((x + 1) / 2, 0), 1) Args: - bias (float): Bias of the input feature map. Default: 3.0. - divisor (float): Divisor of the input feature map. Default: 6.0. + bias (float): Bias of the input feature map. Default: 1.0. + divisor (float): Divisor of the input feature map. Default: 2.0. min_value (float): Lower bound value. Default: 0.0. max_value (float): Upper bound value. Default: 1.0. @@ -26,25 +20,15 @@ class HSigmoid(nn.Module): Tensor: The output tensor. """ - def __init__(self, - bias: float = 3.0, - divisor: float = 6.0, - min_value: float = 0.0, - max_value: float = 1.0): - super().__init__() - warnings.warn( - 'In MMCV v1.4.4, we modified the default value of args to align ' - 'with PyTorch official. Previous Implementation: ' - 'Hsigmoid(x) = min(max((x + 1) / 2, 0), 1). 
' - 'Current Implementation: ' - 'Hsigmoid(x) = min(max((x + 3) / 6, 0), 1).') + def __init__(self, bias=1.0, divisor=2.0, min_value=0.0, max_value=1.0): + super(HSigmoid, self).__init__() self.bias = bias self.divisor = divisor assert self.divisor != 0 self.min_value = min_value self.max_value = max_value - def forward(self, x: torch.Tensor) -> torch.Tensor: + def forward(self, x): x = (x + self.bias) / self.divisor return x.clamp_(self.min_value, self.max_value) diff --git a/mmcv/cnn/bricks/hswish.py b/mmcv/cnn/bricks/hswish.py index 6b6dd00..7e0c090 100644 --- a/mmcv/cnn/bricks/hswish.py +++ b/mmcv/cnn/bricks/hswish.py @@ -1,11 +1,10 @@ # Copyright (c) OpenMMLab. All rights reserved. -import torch import torch.nn as nn -from mmengine.registry import MODELS -from mmengine.utils import digit_version -from mmengine.utils.dl_utils import TORCH_VERSION +from .registry import ACTIVATION_LAYERS + +@ACTIVATION_LAYERS.register_module() class HSwish(nn.Module): """Hard Swish Module. @@ -22,18 +21,9 @@ class HSwish(nn.Module): Tensor: The output tensor. """ - def __init__(self, inplace: bool = False): - super().__init__() + def __init__(self, inplace=False): + super(HSwish, self).__init__() self.act = nn.ReLU6(inplace) - def forward(self, x: torch.Tensor) -> torch.Tensor: + def forward(self, x): return x * self.act(x + 3) / 6 - - -if (TORCH_VERSION == 'parrots' - or digit_version(TORCH_VERSION) < digit_version('1.7')): - # Hardswish is not supported when PyTorch version < 1.6. - # And Hardswish in PyTorch 1.6 does not support inplace. - MODELS.register_module(module=HSwish) -else: - MODELS.register_module(module=nn.Hardswish, name='HSwish') diff --git a/mmcv/cnn/bricks/non_local.py b/mmcv/cnn/bricks/non_local.py index 8dd4465..92d0015 100644 --- a/mmcv/cnn/bricks/non_local.py +++ b/mmcv/cnn/bricks/non_local.py @@ -1,13 +1,12 @@ # Copyright (c) OpenMMLab. All rights reserved. 
from abc import ABCMeta -from typing import Dict, Optional import torch import torch.nn as nn -from mmengine.model import constant_init, normal_init -from mmengine.registry import MODELS +from ..utils import constant_init, normal_init from .conv_module import ConvModule +from .registry import PLUGIN_LAYERS class _NonLocalNd(nn.Module, metaclass=ABCMeta): @@ -34,14 +33,14 @@ class _NonLocalNd(nn.Module, metaclass=ABCMeta): """ def __init__(self, - in_channels: int, - reduction: int = 2, - use_scale: bool = True, - conv_cfg: Optional[Dict] = None, - norm_cfg: Optional[Dict] = None, - mode: str = 'embedded_gaussian', + in_channels, + reduction=2, + use_scale=True, + conv_cfg=None, + norm_cfg=None, + mode='embedded_gaussian', **kwargs): - super().__init__() + super(_NonLocalNd, self).__init__() self.in_channels = in_channels self.reduction = reduction self.use_scale = use_scale @@ -62,7 +61,7 @@ class _NonLocalNd(nn.Module, metaclass=ABCMeta): self.inter_channels, kernel_size=1, conv_cfg=conv_cfg, - act_cfg=None) # type: ignore + act_cfg=None) self.conv_out = ConvModule( self.inter_channels, self.in_channels, @@ -97,7 +96,7 @@ class _NonLocalNd(nn.Module, metaclass=ABCMeta): self.init_weights(**kwargs) - def init_weights(self, std: float = 0.01, zeros_init: bool = True) -> None: + def init_weights(self, std=0.01, zeros_init=True): if self.mode != 'gaussian': for m in [self.g, self.theta, self.phi]: normal_init(m.conv, std=std) @@ -114,8 +113,7 @@ class _NonLocalNd(nn.Module, metaclass=ABCMeta): else: normal_init(self.conv_out.norm, std=std) - def gaussian(self, theta_x: torch.Tensor, - phi_x: torch.Tensor) -> torch.Tensor: + def gaussian(self, theta_x, phi_x): # NonLocal1d pairwise_weight: [N, H, H] # NonLocal2d pairwise_weight: [N, HxW, HxW] # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW] @@ -123,8 +121,7 @@ class _NonLocalNd(nn.Module, metaclass=ABCMeta): pairwise_weight = pairwise_weight.softmax(dim=-1) return pairwise_weight - def embedded_gaussian(self, theta_x: 
torch.Tensor, - phi_x: torch.Tensor) -> torch.Tensor: + def embedded_gaussian(self, theta_x, phi_x): # NonLocal1d pairwise_weight: [N, H, H] # NonLocal2d pairwise_weight: [N, HxW, HxW] # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW] @@ -135,8 +132,7 @@ class _NonLocalNd(nn.Module, metaclass=ABCMeta): pairwise_weight = pairwise_weight.softmax(dim=-1) return pairwise_weight - def dot_product(self, theta_x: torch.Tensor, - phi_x: torch.Tensor) -> torch.Tensor: + def dot_product(self, theta_x, phi_x): # NonLocal1d pairwise_weight: [N, H, H] # NonLocal2d pairwise_weight: [N, HxW, HxW] # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW] @@ -144,8 +140,7 @@ class _NonLocalNd(nn.Module, metaclass=ABCMeta): pairwise_weight /= pairwise_weight.shape[-1] return pairwise_weight - def concatenation(self, theta_x: torch.Tensor, - phi_x: torch.Tensor) -> torch.Tensor: + def concatenation(self, theta_x, phi_x): # NonLocal1d pairwise_weight: [N, H, H] # NonLocal2d pairwise_weight: [N, HxW, HxW] # NonLocal3d pairwise_weight: [N, TxHxW, TxHxW] @@ -162,7 +157,7 @@ class _NonLocalNd(nn.Module, metaclass=ABCMeta): return pairwise_weight - def forward(self, x: torch.Tensor) -> torch.Tensor: + def forward(self, x): # Assume `reduction = 1`, then `inter_channels = C` # or `inter_channels = C` when `mode="gaussian"` @@ -229,11 +224,12 @@ class NonLocal1d(_NonLocalNd): """ def __init__(self, - in_channels: int, - sub_sample: bool = False, - conv_cfg: Dict = dict(type='Conv1d'), + in_channels, + sub_sample=False, + conv_cfg=dict(type='Conv1d'), **kwargs): - super().__init__(in_channels, conv_cfg=conv_cfg, **kwargs) + super(NonLocal1d, self).__init__( + in_channels, conv_cfg=conv_cfg, **kwargs) self.sub_sample = sub_sample @@ -246,7 +242,7 @@ class NonLocal1d(_NonLocalNd): self.phi = max_pool_layer -@MODELS.register_module() +@PLUGIN_LAYERS.register_module() class NonLocal2d(_NonLocalNd): """2D Non-local module. 
@@ -262,11 +258,12 @@ class NonLocal2d(_NonLocalNd): _abbr_ = 'nonlocal_block' def __init__(self, - in_channels: int, - sub_sample: bool = False, - conv_cfg: Dict = dict(type='Conv2d'), + in_channels, + sub_sample=False, + conv_cfg=dict(type='Conv2d'), **kwargs): - super().__init__(in_channels, conv_cfg=conv_cfg, **kwargs) + super(NonLocal2d, self).__init__( + in_channels, conv_cfg=conv_cfg, **kwargs) self.sub_sample = sub_sample @@ -292,11 +289,12 @@ class NonLocal3d(_NonLocalNd): """ def __init__(self, - in_channels: int, - sub_sample: bool = False, - conv_cfg: Dict = dict(type='Conv3d'), + in_channels, + sub_sample=False, + conv_cfg=dict(type='Conv3d'), **kwargs): - super().__init__(in_channels, conv_cfg=conv_cfg, **kwargs) + super(NonLocal3d, self).__init__( + in_channels, conv_cfg=conv_cfg, **kwargs) self.sub_sample = sub_sample if sub_sample: diff --git a/mmcv/cnn/bricks/norm.py b/mmcv/cnn/bricks/norm.py index 2fff684..cfb326b 100644 --- a/mmcv/cnn/bricks/norm.py +++ b/mmcv/cnn/bricks/norm.py @@ -1,24 +1,23 @@ # Copyright (c) OpenMMLab. All rights reserved. 
import inspect -from typing import Dict, Tuple, Union import torch.nn as nn -from mmengine.registry import MODELS -from mmengine.utils import is_tuple_of -from mmengine.utils.dl_utils.parrots_wrapper import (SyncBatchNorm, _BatchNorm, - _InstanceNorm) - -MODELS.register_module('BN', module=nn.BatchNorm2d) -MODELS.register_module('BN1d', module=nn.BatchNorm1d) -MODELS.register_module('BN2d', module=nn.BatchNorm2d) -MODELS.register_module('BN3d', module=nn.BatchNorm3d) -MODELS.register_module('SyncBN', module=SyncBatchNorm) -MODELS.register_module('GN', module=nn.GroupNorm) -MODELS.register_module('LN', module=nn.LayerNorm) -MODELS.register_module('IN', module=nn.InstanceNorm2d) -MODELS.register_module('IN1d', module=nn.InstanceNorm1d) -MODELS.register_module('IN2d', module=nn.InstanceNorm2d) -MODELS.register_module('IN3d', module=nn.InstanceNorm3d) + +from mmcv.utils import is_tuple_of +from mmcv.utils.parrots_wrapper import SyncBatchNorm, _BatchNorm, _InstanceNorm +from .registry import NORM_LAYERS + +NORM_LAYERS.register_module('BN', module=nn.BatchNorm2d) +NORM_LAYERS.register_module('BN1d', module=nn.BatchNorm1d) +NORM_LAYERS.register_module('BN2d', module=nn.BatchNorm2d) +NORM_LAYERS.register_module('BN3d', module=nn.BatchNorm3d) +NORM_LAYERS.register_module('SyncBN', module=SyncBatchNorm) +NORM_LAYERS.register_module('GN', module=nn.GroupNorm) +NORM_LAYERS.register_module('LN', module=nn.LayerNorm) +NORM_LAYERS.register_module('IN', module=nn.InstanceNorm2d) +NORM_LAYERS.register_module('IN1d', module=nn.InstanceNorm1d) +NORM_LAYERS.register_module('IN2d', module=nn.InstanceNorm2d) +NORM_LAYERS.register_module('IN3d', module=nn.InstanceNorm3d) def infer_abbr(class_type): @@ -70,9 +69,7 @@ def infer_abbr(class_type): return 'norm_layer' -def build_norm_layer(cfg: Dict, - num_features: int, - postfix: Union[int, str] = '') -> Tuple[str, nn.Module]: +def build_norm_layer(cfg, num_features, postfix=''): """Build normalization layer. 
Args: @@ -86,9 +83,9 @@ def build_norm_layer(cfg: Dict, to create named layer. Returns: - tuple[str, nn.Module]: The first element is the layer name consisting - of abbreviation and postfix, e.g., bn1, gn. The second element is the - created norm layer. + (str, nn.Module): The first element is the layer name consisting of + abbreviation and postfix, e.g., bn1, gn. The second element is the + created norm layer. """ if not isinstance(cfg, dict): raise TypeError('cfg must be a dict') @@ -97,15 +94,10 @@ def build_norm_layer(cfg: Dict, cfg_ = cfg.copy() layer_type = cfg_.pop('type') + if layer_type not in NORM_LAYERS: + raise KeyError(f'Unrecognized norm type {layer_type}') - # Switch registry to the target scope. If `norm_layer` cannot be found - # in the registry, fallback to search `norm_layer` in the - # mmengine.MODELS. - with MODELS.switch_scope_and_registry(None) as registry: - norm_layer = registry.get(layer_type) - if norm_layer is None: - raise KeyError(f'Cannot find {norm_layer} in registry under scope ' - f'name {registry.scope}') + norm_layer = NORM_LAYERS.get(layer_type) abbr = infer_abbr(norm_layer) assert isinstance(postfix, (int, str)) @@ -127,8 +119,7 @@ def build_norm_layer(cfg: Dict, return name, layer -def is_norm(layer: nn.Module, - exclude: Union[type, tuple, None] = None) -> bool: +def is_norm(layer, exclude=None): """Check if a layer is a normalization layer. Args: diff --git a/mmcv/cnn/bricks/padding.py b/mmcv/cnn/bricks/padding.py index 4135a19..e4ac6b2 100644 --- a/mmcv/cnn/bricks/padding.py +++ b/mmcv/cnn/bricks/padding.py @@ -1,19 +1,18 @@ # Copyright (c) OpenMMLab. All rights reserved. 
-from typing import Dict - import torch.nn as nn -from mmengine.registry import MODELS -MODELS.register_module('zero', module=nn.ZeroPad2d) -MODELS.register_module('reflect', module=nn.ReflectionPad2d) -MODELS.register_module('replicate', module=nn.ReplicationPad2d) +from .registry import PADDING_LAYERS + +PADDING_LAYERS.register_module('zero', module=nn.ZeroPad2d) +PADDING_LAYERS.register_module('reflect', module=nn.ReflectionPad2d) +PADDING_LAYERS.register_module('replicate', module=nn.ReplicationPad2d) -def build_padding_layer(cfg: Dict, *args, **kwargs) -> nn.Module: +def build_padding_layer(cfg, *args, **kwargs): """Build padding layer. Args: - cfg (dict): The padding layer config, which should contain: + cfg (None or dict): The padding layer config, which should contain: - type (str): Layer type. - layer args: Args needed to instantiate a padding layer. @@ -27,15 +26,11 @@ def build_padding_layer(cfg: Dict, *args, **kwargs) -> nn.Module: cfg_ = cfg.copy() padding_type = cfg_.pop('type') + if padding_type not in PADDING_LAYERS: + raise KeyError(f'Unrecognized padding type {padding_type}.') + else: + padding_layer = PADDING_LAYERS.get(padding_type) - # Switch registry to the target scope. If `padding_layer` cannot be found - # in the registry, fallback to search `padding_layer` in the - # mmengine.MODELS. - with MODELS.switch_scope_and_registry(None) as registry: - padding_layer = registry.get(padding_type) - if padding_layer is None: - raise KeyError(f'Cannot find {padding_layer} in registry under scope ' - f'name {registry.scope}') layer = padding_layer(*args, **kwargs, **cfg_) return layer diff --git a/mmcv/cnn/bricks/plugin.py b/mmcv/cnn/bricks/plugin.py index 83ba373..07c010d 100644 --- a/mmcv/cnn/bricks/plugin.py +++ b/mmcv/cnn/bricks/plugin.py @@ -1,18 +1,15 @@ -# Copyright (c) OpenMMLab. All rights reserved. 
import inspect import platform -from typing import Dict, Tuple, Union -import torch.nn as nn -from mmengine.registry import MODELS +from .registry import PLUGIN_LAYERS if platform.system() == 'Windows': - import regex as re # type: ignore + import regex as re else: - import re # type: ignore + import re -def infer_abbr(class_type: type) -> str: +def infer_abbr(class_type): """Infer abbreviation from the class name. This method will infer the abbreviation to map class types to @@ -50,27 +47,25 @@ def infer_abbr(class_type: type) -> str: raise TypeError( f'class_type must be a type, but got {type(class_type)}') if hasattr(class_type, '_abbr_'): - return class_type._abbr_ # type: ignore + return class_type._abbr_ else: return camel2snack(class_type.__name__) -def build_plugin_layer(cfg: Dict, - postfix: Union[int, str] = '', - **kwargs) -> Tuple[str, nn.Module]: +def build_plugin_layer(cfg, postfix='', **kwargs): """Build plugin layer. Args: - cfg (dict): cfg should contain: - - - type (str): identify plugin layer type. - - layer args: args needed to instantiate a plugin layer. + cfg (None or dict): cfg should contain: + type (str): identify plugin layer type. + layer args: args needed to instantiate a plugin layer. postfix (int, str): appended into norm abbreviation to create named layer. Default: ''. Returns: - tuple[str, nn.Module]: The first one is the concatenation of - abbreviation and postfix. The second is the created plugin layer. + tuple[str, nn.Module]: + name (str): abbreviation + postfix + layer (nn.Module): created plugin layer """ if not isinstance(cfg, dict): raise TypeError('cfg must be a dict') @@ -79,15 +74,10 @@ def build_plugin_layer(cfg: Dict, cfg_ = cfg.copy() layer_type = cfg_.pop('type') + if layer_type not in PLUGIN_LAYERS: + raise KeyError(f'Unrecognized plugin type {layer_type}') - # Switch registry to the target scope. If `plugin_layer` cannot be found - # in the registry, fallback to search `plugin_layer` in the - # mmengine.MODELS. 
- with MODELS.switch_scope_and_registry(None) as registry: - plugin_layer = registry.get(layer_type) - if plugin_layer is None: - raise KeyError(f'Cannot find {plugin_layer} in registry under scope ' - f'name {registry.scope}') + plugin_layer = PLUGIN_LAYERS.get(layer_type) abbr = infer_abbr(plugin_layer) assert isinstance(postfix, (int, str)) diff --git a/mmcv/cnn/bricks/registry.py b/mmcv/cnn/bricks/registry.py new file mode 100644 index 0000000..c292797 --- /dev/null +++ b/mmcv/cnn/bricks/registry.py @@ -0,0 +1,16 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from mmcv.utils import Registry + +CONV_LAYERS = Registry('conv layer') +NORM_LAYERS = Registry('norm layer') +ACTIVATION_LAYERS = Registry('activation layer') +PADDING_LAYERS = Registry('padding layer') +UPSAMPLE_LAYERS = Registry('upsample layer') +PLUGIN_LAYERS = Registry('plugin layer') + +DROPOUT_LAYERS = Registry('drop out layers') +POSITIONAL_ENCODING = Registry('position encoding') +ATTENTION = Registry('attention') +FEEDFORWARD_NETWORK = Registry('feed-forward Network') +TRANSFORMER_LAYER = Registry('transformerLayer') +TRANSFORMER_LAYER_SEQUENCE = Registry('transformer-layers sequence') diff --git a/mmcv/cnn/bricks/scale.py b/mmcv/cnn/bricks/scale.py index a473798..c905fff 100644 --- a/mmcv/cnn/bricks/scale.py +++ b/mmcv/cnn/bricks/scale.py @@ -13,45 +13,9 @@ class Scale(nn.Module): scale (float): Initial value of scale factor. Default: 1.0 """ - def __init__(self, scale: float = 1.0): - super().__init__() + def __init__(self, scale=1.0): + super(Scale, self).__init__() self.scale = nn.Parameter(torch.tensor(scale, dtype=torch.float)) - def forward(self, x: torch.Tensor) -> torch.Tensor: + def forward(self, x): return x * self.scale - - -class LayerScale(nn.Module): - """LayerScale layer. - - Args: - dim (int): Dimension of input features. - inplace (bool): Whether performs operation in-place. - Default: `False`. 
- data_format (str): The input data format, could be 'channels_last' - or 'channels_first', representing (B, C, H, W) and - (B, N, C) format data respectively. Default: 'channels_last'. - scale (float): Initial value of scale factor. Default: 1.0 - """ - - def __init__(self, - dim: int, - inplace: bool = False, - data_format: str = 'channels_last', - scale: float = 1e-5): - super().__init__() - assert data_format in ('channels_last', 'channels_first'), \ - "'data_format' could only be channels_last or channels_first." - self.inplace = inplace - self.data_format = data_format - self.weight = nn.Parameter(torch.ones(dim) * scale) - - def forward(self, x) -> torch.Tensor: - if self.data_format == 'channels_first': - shape = tuple((1, -1, *(1 for _ in range(x.dim() - 2)))) - else: - shape = tuple((*(1 for _ in range(x.dim() - 1)), -1)) - if self.inplace: - return x.mul_(self.weight.view(*shape)) - else: - return x * self.weight.view(*shape) diff --git a/mmcv/cnn/bricks/swish.py b/mmcv/cnn/bricks/swish.py index 75ad75b..e2ca8ed 100644 --- a/mmcv/cnn/bricks/swish.py +++ b/mmcv/cnn/bricks/swish.py @@ -1,10 +1,11 @@ # Copyright (c) OpenMMLab. All rights reserved. import torch import torch.nn as nn -from mmengine.registry import MODELS +from .registry import ACTIVATION_LAYERS -@MODELS.register_module() + +@ACTIVATION_LAYERS.register_module() class Swish(nn.Module): """Swish Module. @@ -18,7 +19,7 @@ class Swish(nn.Module): """ def __init__(self): - super().__init__() + super(Swish, self).__init__() - def forward(self, x: torch.Tensor) -> torch.Tensor: + def forward(self, x): return x * torch.sigmoid(x) diff --git a/mmcv/cnn/bricks/transformer.py b/mmcv/cnn/bricks/transformer.py index f83b9a6..ed32688 100644 --- a/mmcv/cnn/bricks/transformer.py +++ b/mmcv/cnn/bricks/transformer.py @@ -1,26 +1,21 @@ # Copyright (c) OpenMMLab. All rights reserved. 
import copy -import math import warnings -from typing import Sequence import torch import torch.nn as nn -import torch.nn.functional as F -from mmengine.config import ConfigDict -from mmengine.model import BaseModule, ModuleList, Sequential -from mmengine.registry import MODELS -from mmengine.utils import deprecated_api_warning, to_2tuple - -from mmcv.cnn import (Linear, build_activation_layer, build_conv_layer, - build_norm_layer) + +from mmcv import ConfigDict, deprecated_api_warning +from mmcv.cnn import Linear, build_activation_layer, build_norm_layer +from mmcv.runner.base_module import BaseModule, ModuleList, Sequential +from mmcv.utils import build_from_cfg from .drop import build_dropout -from .scale import LayerScale +from .registry import (ATTENTION, FEEDFORWARD_NETWORK, POSITIONAL_ENCODING, + TRANSFORMER_LAYER, TRANSFORMER_LAYER_SEQUENCE) # Avoid BC-breaking of importing MultiScaleDeformableAttention from this file try: - from mmcv.ops.multi_scale_deform_attn import \ - MultiScaleDeformableAttention # noqa F401 + from mmcv.ops.multi_scale_deform_attn import MultiScaleDeformableAttention # noqa F401 warnings.warn( ImportWarning( '``MultiScaleDeformableAttention`` has been moved to ' @@ -32,379 +27,35 @@ try: except ImportError: warnings.warn('Fail to import ``MultiScaleDeformableAttention`` from ' '``mmcv.ops.multi_scale_deform_attn``, ' - 'You should install ``mmcv`` rather than ``mmcv-lite`` ' - 'if you need this module. ') + 'You should install ``mmcv-full`` if you need this module. 
') def build_positional_encoding(cfg, default_args=None): """Builder for Position Encoding.""" - return MODELS.build(cfg, default_args=default_args) + return build_from_cfg(cfg, POSITIONAL_ENCODING, default_args) def build_attention(cfg, default_args=None): """Builder for attention.""" - return MODELS.build(cfg, default_args=default_args) + return build_from_cfg(cfg, ATTENTION, default_args) def build_feedforward_network(cfg, default_args=None): """Builder for feed-forward network (FFN).""" - return MODELS.build(cfg, default_args=default_args) + return build_from_cfg(cfg, FEEDFORWARD_NETWORK, default_args) def build_transformer_layer(cfg, default_args=None): """Builder for transformer layer.""" - return MODELS.build(cfg, default_args=default_args) + return build_from_cfg(cfg, TRANSFORMER_LAYER, default_args) def build_transformer_layer_sequence(cfg, default_args=None): """Builder for transformer encoder and transformer decoder.""" - return MODELS.build(cfg, default_args=default_args) - - -class AdaptivePadding(nn.Module): - """Applies padding adaptively to the input. - - This module can make input get fully covered by filter - you specified. It support two modes "same" and "corner". The - "same" mode is same with "SAME" padding mode in TensorFlow, pad - zero around input. The "corner" mode would pad zero - to bottom right. - - Args: - kernel_size (int | tuple): Size of the kernel. Default: 1. - stride (int | tuple): Stride of the filter. Default: 1. - dilation (int | tuple): Spacing between kernel elements. - Default: 1. - padding (str): Support "same" and "corner", "corner" mode - would pad zero to bottom right, and "same" mode would - pad zero around input. Default: "corner". 
- - Example: - >>> kernel_size = 16 - >>> stride = 16 - >>> dilation = 1 - >>> input = torch.rand(1, 1, 15, 17) - >>> adap_pad = AdaptivePadding( - >>> kernel_size=kernel_size, - >>> stride=stride, - >>> dilation=dilation, - >>> padding="corner") - >>> out = adap_pad(input) - >>> assert (out.shape[2], out.shape[3]) == (16, 32) - >>> input = torch.rand(1, 1, 16, 17) - >>> out = adap_pad(input) - >>> assert (out.shape[2], out.shape[3]) == (16, 32) - """ - - def __init__(self, kernel_size=1, stride=1, dilation=1, padding='corner'): - super().__init__() - assert padding in ('same', 'corner') - - kernel_size = to_2tuple(kernel_size) - stride = to_2tuple(stride) - dilation = to_2tuple(dilation) - - self.padding = padding - self.kernel_size = kernel_size - self.stride = stride - self.dilation = dilation - - def get_pad_shape(self, input_shape): - """Calculate the padding size of input. - - Args: - input_shape (:obj:`torch.Size`): arrange as (H, W). - - Returns: - Tuple[int]: The padding size along the - original H and W directions - """ - input_h, input_w = input_shape - kernel_h, kernel_w = self.kernel_size - stride_h, stride_w = self.stride - output_h = math.ceil(input_h / stride_h) - output_w = math.ceil(input_w / stride_w) - pad_h = max((output_h - 1) * stride_h + - (kernel_h - 1) * self.dilation[0] + 1 - input_h, 0) - pad_w = max((output_w - 1) * stride_w + - (kernel_w - 1) * self.dilation[1] + 1 - input_w, 0) - return pad_h, pad_w - - def forward(self, x): - """Add padding to `x` - - Args: - x (Tensor): Input tensor has shape (B, C, H, W). 
- - Returns: - Tensor: The tensor with adaptive padding - """ - pad_h, pad_w = self.get_pad_shape(x.size()[-2:]) - if pad_h > 0 or pad_w > 0: - if self.padding == 'corner': - x = F.pad(x, [0, pad_w, 0, pad_h]) - elif self.padding == 'same': - x = F.pad(x, [ - pad_w // 2, pad_w - pad_w // 2, pad_h // 2, - pad_h - pad_h // 2 - ]) - return x + return build_from_cfg(cfg, TRANSFORMER_LAYER_SEQUENCE, default_args) -class PatchEmbed(BaseModule): - """Image to Patch Embedding. - - We use a conv layer to implement PatchEmbed. - - Args: - in_channels (int): The num of input channels. Default: 3 - embed_dims (int): The dimensions of embedding. Default: 768 - conv_type (str): The type of convolution - to generate patch embedding. Default: "Conv2d". - kernel_size (int): The kernel_size of embedding conv. Default: 16. - stride (int): The slide stride of embedding conv. - Default: 16. - padding (int | tuple | string): The padding length of - embedding conv. When it is a string, it means the mode - of adaptive padding, support "same" and "corner" now. - Default: "corner". - dilation (int): The dilation rate of embedding conv. Default: 1. - bias (bool): Bias of embed conv. Default: True. - norm_cfg (dict, optional): Config dict for normalization layer. - Default: None. - input_size (int | tuple | None): The size of input, which will be - used to calculate the out size. Only works when `dynamic_size` - is False. Default: None. - init_cfg (`mmcv.ConfigDict`, optional): The Config for initialization. - Default: None. 
- """ - - def __init__(self, - in_channels=3, - embed_dims=768, - conv_type='Conv2d', - kernel_size=16, - stride=16, - padding='corner', - dilation=1, - bias=True, - norm_cfg=None, - input_size=None, - init_cfg=None): - super().__init__(init_cfg=init_cfg) - - self.embed_dims = embed_dims - if stride is None: - stride = kernel_size - - kernel_size = to_2tuple(kernel_size) - stride = to_2tuple(stride) - dilation = to_2tuple(dilation) - - if isinstance(padding, str): - self.adaptive_padding = AdaptivePadding( - kernel_size=kernel_size, - stride=stride, - dilation=dilation, - padding=padding) - # disable the padding of conv - padding = 0 - else: - self.adaptive_padding = None - padding = to_2tuple(padding) - - self.projection = build_conv_layer( - dict(type=conv_type), - in_channels=in_channels, - out_channels=embed_dims, - kernel_size=kernel_size, - stride=stride, - padding=padding, - dilation=dilation, - bias=bias) - - if norm_cfg is not None: - self.norm = build_norm_layer(norm_cfg, embed_dims)[1] - else: - self.norm = None - - if input_size: - input_size = to_2tuple(input_size) - # `init_out_size` would be used outside to - # calculate the num_patches - # e.g. when `use_abs_pos_embed` outside - self.init_input_size = input_size - if self.adaptive_padding: - pad_h, pad_w = self.adaptive_padding.get_pad_shape(input_size) - input_h, input_w = input_size - input_h = input_h + pad_h - input_w = input_w + pad_w - input_size = (input_h, input_w) - - # https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html - h_out = (input_size[0] + 2 * padding[0] - dilation[0] * - (kernel_size[0] - 1) - 1) // stride[0] + 1 - w_out = (input_size[1] + 2 * padding[1] - dilation[1] * - (kernel_size[1] - 1) - 1) // stride[1] + 1 - self.init_out_size = (h_out, w_out) - else: - self.init_input_size = None - self.init_out_size = None - - def forward(self, x): - """ - Args: - x (Tensor): Has shape (B, C, H, W). In most case, C is 3. 
- - Returns: - tuple: Contains merged results and its spatial shape. - - - x (Tensor): Has shape (B, out_h * out_w, embed_dims) - - out_size (tuple[int]): Spatial shape of x, arrange as - (out_h, out_w). - """ - - if self.adaptive_padding: - x = self.adaptive_padding(x) - - x = self.projection(x) - out_size = (x.shape[2], x.shape[3]) - x = x.flatten(2).transpose(1, 2) - if self.norm is not None: - x = self.norm(x) - return x, out_size - - -class PatchMerging(BaseModule): - """Merge patch feature map. - - This layer groups feature map by kernel_size, and applies norm and linear - layers to the grouped feature map ((used in Swin Transformer)). - Our implementation uses `nn.Unfold` to - merge patches, which is about 25% faster than the original - implementation. However, we need to modify pretrained - models for compatibility. - - Args: - in_channels (int): The num of input channels. - to gets fully covered by filter and stride you specified. - out_channels (int): The num of output channels. - kernel_size (int | tuple, optional): the kernel size in the unfold - layer. Defaults to 2. - stride (int | tuple, optional): the stride of the sliding blocks in the - unfold layer. Default: None. (Would be set as `kernel_size`) - padding (int | tuple | string ): The padding length of - embedding conv. When it is a string, it means the mode - of adaptive padding, support "same" and "corner" now. - Default: "corner". - dilation (int | tuple, optional): dilation parameter in the unfold - layer. Default: 1. - bias (bool, optional): Whether to add bias in linear layer or not. - Defaults: False. - norm_cfg (dict, optional): Config dict for normalization layer. - Default: dict(type='LN'). - init_cfg (dict, optional): The extra config for initialization. - Default: None. 
- """ - - def __init__(self, - in_channels, - out_channels, - kernel_size=2, - stride=None, - padding='corner', - dilation=1, - bias=False, - norm_cfg=dict(type='LN'), - init_cfg=None): - super().__init__(init_cfg=init_cfg) - self.in_channels = in_channels - self.out_channels = out_channels - if stride: - stride = stride - else: - stride = kernel_size - - kernel_size = to_2tuple(kernel_size) - stride = to_2tuple(stride) - dilation = to_2tuple(dilation) - - if isinstance(padding, str): - self.adaptive_padding = AdaptivePadding( - kernel_size=kernel_size, - stride=stride, - dilation=dilation, - padding=padding) - # disable the padding of unfold - padding = 0 - else: - self.adaptive_padding = None - - padding = to_2tuple(padding) - self.sampler = nn.Unfold( - kernel_size=kernel_size, - dilation=dilation, - padding=padding, - stride=stride) - - sample_dim = kernel_size[0] * kernel_size[1] * in_channels - - if norm_cfg is not None: - self.norm = build_norm_layer(norm_cfg, sample_dim)[1] - else: - self.norm = None - - self.reduction = nn.Linear(sample_dim, out_channels, bias=bias) - - def forward(self, x, input_size): - """ - Args: - x (Tensor): Has shape (B, H*W, C_in). - input_size (tuple[int]): The spatial shape of x, arrange as (H, W). - Default: None. - - Returns: - tuple: Contains merged results and its spatial shape. - - - x (Tensor): Has shape (B, Merged_H * Merged_W, C_out) - - out_size (tuple[int]): Spatial shape of x, arrange as - (Merged_H, Merged_W). - """ - B, L, C = x.shape - assert isinstance(input_size, Sequence), f'Expect ' \ - f'input_size is ' \ - f'`Sequence` ' \ - f'but get {input_size}' - - H, W = input_size - assert L == H * W, 'input feature has wrong size' - - x = x.view(B, H, W, C).permute([0, 3, 1, 2]) # B, C, H, W - - if self.adaptive_padding: - x = self.adaptive_padding(x) - H, W = x.shape[-2:] - - # Use nn.Unfold to merge patch. 
About 25% faster than original method, - # but need to modify pretrained model for compatibility - # if kernel_size=2 and stride=2, x should has shape (B, 4*C, H/2*W/2) - x = self.sampler(x) - - out_h = (H + 2 * self.sampler.padding[0] - self.sampler.dilation[0] * - (self.sampler.kernel_size[0] - 1) - - 1) // self.sampler.stride[0] + 1 - out_w = (W + 2 * self.sampler.padding[1] - self.sampler.dilation[1] * - (self.sampler.kernel_size[1] - 1) - - 1) // self.sampler.stride[1] + 1 - - output_size = (out_h, out_w) - x = x.transpose(1, 2) # B, H/2*W/2, 4*C - x = self.norm(x) if self.norm else x - x = self.reduction(x) - return x, output_size - - -@MODELS.register_module() +@ATTENTION.register_module() class MultiheadAttention(BaseModule): """A wrapper for ``torch.nn.MultiheadAttention``. @@ -436,13 +87,12 @@ class MultiheadAttention(BaseModule): init_cfg=None, batch_first=False, **kwargs): - super().__init__(init_cfg) + super(MultiheadAttention, self).__init__(init_cfg) if 'dropout' in kwargs: - warnings.warn( - 'The arguments `dropout` in MultiheadAttention ' - 'has been deprecated, now you can separately ' - 'set `attn_drop`(float), proj_drop(float), ' - 'and `dropout_layer`(dict) ', DeprecationWarning) + warnings.warn('The arguments `dropout` in MultiheadAttention ' + 'has been deprecated, now you can separately ' + 'set `attn_drop`(float), proj_drop(float), ' + 'and `dropout_layer`(dict) ') attn_drop = kwargs['dropout'] dropout_layer['drop_prob'] = kwargs.pop('dropout') @@ -504,9 +154,9 @@ class MultiheadAttention(BaseModule): Returns: Tensor: forwarded results with shape - [num_queries, bs, embed_dims] - if self.batch_first is False, else - [bs, num_queries embed_dims]. + [num_queries, bs, embed_dims] + if self.batch_first is False, else + [bs, num_queries embed_dims]. 
""" if key is None: @@ -552,7 +202,7 @@ class MultiheadAttention(BaseModule): return identity + self.dropout_layer(self.proj_drop(out)) -@MODELS.register_module() +@FEEDFORWARD_NETWORK.register_module() class FFN(BaseModule): """Implements feed-forward networks (FFNs) with identity connection. @@ -573,8 +223,6 @@ class FFN(BaseModule): when adding the shortcut. init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization. Default: None. - layer_scale_init_value (float): Initial value of scale factor in - LayerScale. Default: 1.0 """ @deprecated_api_warning( @@ -592,21 +240,23 @@ class FFN(BaseModule): dropout_layer=None, add_identity=True, init_cfg=None, - layer_scale_init_value=0.): - super().__init__(init_cfg) + **kwargs): + super(FFN, self).__init__(init_cfg) assert num_fcs >= 2, 'num_fcs should be no less ' \ f'than 2. got {num_fcs}.' self.embed_dims = embed_dims self.feedforward_channels = feedforward_channels self.num_fcs = num_fcs + self.act_cfg = act_cfg + self.activate = build_activation_layer(act_cfg) layers = [] in_channels = embed_dims for _ in range(num_fcs - 1): layers.append( Sequential( - Linear(in_channels, feedforward_channels), - build_activation_layer(act_cfg), nn.Dropout(ffn_drop))) + Linear(in_channels, feedforward_channels), self.activate, + nn.Dropout(ffn_drop))) in_channels = feedforward_channels layers.append(Linear(feedforward_channels, embed_dims)) layers.append(nn.Dropout(ffn_drop)) @@ -615,11 +265,6 @@ class FFN(BaseModule): dropout_layer) if dropout_layer else torch.nn.Identity() self.add_identity = add_identity - if layer_scale_init_value > 0: - self.gamma2 = LayerScale(embed_dims, scale=layer_scale_init_value) - else: - self.gamma2 = nn.Identity() - @deprecated_api_warning({'residual': 'identity'}, cls_name='FFN') def forward(self, x, identity=None): """Forward function for `FFN`. @@ -627,7 +272,6 @@ class FFN(BaseModule): The function would add x to the output tensor if residue is None. 
""" out = self.layers(x) - out = self.gamma2(out) if not self.add_identity: return self.dropout_layer(out) if identity is None: @@ -635,7 +279,7 @@ class FFN(BaseModule): return identity + self.dropout_layer(out) -@MODELS.register_module() +@TRANSFORMER_LAYER.register_module() class BaseTransformerLayer(BaseModule): """Base `TransformerLayer` for vision transformer. @@ -698,15 +342,15 @@ class BaseTransformerLayer(BaseModule): f'The arguments `{ori_name}` in BaseTransformerLayer ' f'has been deprecated, now you should set `{new_name}` ' f'and other FFN related arguments ' - f'to a dict named `ffn_cfgs`. ', DeprecationWarning) + f'to a dict named `ffn_cfgs`. ') ffn_cfgs[new_name] = kwargs[ori_name] - super().__init__(init_cfg) + super(BaseTransformerLayer, self).__init__(init_cfg) self.batch_first = batch_first - assert set(operation_order) & { - 'self_attn', 'norm', 'ffn', 'cross_attn'} == \ + assert set(operation_order) & set( + ['self_attn', 'norm', 'ffn', 'cross_attn']) == \ set(operation_order), f'The operation_order of' \ f' {self.__class__.__name__} should ' \ f'contains all four operation type ' \ @@ -753,7 +397,7 @@ class BaseTransformerLayer(BaseModule): assert len(ffn_cfgs) == num_ffns for ffn_index in range(num_ffns): if 'embed_dims' not in ffn_cfgs[ffn_index]: - ffn_cfgs[ffn_index]['embed_dims'] = self.embed_dims + ffn_cfgs['embed_dims'] = self.embed_dims else: assert ffn_cfgs[ffn_index]['embed_dims'] == self.embed_dims self.ffns.append( @@ -866,7 +510,7 @@ class BaseTransformerLayer(BaseModule): return query -@MODELS.register_module() +@TRANSFORMER_LAYER_SEQUENCE.register_module() class TransformerLayerSequence(BaseModule): """Base class for TransformerEncoder and TransformerDecoder in vision transformer. 
@@ -887,7 +531,7 @@ class TransformerLayerSequence(BaseModule): """ def __init__(self, transformerlayers=None, num_layers=None, init_cfg=None): - super().__init__(init_cfg) + super(TransformerLayerSequence, self).__init__(init_cfg) if isinstance(transformerlayers, dict): transformerlayers = [ copy.deepcopy(transformerlayers) for _ in range(num_layers) diff --git a/mmcv/cnn/bricks/upsample.py b/mmcv/cnn/bricks/upsample.py index d91689a..a1a3537 100644 --- a/mmcv/cnn/bricks/upsample.py +++ b/mmcv/cnn/bricks/upsample.py @@ -1,17 +1,15 @@ # Copyright (c) OpenMMLab. All rights reserved. -from typing import Dict - -import torch import torch.nn as nn import torch.nn.functional as F -from mmengine.model import xavier_init -from mmengine.registry import MODELS -MODELS.register_module('nearest', module=nn.Upsample) -MODELS.register_module('bilinear', module=nn.Upsample) +from ..utils import xavier_init +from .registry import UPSAMPLE_LAYERS + +UPSAMPLE_LAYERS.register_module('nearest', module=nn.Upsample) +UPSAMPLE_LAYERS.register_module('bilinear', module=nn.Upsample) -@MODELS.register_module(name='pixel_shuffle') +@UPSAMPLE_LAYERS.register_module(name='pixel_shuffle') class PixelShufflePack(nn.Module): """Pixel Shuffle upsample layer. @@ -26,9 +24,9 @@ class PixelShufflePack(nn.Module): channels. 
""" - def __init__(self, in_channels: int, out_channels: int, scale_factor: int, - upsample_kernel: int): - super().__init__() + def __init__(self, in_channels, out_channels, scale_factor, + upsample_kernel): + super(PixelShufflePack, self).__init__() self.in_channels = in_channels self.out_channels = out_channels self.scale_factor = scale_factor @@ -43,13 +41,13 @@ class PixelShufflePack(nn.Module): def init_weights(self): xavier_init(self.upsample_conv, distribution='uniform') - def forward(self, x: torch.Tensor) -> torch.Tensor: + def forward(self, x): x = self.upsample_conv(x) x = F.pixel_shuffle(x, self.scale_factor) return x -def build_upsample_layer(cfg: Dict, *args, **kwargs) -> nn.Module: +def build_upsample_layer(cfg, *args, **kwargs): """Build upsample layer. Args: @@ -57,7 +55,7 @@ def build_upsample_layer(cfg: Dict, *args, **kwargs) -> nn.Module: - type (str): Layer type. - scale_factor (int): Upsample ratio, which is not applicable to - deconv. + deconv. - layer args: Args needed to instantiate a upsample layer. args (argument list): Arguments passed to the ``__init__`` method of the corresponding conv layer. @@ -75,15 +73,11 @@ def build_upsample_layer(cfg: Dict, *args, **kwargs) -> nn.Module: cfg_ = cfg.copy() layer_type = cfg_.pop('type') + if layer_type not in UPSAMPLE_LAYERS: + raise KeyError(f'Unrecognized upsample type {layer_type}') + else: + upsample = UPSAMPLE_LAYERS.get(layer_type) - # Switch registry to the target scope. If `upsample` cannot be found - # in the registry, fallback to search `upsample` in the - # mmengine.MODELS. 
- with MODELS.switch_scope_and_registry(None) as registry: - upsample = registry.get(layer_type) - if upsample is None: - raise KeyError(f'Cannot find {upsample} in registry under scope ' - f'name {registry.scope}') if upsample is nn.Upsample: cfg_['mode'] = layer_type layer = upsample(*args, **kwargs, **cfg_) diff --git a/mmcv/cnn/bricks/wrappers.py b/mmcv/cnn/bricks/wrappers.py index 07eb04e..8aebf67 100644 --- a/mmcv/cnn/bricks/wrappers.py +++ b/mmcv/cnn/bricks/wrappers.py @@ -9,9 +9,10 @@ import math import torch import torch.nn as nn -from mmengine.registry import MODELS from torch.nn.modules.utils import _pair, _triple +from .registry import CONV_LAYERS, UPSAMPLE_LAYERS + if torch.__version__ == 'parrots': TORCH_VERSION = torch.__version__ else: @@ -20,27 +21,27 @@ else: TORCH_VERSION = tuple(int(x) for x in torch.__version__.split('.')[:2]) -def obsolete_torch_version(torch_version, version_threshold) -> bool: +def obsolete_torch_version(torch_version, version_threshold): return torch_version == 'parrots' or torch_version <= version_threshold class NewEmptyTensorOp(torch.autograd.Function): @staticmethod - def forward(ctx, x: torch.Tensor, new_shape: tuple) -> torch.Tensor: + def forward(ctx, x, new_shape): ctx.shape = x.shape return x.new_empty(new_shape) @staticmethod - def backward(ctx, grad: torch.Tensor) -> tuple: + def backward(ctx, grad): shape = ctx.shape return NewEmptyTensorOp.apply(grad, shape), None -@MODELS.register_module('Conv', force=True) +@CONV_LAYERS.register_module('Conv', force=True) class Conv2d(nn.Conv2d): - def forward(self, x: torch.Tensor) -> torch.Tensor: + def forward(self, x): if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)): out_shape = [x.shape[0], self.out_channels] for i, k, p, s, d in zip(x.shape[-2:], self.kernel_size, @@ -58,10 +59,10 @@ class Conv2d(nn.Conv2d): return super().forward(x) -@MODELS.register_module('Conv3d', force=True) +@CONV_LAYERS.register_module('Conv3d', force=True) class 
Conv3d(nn.Conv3d): - def forward(self, x: torch.Tensor) -> torch.Tensor: + def forward(self, x): if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)): out_shape = [x.shape[0], self.out_channels] for i, k, p, s, d in zip(x.shape[-3:], self.kernel_size, @@ -79,11 +80,12 @@ class Conv3d(nn.Conv3d): return super().forward(x) -@MODELS.register_module() -@MODELS.register_module('deconv') +@CONV_LAYERS.register_module() +@CONV_LAYERS.register_module('deconv') +@UPSAMPLE_LAYERS.register_module('deconv', force=True) class ConvTranspose2d(nn.ConvTranspose2d): - def forward(self, x: torch.Tensor) -> torch.Tensor: + def forward(self, x): if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)): out_shape = [x.shape[0], self.out_channels] for i, k, p, s, d, op in zip(x.shape[-2:], self.kernel_size, @@ -101,11 +103,12 @@ class ConvTranspose2d(nn.ConvTranspose2d): return super().forward(x) -@MODELS.register_module() -@MODELS.register_module('deconv3d') +@CONV_LAYERS.register_module() +@CONV_LAYERS.register_module('deconv3d') +@UPSAMPLE_LAYERS.register_module('deconv3d', force=True) class ConvTranspose3d(nn.ConvTranspose3d): - def forward(self, x: torch.Tensor) -> torch.Tensor: + def forward(self, x): if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 4)): out_shape = [x.shape[0], self.out_channels] for i, k, p, s, d, op in zip(x.shape[-3:], self.kernel_size, @@ -125,7 +128,7 @@ class ConvTranspose3d(nn.ConvTranspose3d): class MaxPool2d(nn.MaxPool2d): - def forward(self, x: torch.Tensor) -> torch.Tensor: + def forward(self, x): # PyTorch 1.9 does not support empty tensor inference yet if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 9)): out_shape = list(x.shape[:2]) @@ -143,7 +146,7 @@ class MaxPool2d(nn.MaxPool2d): class MaxPool3d(nn.MaxPool3d): - def forward(self, x: torch.Tensor) -> torch.Tensor: + def forward(self, x): # PyTorch 1.9 does not support empty tensor inference yet if x.numel() == 0 and 
obsolete_torch_version(TORCH_VERSION, (1, 9)): out_shape = list(x.shape[:2]) @@ -162,7 +165,7 @@ class MaxPool3d(nn.MaxPool3d): class Linear(torch.nn.Linear): - def forward(self, x: torch.Tensor) -> torch.Tensor: + def forward(self, x): # empty tensor forward of Linear layer is supported in Pytorch 1.6 if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 5)): out_shape = [x.shape[0], self.out_features] diff --git a/mmcv/cnn/builder.py b/mmcv/cnn/builder.py new file mode 100644 index 0000000..7567316 --- /dev/null +++ b/mmcv/cnn/builder.py @@ -0,0 +1,30 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from ..runner import Sequential +from ..utils import Registry, build_from_cfg + + +def build_model_from_cfg(cfg, registry, default_args=None): + """Build a PyTorch model from config dict(s). Different from + ``build_from_cfg``, if cfg is a list, a ``nn.Sequential`` will be built. + + Args: + cfg (dict, list[dict]): The config of modules, is is either a config + dict or a list of config dicts. If cfg is a list, a + the built modules will be wrapped with ``nn.Sequential``. + registry (:obj:`Registry`): A registry the module belongs to. + default_args (dict, optional): Default arguments to build the module. + Defaults to None. + + Returns: + nn.Module: A built nn module. + """ + if isinstance(cfg, list): + modules = [ + build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg + ] + return Sequential(*modules) + else: + return build_from_cfg(cfg, registry, default_args) + + +MODELS = Registry('model', build_func=build_model_from_cfg) diff --git a/mmcv/cnn/resnet.py b/mmcv/cnn/resnet.py index 8fc6abf..1cb3ac0 100644 --- a/mmcv/cnn/resnet.py +++ b/mmcv/cnn/resnet.py @@ -1,18 +1,13 @@ # Copyright (c) OpenMMLab. All rights reserved. 
import logging -from typing import Optional, Sequence, Tuple, Union import torch.nn as nn import torch.utils.checkpoint as cp -from mmengine.model import constant_init, kaiming_init -from mmengine.runner import load_checkpoint -from torch import Tensor +from .utils import constant_init, kaiming_init -def conv3x3(in_planes: int, - out_planes: int, - stride: int = 1, - dilation: int = 1): + +def conv3x3(in_planes, out_planes, stride=1, dilation=1): """3x3 convolution with padding.""" return nn.Conv2d( in_planes, @@ -28,14 +23,14 @@ class BasicBlock(nn.Module): expansion = 1 def __init__(self, - inplanes: int, - planes: int, - stride: int = 1, - dilation: int = 1, - downsample: Optional[nn.Module] = None, - style: str = 'pytorch', - with_cp: bool = False): - super().__init__() + inplanes, + planes, + stride=1, + dilation=1, + downsample=None, + style='pytorch', + with_cp=False): + super(BasicBlock, self).__init__() assert style in ['pytorch', 'caffe'] self.conv1 = conv3x3(inplanes, planes, stride, dilation) self.bn1 = nn.BatchNorm2d(planes) @@ -47,7 +42,7 @@ class BasicBlock(nn.Module): self.dilation = dilation assert not with_cp - def forward(self, x: Tensor) -> Tensor: + def forward(self, x): residual = x out = self.conv1(x) @@ -70,19 +65,19 @@ class Bottleneck(nn.Module): expansion = 4 def __init__(self, - inplanes: int, - planes: int, - stride: int = 1, - dilation: int = 1, - downsample: Optional[nn.Module] = None, - style: str = 'pytorch', - with_cp: bool = False): + inplanes, + planes, + stride=1, + dilation=1, + downsample=None, + style='pytorch', + with_cp=False): """Bottleneck block. If style is "pytorch", the stride-two layer is the 3x3 conv layer, if it is "caffe", the stride-two layer is the first 1x1 conv layer. 
""" - super().__init__() + super(Bottleneck, self).__init__() assert style in ['pytorch', 'caffe'] if style == 'pytorch': conv1_stride = 1 @@ -112,7 +107,7 @@ class Bottleneck(nn.Module): self.dilation = dilation self.with_cp = with_cp - def forward(self, x: Tensor) -> Tensor: + def forward(self, x): def _inner_forward(x): residual = x @@ -145,14 +140,14 @@ class Bottleneck(nn.Module): return out -def make_res_layer(block: nn.Module, - inplanes: int, - planes: int, - blocks: int, - stride: int = 1, - dilation: int = 1, - style: str = 'pytorch', - with_cp: bool = False) -> nn.Module: +def make_res_layer(block, + inplanes, + planes, + blocks, + stride=1, + dilation=1, + style='pytorch', + with_cp=False): downsample = None if stride != 1 or inplanes != planes * block.expansion: downsample = nn.Sequential( @@ -213,22 +208,22 @@ class ResNet(nn.Module): } def __init__(self, - depth: int, - num_stages: int = 4, - strides: Sequence[int] = (1, 2, 2, 2), - dilations: Sequence[int] = (1, 1, 1, 1), - out_indices: Sequence[int] = (0, 1, 2, 3), - style: str = 'pytorch', - frozen_stages: int = -1, - bn_eval: bool = True, - bn_frozen: bool = False, - with_cp: bool = False): - super().__init__() + depth, + num_stages=4, + strides=(1, 2, 2, 2), + dilations=(1, 1, 1, 1), + out_indices=(0, 1, 2, 3), + style='pytorch', + frozen_stages=-1, + bn_eval=True, + bn_frozen=False, + with_cp=False): + super(ResNet, self).__init__() if depth not in self.arch_settings: raise KeyError(f'invalid depth {depth} for resnet') assert num_stages >= 1 and num_stages <= 4 block, stage_blocks = self.arch_settings[depth] - stage_blocks = stage_blocks[:num_stages] # type: ignore + stage_blocks = stage_blocks[:num_stages] assert len(strides) == len(dilations) == num_stages assert max(out_indices) < num_stages @@ -239,7 +234,7 @@ class ResNet(nn.Module): self.bn_frozen = bn_frozen self.with_cp = with_cp - self.inplanes: int = 64 + self.inplanes = 64 self.conv1 = nn.Conv2d( 3, 64, kernel_size=7, stride=2, 
padding=3, bias=False) self.bn1 = nn.BatchNorm2d(64) @@ -260,17 +255,17 @@ class ResNet(nn.Module): dilation=dilation, style=self.style, with_cp=with_cp) - self.inplanes = planes * block.expansion # type: ignore + self.inplanes = planes * block.expansion layer_name = f'layer{i + 1}' self.add_module(layer_name, res_layer) self.res_layers.append(layer_name) - self.feat_dim = block.expansion * 64 * 2**( # type: ignore - len(stage_blocks) - 1) + self.feat_dim = block.expansion * 64 * 2**(len(stage_blocks) - 1) - def init_weights(self, pretrained: Optional[str] = None) -> None: + def init_weights(self, pretrained=None): if isinstance(pretrained, str): logger = logging.getLogger() + from ..runner import load_checkpoint load_checkpoint(self, pretrained, strict=False, logger=logger) elif pretrained is None: for m in self.modules(): @@ -281,7 +276,7 @@ class ResNet(nn.Module): else: raise TypeError('pretrained must be a str or None') - def forward(self, x: Tensor) -> Union[Tensor, Tuple[Tensor]]: + def forward(self, x): x = self.conv1(x) x = self.bn1(x) x = self.relu(x) @@ -297,8 +292,8 @@ class ResNet(nn.Module): else: return tuple(outs) - def train(self, mode: bool = True) -> None: - super().train(mode) + def train(self, mode=True): + super(ResNet, self).train(mode) if self.bn_eval: for m in self.modules(): if isinstance(m, nn.BatchNorm2d): diff --git a/mmcv/cnn/rfsearch/__init__.py b/mmcv/cnn/rfsearch/__init__.py deleted file mode 100644 index 04d4572..0000000 --- a/mmcv/cnn/rfsearch/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from .operator import BaseConvRFSearchOp, Conv2dRFSearchOp -from .search import RFSearchHook - -__all__ = ['BaseConvRFSearchOp', 'Conv2dRFSearchOp', 'RFSearchHook'] diff --git a/mmcv/cnn/rfsearch/operator.py b/mmcv/cnn/rfsearch/operator.py deleted file mode 100644 index 2fa45ab..0000000 --- a/mmcv/cnn/rfsearch/operator.py +++ /dev/null @@ -1,169 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. 
-import copy - -import numpy as np -import torch -import torch.nn as nn -from mmengine.logging import print_log -from mmengine.model import BaseModule -from torch import Tensor - -from .utils import expand_rates, get_single_padding - - -class BaseConvRFSearchOp(BaseModule): - """Based class of ConvRFSearchOp. - - Args: - op_layer (nn.Module): pytorch module, e,g, Conv2d - global_config (dict): config dict. - """ - - def __init__(self, op_layer: nn.Module, global_config: dict): - super().__init__() - self.op_layer = op_layer - self.global_config = global_config - - def normlize(self, weights: nn.Parameter) -> nn.Parameter: - """Normalize weights. - - Args: - weights (nn.Parameter): Weights to be normalized. - - Returns: - nn.Parameters: Normalized weights. - """ - abs_weights = torch.abs(weights) - normalized_weights = abs_weights / torch.sum(abs_weights) - return normalized_weights - - -class Conv2dRFSearchOp(BaseConvRFSearchOp): - """Enable Conv2d with receptive field searching ability. - - Args: - op_layer (nn.Module): pytorch module, e,g, Conv2d - global_config (dict): config dict. Defaults to None. - By default this must include: - - - "init_alphas": The value for initializing weights of each branch. - - "num_branches": The controller of the size of - search space (the number of branches). - - "exp_rate": The controller of the sparsity of search space. - - "mmin": The minimum dilation rate. - - "mmax": The maximum dilation rate. - - Extra keys may exist, but are used by RFSearchHook, e.g., "step", - "max_step", "search_interval", and "skip_layer". - verbose (bool): Determines whether to print rf-next - related logging messages. - Defaults to True. 
- """ - - def __init__(self, - op_layer: nn.Module, - global_config: dict, - verbose: bool = True): - super().__init__(op_layer, global_config) - assert global_config is not None, 'global_config is None' - self.num_branches = global_config['num_branches'] - assert self.num_branches in [2, 3] - self.verbose = verbose - init_dilation = op_layer.dilation - self.dilation_rates = expand_rates(init_dilation, global_config) - if self.op_layer.kernel_size[ - 0] == 1 or self.op_layer.kernel_size[0] % 2 == 0: - self.dilation_rates = [(op_layer.dilation[0], r[1]) - for r in self.dilation_rates] - if self.op_layer.kernel_size[ - 1] == 1 or self.op_layer.kernel_size[1] % 2 == 0: - self.dilation_rates = [(r[0], op_layer.dilation[1]) - for r in self.dilation_rates] - - self.branch_weights = nn.Parameter(torch.Tensor(self.num_branches)) - if self.verbose: - print_log(f'Expand as {self.dilation_rates}', 'current') - nn.init.constant_(self.branch_weights, global_config['init_alphas']) - - def forward(self, input: Tensor) -> Tensor: - norm_w = self.normlize(self.branch_weights[:len(self.dilation_rates)]) - if len(self.dilation_rates) == 1: - outputs = [ - nn.functional.conv2d( - input, - weight=self.op_layer.weight, - bias=self.op_layer.bias, - stride=self.op_layer.stride, - padding=self.get_padding(self.dilation_rates[0]), - dilation=self.dilation_rates[0], - groups=self.op_layer.groups, - ) - ] - else: - outputs = [ - nn.functional.conv2d( - input, - weight=self.op_layer.weight, - bias=self.op_layer.bias, - stride=self.op_layer.stride, - padding=self.get_padding(r), - dilation=r, - groups=self.op_layer.groups, - ) * norm_w[i] for i, r in enumerate(self.dilation_rates) - ] - output = outputs[0] - for i in range(1, len(self.dilation_rates)): - output += outputs[i] - return output - - def estimate_rates(self) -> None: - """Estimate new dilation rate based on trained branch_weights.""" - norm_w = self.normlize(self.branch_weights[:len(self.dilation_rates)]) - if self.verbose: - 
print_log( - 'Estimate dilation {} with weight {}.'.format( - self.dilation_rates, - norm_w.detach().cpu().numpy().tolist()), 'current') - - sum0, sum1, w_sum = 0, 0, 0 - for i in range(len(self.dilation_rates)): - sum0 += norm_w[i].item() * self.dilation_rates[i][0] - sum1 += norm_w[i].item() * self.dilation_rates[i][1] - w_sum += norm_w[i].item() - estimated = [ - np.clip( - int(round(sum0 / w_sum)), self.global_config['mmin'], - self.global_config['mmax']).item(), - np.clip( - int(round(sum1 / w_sum)), self.global_config['mmin'], - self.global_config['mmax']).item() - ] - self.op_layer.dilation = tuple(estimated) - self.op_layer.padding = self.get_padding(self.op_layer.dilation) - self.dilation_rates = [tuple(estimated)] - if self.verbose: - print_log(f'Estimate as {tuple(estimated)}', 'current') - - def expand_rates(self) -> None: - """Expand dilation rate.""" - dilation = self.op_layer.dilation - dilation_rates = expand_rates(dilation, self.global_config) - if self.op_layer.kernel_size[ - 0] == 1 or self.op_layer.kernel_size[0] % 2 == 0: - dilation_rates = [(dilation[0], r[1]) for r in dilation_rates] - if self.op_layer.kernel_size[ - 1] == 1 or self.op_layer.kernel_size[1] % 2 == 0: - dilation_rates = [(r[0], dilation[1]) for r in dilation_rates] - - self.dilation_rates = copy.deepcopy(dilation_rates) - if self.verbose: - print_log(f'Expand as {self.dilation_rates}', 'current') - nn.init.constant_(self.branch_weights, - self.global_config['init_alphas']) - - def get_padding(self, dilation) -> tuple: - padding = (get_single_padding(self.op_layer.kernel_size[0], - self.op_layer.stride[0], dilation[0]), - get_single_padding(self.op_layer.kernel_size[1], - self.op_layer.stride[1], dilation[1])) - return padding diff --git a/mmcv/cnn/rfsearch/search.py b/mmcv/cnn/rfsearch/search.py deleted file mode 100644 index f4add4b..0000000 --- a/mmcv/cnn/rfsearch/search.py +++ /dev/null @@ -1,239 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. 
-import os -from typing import Dict, Optional - -import mmengine -import torch # noqa -import torch.nn as nn -from mmengine.hooks import Hook -from mmengine.logging import print_log -from mmengine.registry import HOOKS - -from .operator import BaseConvRFSearchOp, Conv2dRFSearchOp # noqa -from .utils import get_single_padding, write_to_json - - -@HOOKS.register_module() -class RFSearchHook(Hook): - """Rcecptive field search via dilation rates. - - Please refer to `RF-Next: Efficient Receptive Field - Search for Convolutional Neural Networks - `_ for more details. - - - Args: - mode (str, optional): It can be set to the following types: - 'search', 'fixed_single_branch', or 'fixed_multi_branch'. - Defaults to 'search'. - config (Dict, optional): config dict of search. - By default this config contains "search", - and config["search"] must include: - - - "step": recording the current searching step. - - "max_step": The maximum number of searching steps - to update the structures. - - "search_interval": The interval (epoch/iteration) - between two updates. - - "exp_rate": The controller of the sparsity of search space. - - "init_alphas": The value for initializing weights of each branch. - - "mmin": The minimum dilation rate. - - "mmax": The maximum dilation rate. - - "num_branches": The controller of the size of - search space (the number of branches). - - "skip_layer": The modules in skip_layer will be ignored - during the receptive field search. - rfstructure_file (str, optional): Path to load searched receptive - fields of the model. Defaults to None. - by_epoch (bool, optional): Determine to perform step by epoch or - by iteration. If set to True, it will step by epoch. Otherwise, by - iteration. Defaults to True. - verbose (bool): Determines whether to print rf-next related logging - messages. Defaults to True. 
- """ - - def __init__(self, - mode: str = 'search', - config: Dict = {}, - rfstructure_file: Optional[str] = None, - by_epoch: bool = True, - verbose: bool = True): - assert mode in ['search', 'fixed_single_branch', 'fixed_multi_branch'] - assert config is not None - self.config = config - self.config['structure'] = {} - self.verbose = verbose - if rfstructure_file is not None: - rfstructure = mmengine.load(rfstructure_file)['structure'] - self.config['structure'] = rfstructure - self.mode = mode - self.num_branches = self.config['search']['num_branches'] - self.by_epoch = by_epoch - - def init_model(self, model: nn.Module): - """init model with search ability. - - Args: - model (nn.Module): pytorch model - - Raises: - NotImplementedError: only support three modes: - search/fixed_single_branch/fixed_multi_branch - """ - if self.verbose: - print_log('RFSearch init begin.', 'current') - if self.mode == 'search': - if self.config['structure']: - self.set_model(model, search_op='Conv2d') - self.wrap_model(model, search_op='Conv2d') - elif self.mode == 'fixed_single_branch': - self.set_model(model, search_op='Conv2d') - elif self.mode == 'fixed_multi_branch': - self.set_model(model, search_op='Conv2d') - self.wrap_model(model, search_op='Conv2d') - else: - raise NotImplementedError - if self.verbose: - print_log('RFSearch init end.', 'current') - - def after_train_epoch(self, runner): - """Performs a dilation searching step after one training epoch.""" - if self.by_epoch and self.mode == 'search': - self.step(runner.model, runner.work_dir) - - def after_train_iter(self, runner, batch_idx, data_batch, outputs): - """Performs a dilation searching step after one training iteration.""" - if not self.by_epoch and self.mode == 'search': - self.step(runner.model, runner.work_dir) - - def step(self, model: nn.Module, work_dir: str) -> None: - """Performs a dilation searching step. 
- - Args: - model (nn.Module): pytorch model - work_dir (str): Directory to save the searching results. - """ - self.config['search']['step'] += 1 - if (self.config['search']['step'] - ) % self.config['search']['search_interval'] == 0 and (self.config[ - 'search']['step']) < self.config['search']['max_step']: - self.estimate_and_expand(model) - for name, module in model.named_modules(): - if isinstance(module, BaseConvRFSearchOp): - self.config['structure'][name] = module.op_layer.dilation - - write_to_json( - self.config, - os.path.join( - work_dir, - 'local_search_config_step%d.json' % - self.config['search']['step'], - ), - ) - - def estimate_and_expand(self, model: nn.Module) -> None: - """estimate and search for RFConvOp. - - Args: - model (nn.Module): pytorch model - """ - for module in model.modules(): - if isinstance(module, BaseConvRFSearchOp): - module.estimate_rates() - module.expand_rates() - - def wrap_model(self, - model: nn.Module, - search_op: str = 'Conv2d', - prefix: str = '') -> None: - """wrap model to support searchable conv op. - - Args: - model (nn.Module): pytorch model - search_op (str): The module that uses RF search. - Defaults to 'Conv2d'. - init_rates (int, optional): Set to other initial dilation rates. - Defaults to None. - prefix (str): Prefix for function recursion. Defaults to ''. - """ - op = 'torch.nn.' + search_op - for name, module in model.named_children(): - if prefix == '': - fullname = 'module.' + name - else: - fullname = prefix + '.' 
+ name - if self.config['search']['skip_layer'] is not None: - if any(layer in fullname - for layer in self.config['search']['skip_layer']): - continue - if isinstance(module, eval(op)): - if 1 < module.kernel_size[0] and \ - 0 != module.kernel_size[0] % 2 or \ - 1 < module.kernel_size[1] and \ - 0 != module.kernel_size[1] % 2: - moduleWrap = eval(search_op + 'RFSearchOp')( - module, self.config['search'], self.verbose) - moduleWrap = moduleWrap.to(module.weight.device) - if self.verbose: - print_log( - 'Wrap model %s to %s.' % - (str(module), str(moduleWrap)), 'current') - setattr(model, name, moduleWrap) - elif not isinstance(module, BaseConvRFSearchOp): - self.wrap_model(module, search_op, fullname) - - def set_model(self, - model: nn.Module, - search_op: str = 'Conv2d', - init_rates: Optional[int] = None, - prefix: str = '') -> None: - """set model based on config. - - Args: - model (nn.Module): pytorch model - config (Dict): config file - search_op (str): The module that uses RF search. - Defaults to 'Conv2d'. - init_rates (int, optional): Set to other initial dilation rates. - Defaults to None. - prefix (str): Prefix for function recursion. Defaults to ''. - """ - op = 'torch.nn.' + search_op - for name, module in model.named_children(): - if prefix == '': - fullname = 'module.' + name - else: - fullname = prefix + '.' 
+ name - if self.config['search']['skip_layer'] is not None: - if any(layer in fullname - for layer in self.config['search']['skip_layer']): - continue - if isinstance(module, eval(op)): - if 1 < module.kernel_size[0] and \ - 0 != module.kernel_size[0] % 2 or \ - 1 < module.kernel_size[1] and \ - 0 != module.kernel_size[1] % 2: - if isinstance(self.config['structure'][fullname], int): - self.config['structure'][fullname] = [ - self.config['structure'][fullname], - self.config['structure'][fullname] - ] - module.dilation = ( - self.config['structure'][fullname][0], - self.config['structure'][fullname][1], - ) - module.padding = ( - get_single_padding( - module.kernel_size[0], module.stride[0], - self.config['structure'][fullname][0]), - get_single_padding( - module.kernel_size[1], module.stride[1], - self.config['structure'][fullname][1])) - setattr(model, name, module) - if self.verbose: - print_log( - 'Set module %s dilation as: [%d %d]' % - (fullname, module.dilation[0], module.dilation[1]), - 'current') - elif not isinstance(module, BaseConvRFSearchOp): - self.set_model(module, search_op, init_rates, fullname) diff --git a/mmcv/cnn/rfsearch/utils.py b/mmcv/cnn/rfsearch/utils.py deleted file mode 100644 index 4c8168e..0000000 --- a/mmcv/cnn/rfsearch/utils.py +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import mmengine -import numpy as np - - -def write_to_json(config: dict, filename: str): - """save config to json file. - - Args: - config (dict): Config to be saved. - filename (str): Path to save config. - """ - - with open(filename, 'w', encoding='utf-8') as f: - mmengine.dump(config, f, file_format='json') - - -def expand_rates(dilation: tuple, config: dict) -> list: - """expand dilation rate according to config. 
- - Args: - dilation (int): _description_ - config (dict): config dict - - Returns: - list: list of expanded dilation rates - """ - exp_rate = config['exp_rate'] - - large_rates = [] - small_rates = [] - for _ in range(config['num_branches'] // 2): - large_rates.append( - tuple([ - np.clip( - int(round((1 + exp_rate) * dilation[0])), config['mmin'], - config['mmax']).item(), - np.clip( - int(round((1 + exp_rate) * dilation[1])), config['mmin'], - config['mmax']).item() - ])) - small_rates.append( - tuple([ - np.clip( - int(round((1 - exp_rate) * dilation[0])), config['mmin'], - config['mmax']).item(), - np.clip( - int(round((1 - exp_rate) * dilation[1])), config['mmin'], - config['mmax']).item() - ])) - - small_rates.reverse() - - if config['num_branches'] % 2 == 0: - rate_list = small_rates + large_rates - else: - rate_list = small_rates + [dilation] + large_rates - - unique_rate_list = list(set(rate_list)) - unique_rate_list.sort(key=rate_list.index) - return unique_rate_list - - -def get_single_padding(kernel_size: int, - stride: int = 1, - dilation: int = 1) -> int: - padding = ((stride - 1) + dilation * (kernel_size - 1)) // 2 - return padding diff --git a/mmcv/cnn/utils/__init__.py b/mmcv/cnn/utils/__init__.py index cdec939..a263e31 100644 --- a/mmcv/cnn/utils/__init__.py +++ b/mmcv/cnn/utils/__init__.py @@ -1,5 +1,19 @@ # Copyright (c) OpenMMLab. All rights reserved. 
from .flops_counter import get_model_complexity_info from .fuse_conv_bn import fuse_conv_bn +from .sync_bn import revert_sync_batchnorm +from .weight_init import (INITIALIZERS, Caffe2XavierInit, ConstantInit, + KaimingInit, NormalInit, PretrainedInit, + TruncNormalInit, UniformInit, XavierInit, + bias_init_with_prob, caffe2_xavier_init, + constant_init, initialize, kaiming_init, normal_init, + trunc_normal_init, uniform_init, xavier_init) -__all__ = ['get_model_complexity_info', 'fuse_conv_bn'] +__all__ = [ + 'get_model_complexity_info', 'bias_init_with_prob', 'caffe2_xavier_init', + 'constant_init', 'kaiming_init', 'normal_init', 'trunc_normal_init', + 'uniform_init', 'xavier_init', 'fuse_conv_bn', 'initialize', + 'INITIALIZERS', 'ConstantInit', 'XavierInit', 'NormalInit', + 'TruncNormalInit', 'UniformInit', 'KaimingInit', 'PretrainedInit', + 'Caffe2XavierInit', 'revert_sync_batchnorm' +] diff --git a/mmcv/cnn/utils/flops_counter.py b/mmcv/cnn/utils/flops_counter.py index b09edbc..dceeb39 100644 --- a/mmcv/cnn/utils/flops_counter.py +++ b/mmcv/cnn/utils/flops_counter.py @@ -24,25 +24,22 @@ # SOFTWARE. import sys -import warnings from functools import partial -from typing import Any, Callable, Dict, Optional, TextIO, Tuple import numpy as np import torch import torch.nn as nn -from mmcv.cnn.bricks import (Conv2d, Conv3d, ConvTranspose2d, Linear, - MaxPool2d, MaxPool3d) +import mmcv -def get_model_complexity_info(model: nn.Module, - input_shape: tuple, - print_per_layer_stat: bool = True, - as_strings: bool = True, - input_constructor: Optional[Callable] = None, - flush: bool = False, - ost: TextIO = sys.stdout) -> tuple: +def get_model_complexity_info(model, + input_shape, + print_per_layer_stat=True, + as_strings=True, + input_constructor=None, + flush=False, + ost=sys.stdout): """Get complexity information of a model. 
This method can calculate FLOPs and parameter counts of a model with @@ -51,16 +48,16 @@ def get_model_complexity_info(model: nn.Module, Supported layers are listed as below: - Convolutions: ``nn.Conv1d``, ``nn.Conv2d``, ``nn.Conv3d``. - - Activations: ``nn.ReLU``, ``nn.PReLU``, ``nn.ELU``, - ``nn.LeakyReLU``, ``nn.ReLU6``. + - Activations: ``nn.ReLU``, ``nn.PReLU``, ``nn.ELU``, ``nn.LeakyReLU``, + ``nn.ReLU6``. - Poolings: ``nn.MaxPool1d``, ``nn.MaxPool2d``, ``nn.MaxPool3d``, - ``nn.AvgPool1d``, ``nn.AvgPool2d``, ``nn.AvgPool3d``, - ``nn.AdaptiveMaxPool1d``, ``nn.AdaptiveMaxPool2d``, - ``nn.AdaptiveMaxPool3d``, ``nn.AdaptiveAvgPool1d``, - ``nn.AdaptiveAvgPool2d``, ``nn.AdaptiveAvgPool3d``. + ``nn.AvgPool1d``, ``nn.AvgPool2d``, ``nn.AvgPool3d``, + ``nn.AdaptiveMaxPool1d``, ``nn.AdaptiveMaxPool2d``, + ``nn.AdaptiveMaxPool3d``, ``nn.AdaptiveAvgPool1d``, + ``nn.AdaptiveAvgPool2d``, ``nn.AdaptiveAvgPool3d``. - BatchNorms: ``nn.BatchNorm1d``, ``nn.BatchNorm2d``, - ``nn.BatchNorm3d``, ``nn.GroupNorm``, ``nn.InstanceNorm1d``, - ``InstanceNorm2d``, ``InstanceNorm3d``, ``nn.LayerNorm``. + ``nn.BatchNorm3d``, ``nn.GroupNorm``, ``nn.InstanceNorm1d``, + ``InstanceNorm2d``, ``InstanceNorm3d``, ``nn.LayerNorm``. - Linear: ``nn.Linear``. - Deconvolution: ``nn.ConvTranspose2d``. - Upsample: ``nn.Upsample``. @@ -81,8 +78,8 @@ def get_model_complexity_info(model: nn.Module, Returns: tuple[float | str]: If ``as_strings`` is set to True, it will return - FLOPs and parameter counts in a string format. otherwise, it will - return those in a float number format. + FLOPs and parameter counts in a string format. otherwise, it will + return those in a float number format. 
""" assert type(input_shape) is tuple assert len(input_shape) >= 1 @@ -118,9 +115,7 @@ def get_model_complexity_info(model: nn.Module, return flops_count, params_count -def flops_to_string(flops: float, - units: Optional[str] = 'GFLOPs', - precision: int = 2) -> str: +def flops_to_string(flops, units='GFLOPs', precision=2): """Convert FLOPs number into a string. Note that Here we take a multiply-add counts as one FLOP. @@ -163,9 +158,7 @@ def flops_to_string(flops: float, return str(flops) + ' FLOPs' -def params_to_string(num_params: float, - units: Optional[str] = None, - precision: int = 2) -> str: +def params_to_string(num_params, units=None, precision=2): """Convert parameter number into a string. Args: @@ -202,13 +195,13 @@ def params_to_string(num_params: float, return str(num_params) -def print_model_with_flops(model: nn.Module, - total_flops: float, - total_params: float, - units: Optional[str] = 'GFLOPs', - precision: int = 3, - ost: TextIO = sys.stdout, - flush: bool = False) -> None: +def print_model_with_flops(model, + total_flops, + total_params, + units='GFLOPs', + precision=3, + ost=sys.stdout, + flush=False): """Print a model with FLOPs for each layer. Args: @@ -283,10 +276,10 @@ def print_model_with_flops(model: nn.Module, return ', '.join([ params_to_string( accumulated_num_params, units='M', precision=precision), - f'{accumulated_num_params / total_params:.3%} Params', + '{:.3%} Params'.format(accumulated_num_params / total_params), flops_to_string( accumulated_flops_cost, units=units, precision=precision), - f'{accumulated_flops_cost / total_flops:.3%} FLOPs', + '{:.3%} FLOPs'.format(accumulated_flops_cost / total_flops), self.original_extra_repr() ]) @@ -311,7 +304,7 @@ def print_model_with_flops(model: nn.Module, model.apply(del_extra_repr) -def get_model_parameters_number(model: nn.Module) -> float: +def get_model_parameters_number(model): """Calculate parameter number of a model. 
Args: @@ -324,16 +317,16 @@ def get_model_parameters_number(model: nn.Module) -> float: return num_params -def add_flops_counting_methods(net_main_module: nn.Module) -> nn.Module: +def add_flops_counting_methods(net_main_module): # adding additional methods to the existing module object, # this is done this way so that each function has access to self object - net_main_module.start_flops_count = start_flops_count.__get__( # type: ignore # noqa E501 + net_main_module.start_flops_count = start_flops_count.__get__( net_main_module) - net_main_module.stop_flops_count = stop_flops_count.__get__( # type: ignore # noqa E501 + net_main_module.stop_flops_count = stop_flops_count.__get__( net_main_module) - net_main_module.reset_flops_count = reset_flops_count.__get__( # type: ignore # noqa E501 + net_main_module.reset_flops_count = reset_flops_count.__get__( net_main_module) - net_main_module.compute_average_flops_cost = compute_average_flops_cost.__get__( # type: ignore # noqa E501 + net_main_module.compute_average_flops_cost = compute_average_flops_cost.__get__( # noqa: E501 net_main_module) net_main_module.reset_flops_count() @@ -341,7 +334,7 @@ def add_flops_counting_methods(net_main_module: nn.Module) -> nn.Module: return net_main_module -def compute_average_flops_cost(self) -> Tuple[float, float]: +def compute_average_flops_cost(self): """Compute average FLOPs cost. A method to compute average FLOPs cost, which will be available after @@ -359,7 +352,7 @@ def compute_average_flops_cost(self) -> Tuple[float, float]: return flops_sum / batches_count, params_sum -def start_flops_count(self) -> None: +def start_flops_count(self): """Activate the computation of mean flops consumption per image. A method to activate the computation of mean flops consumption per image. 
@@ -368,7 +361,7 @@ def start_flops_count(self) -> None: """ add_batch_counter_hook_function(self) - def add_flops_counter_hook_function(module: nn.Module) -> None: + def add_flops_counter_hook_function(module): if is_supported_instance(module): if hasattr(module, '__flops_handle__'): return @@ -382,7 +375,7 @@ def start_flops_count(self) -> None: self.apply(partial(add_flops_counter_hook_function)) -def stop_flops_count(self) -> None: +def stop_flops_count(self): """Stop computing the mean flops consumption per image. A method to stop computing the mean flops consumption per image, which will @@ -393,7 +386,7 @@ def stop_flops_count(self) -> None: self.apply(remove_flops_counter_hook_function) -def reset_flops_count(self) -> None: +def reset_flops_count(self): """Reset statistics computed so far. A method to Reset computed statistics, which will be available after @@ -404,13 +397,11 @@ def reset_flops_count(self) -> None: # ---- Internal functions -def empty_flops_counter_hook(module: nn.Module, input: tuple, - output: Any) -> None: +def empty_flops_counter_hook(module, input, output): module.__flops__ += 0 -def upsample_flops_counter_hook(module: nn.Module, input: tuple, - output: torch.Tensor) -> None: +def upsample_flops_counter_hook(module, input, output): output_size = output[0] batch_size = output_size.shape[0] output_elements_count = batch_size @@ -419,38 +410,39 @@ def upsample_flops_counter_hook(module: nn.Module, input: tuple, module.__flops__ += int(output_elements_count) -def relu_flops_counter_hook(module: nn.Module, input: tuple, - output: torch.Tensor) -> None: +def relu_flops_counter_hook(module, input, output): active_elements_count = output.numel() module.__flops__ += int(active_elements_count) -def linear_flops_counter_hook(module: nn.Module, input: tuple, - output: torch.Tensor) -> None: +def linear_flops_counter_hook(module, input, output): + input = input[0] output_last_dim = output.shape[ -1] # pytorch checks dimensions, so here we don't 
care much - module.__flops__ += int(np.prod(input[0].shape) * output_last_dim) + module.__flops__ += int(np.prod(input.shape) * output_last_dim) -def pool_flops_counter_hook(module: nn.Module, input: tuple, - output: torch.Tensor) -> None: - module.__flops__ += int(np.prod(input[0].shape)) +def pool_flops_counter_hook(module, input, output): + input = input[0] + module.__flops__ += int(np.prod(input.shape)) -def norm_flops_counter_hook(module: nn.Module, input: tuple, - output: torch.Tensor) -> None: - batch_flops = np.prod(input[0].shape) +def norm_flops_counter_hook(module, input, output): + input = input[0] + + batch_flops = np.prod(input.shape) if (getattr(module, 'affine', False) or getattr(module, 'elementwise_affine', False)): batch_flops *= 2 module.__flops__ += int(batch_flops) -def deconv_flops_counter_hook(conv_module: nn.Module, input: tuple, - output: torch.Tensor) -> None: +def deconv_flops_counter_hook(conv_module, input, output): # Can have multiple inputs, getting the first one - batch_size = input[0].shape[0] - input_height, input_width = input[0].shape[2:] + input = input[0] + + batch_size = input.shape[0] + input_height, input_width = input.shape[2:] kernel_height, kernel_width = conv_module.kernel_size in_channels = conv_module.in_channels @@ -466,16 +458,17 @@ def deconv_flops_counter_hook(conv_module: nn.Module, input: tuple, bias_flops = 0 if conv_module.bias is not None: output_height, output_width = output.shape[2:] - bias_flops = out_channels * batch_size * output_height * output_width + bias_flops = out_channels * batch_size * output_height * output_height overall_flops = overall_conv_flops + bias_flops conv_module.__flops__ += int(overall_flops) -def conv_flops_counter_hook(conv_module: nn.Module, input: tuple, - output: torch.Tensor) -> None: +def conv_flops_counter_hook(conv_module, input, output): # Can have multiple inputs, getting the first one - batch_size = input[0].shape[0] + input = input[0] + + batch_size = input.shape[0] 
output_dims = list(output.shape[2:]) kernel_dims = list(conv_module.kernel_size) @@ -502,23 +495,25 @@ def conv_flops_counter_hook(conv_module: nn.Module, input: tuple, conv_module.__flops__ += int(overall_flops) -def batch_counter_hook(module: nn.Module, input: tuple, output: Any) -> None: +def batch_counter_hook(module, input, output): batch_size = 1 if len(input) > 0: # Can have multiple inputs, getting the first one - batch_size = len(input[0]) + input = input[0] + batch_size = len(input) else: - warnings.warn('No positional inputs found for a module, ' - 'assuming batch size is 1.') + pass + print('Warning! No positional inputs found for a module, ' + 'assuming batch size is 1.') module.__batch_counter__ += batch_size -def add_batch_counter_variables_or_reset(module: nn.Module) -> None: +def add_batch_counter_variables_or_reset(module): module.__batch_counter__ = 0 -def add_batch_counter_hook_function(module: nn.Module) -> None: +def add_batch_counter_hook_function(module): if hasattr(module, '__batch_counter_handle__'): return @@ -526,43 +521,43 @@ def add_batch_counter_hook_function(module: nn.Module) -> None: module.__batch_counter_handle__ = handle -def remove_batch_counter_hook_function(module: nn.Module) -> None: +def remove_batch_counter_hook_function(module): if hasattr(module, '__batch_counter_handle__'): module.__batch_counter_handle__.remove() del module.__batch_counter_handle__ -def add_flops_counter_variable_or_reset(module: nn.Module) -> None: +def add_flops_counter_variable_or_reset(module): if is_supported_instance(module): if hasattr(module, '__flops__') or hasattr(module, '__params__'): - warnings.warn('variables __flops__ or __params__ are already ' - 'defined for the module' + type(module).__name__ + - ' ptflops can affect your code!') + print('Warning: variables __flops__ or __params__ are already ' + 'defined for the module' + type(module).__name__ + + ' ptflops can affect your code!') module.__flops__ = 0 module.__params__ = 
get_model_parameters_number(module) -def is_supported_instance(module: nn.Module) -> bool: +def is_supported_instance(module): if type(module) in get_modules_mapping(): return True return False -def remove_flops_counter_hook_function(module: nn.Module) -> None: +def remove_flops_counter_hook_function(module): if is_supported_instance(module): if hasattr(module, '__flops_handle__'): module.__flops_handle__.remove() del module.__flops_handle__ -def get_modules_mapping() -> Dict: +def get_modules_mapping(): return { # convolutions nn.Conv1d: conv_flops_counter_hook, nn.Conv2d: conv_flops_counter_hook, - Conv2d: conv_flops_counter_hook, + mmcv.cnn.bricks.Conv2d: conv_flops_counter_hook, nn.Conv3d: conv_flops_counter_hook, - Conv3d: conv_flops_counter_hook, + mmcv.cnn.bricks.Conv3d: conv_flops_counter_hook, # activations nn.ReLU: relu_flops_counter_hook, nn.PReLU: relu_flops_counter_hook, @@ -574,9 +569,9 @@ def get_modules_mapping() -> Dict: nn.AvgPool1d: pool_flops_counter_hook, nn.AvgPool2d: pool_flops_counter_hook, nn.MaxPool2d: pool_flops_counter_hook, - MaxPool2d: pool_flops_counter_hook, + mmcv.cnn.bricks.MaxPool2d: pool_flops_counter_hook, nn.MaxPool3d: pool_flops_counter_hook, - MaxPool3d: pool_flops_counter_hook, + mmcv.cnn.bricks.MaxPool3d: pool_flops_counter_hook, nn.AvgPool3d: pool_flops_counter_hook, nn.AdaptiveMaxPool1d: pool_flops_counter_hook, nn.AdaptiveAvgPool1d: pool_flops_counter_hook, @@ -595,10 +590,10 @@ def get_modules_mapping() -> Dict: nn.LayerNorm: norm_flops_counter_hook, # FC nn.Linear: linear_flops_counter_hook, - Linear: linear_flops_counter_hook, + mmcv.cnn.bricks.Linear: linear_flops_counter_hook, # Upscale nn.Upsample: upsample_flops_counter_hook, # Deconvolution nn.ConvTranspose2d: deconv_flops_counter_hook, - ConvTranspose2d: deconv_flops_counter_hook, + mmcv.cnn.bricks.ConvTranspose2d: deconv_flops_counter_hook, } diff --git a/mmcv/cnn/utils/fuse_conv_bn.py b/mmcv/cnn/utils/fuse_conv_bn.py index 6ccaab3..cb7076f 100644 --- 
a/mmcv/cnn/utils/fuse_conv_bn.py +++ b/mmcv/cnn/utils/fuse_conv_bn.py @@ -3,7 +3,7 @@ import torch import torch.nn as nn -def _fuse_conv_bn(conv: nn.Module, bn: nn.Module) -> nn.Module: +def _fuse_conv_bn(conv, bn): """Fuse conv and bn into one module. Args: @@ -24,7 +24,7 @@ def _fuse_conv_bn(conv: nn.Module, bn: nn.Module) -> nn.Module: return conv -def fuse_conv_bn(module: nn.Module) -> nn.Module: +def fuse_conv_bn(module): """Recursively fuse conv and bn in a module. During inference, the functionary of batch norm layers is turned off diff --git a/mmcv/cnn/utils/sync_bn.py b/mmcv/cnn/utils/sync_bn.py new file mode 100644 index 0000000..8a79ff4 --- /dev/null +++ b/mmcv/cnn/utils/sync_bn.py @@ -0,0 +1,59 @@ +import torch + +import mmcv + + +class _BatchNormXd(torch.nn.modules.batchnorm._BatchNorm): + """A general BatchNorm layer without input dimension check. + + Reproduced from @kapily's work: + (https://github.com/pytorch/pytorch/issues/41081#issuecomment-783961547) + The only difference between BatchNorm1d, BatchNorm2d, BatchNorm3d, etc + is `_check_input_dim` that is designed for tensor sanity checks. + The check has been bypassed in this class for the convenience of converting + SyncBatchNorm. + """ + + def _check_input_dim(self, input): + return + + +def revert_sync_batchnorm(module): + """Helper function to convert all `SyncBatchNorm` (SyncBN) and + `mmcv.ops.sync_bn.SyncBatchNorm`(MMSyncBN) layers in the model to + `BatchNormXd` layers. + + Adapted from @kapily's work: + (https://github.com/pytorch/pytorch/issues/41081#issuecomment-783961547) + + Args: + module (nn.Module): The module containing `SyncBatchNorm` layers. + + Returns: + module_output: The converted module with `BatchNormXd` layers. 
+ """ + module_output = module + module_checklist = [torch.nn.modules.batchnorm.SyncBatchNorm] + if hasattr(mmcv, 'ops'): + module_checklist.append(mmcv.ops.SyncBatchNorm) + if isinstance(module, tuple(module_checklist)): + module_output = _BatchNormXd(module.num_features, module.eps, + module.momentum, module.affine, + module.track_running_stats) + if module.affine: + # no_grad() may not be needed here but + # just to be consistent with `convert_sync_batchnorm()` + with torch.no_grad(): + module_output.weight = module.weight + module_output.bias = module.bias + module_output.running_mean = module.running_mean + module_output.running_var = module.running_var + module_output.num_batches_tracked = module.num_batches_tracked + module_output.training = module.training + # qconfig exists in quantized models + if hasattr(module, 'qconfig'): + module_output.qconfig = module.qconfig + for name, child in module.named_children(): + module_output.add_module(name, revert_sync_batchnorm(child)) + del module + return module_output diff --git a/mmcv/cnn/utils/weight_init.py b/mmcv/cnn/utils/weight_init.py new file mode 100644 index 0000000..e1ac999 --- /dev/null +++ b/mmcv/cnn/utils/weight_init.py @@ -0,0 +1,684 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import copy +import math +import warnings + +import numpy as np +import torch +import torch.nn as nn +from torch import Tensor + +from mmcv.utils import Registry, build_from_cfg, get_logger, print_log + +INITIALIZERS = Registry('initializer') + + +def update_init_info(module, init_info): + """Update the `_params_init_info` in the module if the value of parameters + are changed. + + Args: + module (obj:`nn.Module`): The module of PyTorch with a user-defined + attribute `_params_init_info` which records the initialization + information. + init_info (str): The string that describes the initialization. 
+ """ + assert hasattr( + module, + '_params_init_info'), f'Can not find `_params_init_info` in {module}' + for name, param in module.named_parameters(): + + assert param in module._params_init_info, ( + f'Find a new :obj:`Parameter` ' + f'named `{name}` during executing the ' + f'`init_weights` of ' + f'`{module.__class__.__name__}`. ' + f'Please do not add or ' + f'replace parameters during executing ' + f'the `init_weights`. ') + + # The parameter has been changed during executing the + # `init_weights` of module + mean_value = param.data.mean() + if module._params_init_info[param]['tmp_mean_value'] != mean_value: + module._params_init_info[param]['init_info'] = init_info + module._params_init_info[param]['tmp_mean_value'] = mean_value + + +def constant_init(module, val, bias=0): + if hasattr(module, 'weight') and module.weight is not None: + nn.init.constant_(module.weight, val) + if hasattr(module, 'bias') and module.bias is not None: + nn.init.constant_(module.bias, bias) + + +def xavier_init(module, gain=1, bias=0, distribution='normal'): + assert distribution in ['uniform', 'normal'] + if hasattr(module, 'weight') and module.weight is not None: + if distribution == 'uniform': + nn.init.xavier_uniform_(module.weight, gain=gain) + else: + nn.init.xavier_normal_(module.weight, gain=gain) + if hasattr(module, 'bias') and module.bias is not None: + nn.init.constant_(module.bias, bias) + + +def normal_init(module, mean=0, std=1, bias=0): + if hasattr(module, 'weight') and module.weight is not None: + nn.init.normal_(module.weight, mean, std) + if hasattr(module, 'bias') and module.bias is not None: + nn.init.constant_(module.bias, bias) + + +def trunc_normal_init(module: nn.Module, + mean: float = 0, + std: float = 1, + a: float = -2, + b: float = 2, + bias: float = 0) -> None: + if hasattr(module, 'weight') and module.weight is not None: + trunc_normal_(module.weight, mean, std, a, b) # type: ignore + if hasattr(module, 'bias') and module.bias is not None: + 
nn.init.constant_(module.bias, bias) # type: ignore + + +def uniform_init(module, a=0, b=1, bias=0): + if hasattr(module, 'weight') and module.weight is not None: + nn.init.uniform_(module.weight, a, b) + if hasattr(module, 'bias') and module.bias is not None: + nn.init.constant_(module.bias, bias) + + +def kaiming_init(module, + a=0, + mode='fan_out', + nonlinearity='relu', + bias=0, + distribution='normal'): + assert distribution in ['uniform', 'normal'] + if hasattr(module, 'weight') and module.weight is not None: + if distribution == 'uniform': + nn.init.kaiming_uniform_( + module.weight, a=a, mode=mode, nonlinearity=nonlinearity) + else: + nn.init.kaiming_normal_( + module.weight, a=a, mode=mode, nonlinearity=nonlinearity) + if hasattr(module, 'bias') and module.bias is not None: + nn.init.constant_(module.bias, bias) + + +def caffe2_xavier_init(module, bias=0): + # `XavierFill` in Caffe2 corresponds to `kaiming_uniform_` in PyTorch + # Acknowledgment to FAIR's internal code + kaiming_init( + module, + a=1, + mode='fan_in', + nonlinearity='leaky_relu', + bias=bias, + distribution='uniform') + + +def bias_init_with_prob(prior_prob): + """initialize conv/fc bias value according to a given probability value.""" + bias_init = float(-np.log((1 - prior_prob) / prior_prob)) + return bias_init + + +def _get_bases_name(m): + return [b.__name__ for b in m.__class__.__bases__] + + +class BaseInit(object): + + def __init__(self, *, bias=0, bias_prob=None, layer=None): + self.wholemodule = False + if not isinstance(bias, (int, float)): + raise TypeError(f'bias must be a number, but got a {type(bias)}') + + if bias_prob is not None: + if not isinstance(bias_prob, float): + raise TypeError(f'bias_prob type must be float, \ + but got {type(bias_prob)}') + + if layer is not None: + if not isinstance(layer, (str, list)): + raise TypeError(f'layer must be a str or a list of str, \ + but got a {type(layer)}') + else: + layer = [] + + if bias_prob is not None: + self.bias = 
bias_init_with_prob(bias_prob) + else: + self.bias = bias + self.layer = [layer] if isinstance(layer, str) else layer + + def _get_init_info(self): + info = f'{self.__class__.__name__}, bias={self.bias}' + return info + + +@INITIALIZERS.register_module(name='Constant') +class ConstantInit(BaseInit): + """Initialize module parameters with constant values. + + Args: + val (int | float): the value to fill the weights in the module with + bias (int | float): the value to fill the bias. Defaults to 0. + bias_prob (float, optional): the probability for bias initialization. + Defaults to None. + layer (str | list[str], optional): the layer will be initialized. + Defaults to None. + """ + + def __init__(self, val, **kwargs): + super().__init__(**kwargs) + self.val = val + + def __call__(self, module): + + def init(m): + if self.wholemodule: + constant_init(m, self.val, self.bias) + else: + layername = m.__class__.__name__ + basesname = _get_bases_name(m) + if len(set(self.layer) & set([layername] + basesname)): + constant_init(m, self.val, self.bias) + + module.apply(init) + if hasattr(module, '_params_init_info'): + update_init_info(module, init_info=self._get_init_info()) + + def _get_init_info(self): + info = f'{self.__class__.__name__}: val={self.val}, bias={self.bias}' + return info + + +@INITIALIZERS.register_module(name='Xavier') +class XavierInit(BaseInit): + r"""Initialize module parameters with values according to the method + described in `Understanding the difficulty of training deep feedforward + neural networks - Glorot, X. & Bengio, Y. (2010). + `_ + + Args: + gain (int | float): an optional scaling factor. Defaults to 1. + bias (int | float): the value to fill the bias. Defaults to 0. + bias_prob (float, optional): the probability for bias initialization. + Defaults to None. + distribution (str): distribution either be ``'normal'`` + or ``'uniform'``. Defaults to ``'normal'``. + layer (str | list[str], optional): the layer will be initialized. 
+ Defaults to None. + """ + + def __init__(self, gain=1, distribution='normal', **kwargs): + super().__init__(**kwargs) + self.gain = gain + self.distribution = distribution + + def __call__(self, module): + + def init(m): + if self.wholemodule: + xavier_init(m, self.gain, self.bias, self.distribution) + else: + layername = m.__class__.__name__ + basesname = _get_bases_name(m) + if len(set(self.layer) & set([layername] + basesname)): + xavier_init(m, self.gain, self.bias, self.distribution) + + module.apply(init) + if hasattr(module, '_params_init_info'): + update_init_info(module, init_info=self._get_init_info()) + + def _get_init_info(self): + info = f'{self.__class__.__name__}: gain={self.gain}, ' \ + f'distribution={self.distribution}, bias={self.bias}' + return info + + +@INITIALIZERS.register_module(name='Normal') +class NormalInit(BaseInit): + r"""Initialize module parameters with the values drawn from the normal + distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)`. + + Args: + mean (int | float):the mean of the normal distribution. Defaults to 0. + std (int | float): the standard deviation of the normal distribution. + Defaults to 1. + bias (int | float): the value to fill the bias. Defaults to 0. + bias_prob (float, optional): the probability for bias initialization. + Defaults to None. + layer (str | list[str], optional): the layer will be initialized. + Defaults to None. 
+ + """ + + def __init__(self, mean=0, std=1, **kwargs): + super().__init__(**kwargs) + self.mean = mean + self.std = std + + def __call__(self, module): + + def init(m): + if self.wholemodule: + normal_init(m, self.mean, self.std, self.bias) + else: + layername = m.__class__.__name__ + basesname = _get_bases_name(m) + if len(set(self.layer) & set([layername] + basesname)): + normal_init(m, self.mean, self.std, self.bias) + + module.apply(init) + if hasattr(module, '_params_init_info'): + update_init_info(module, init_info=self._get_init_info()) + + def _get_init_info(self): + info = f'{self.__class__.__name__}: mean={self.mean},' \ + f' std={self.std}, bias={self.bias}' + return info + + +@INITIALIZERS.register_module(name='TruncNormal') +class TruncNormalInit(BaseInit): + r"""Initialize module parameters with the values drawn from the normal + distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` with values + outside :math:`[a, b]`. + + Args: + mean (float): the mean of the normal distribution. Defaults to 0. + std (float): the standard deviation of the normal distribution. + Defaults to 1. + a (float): The minimum cutoff value. + b ( float): The maximum cutoff value. + bias (float): the value to fill the bias. Defaults to 0. + bias_prob (float, optional): the probability for bias initialization. + Defaults to None. + layer (str | list[str], optional): the layer will be initialized. + Defaults to None. 
+ + """ + + def __init__(self, + mean: float = 0, + std: float = 1, + a: float = -2, + b: float = 2, + **kwargs) -> None: + super().__init__(**kwargs) + self.mean = mean + self.std = std + self.a = a + self.b = b + + def __call__(self, module: nn.Module) -> None: + + def init(m): + if self.wholemodule: + trunc_normal_init(m, self.mean, self.std, self.a, self.b, + self.bias) + else: + layername = m.__class__.__name__ + basesname = _get_bases_name(m) + if len(set(self.layer) & set([layername] + basesname)): + trunc_normal_init(m, self.mean, self.std, self.a, self.b, + self.bias) + + module.apply(init) + if hasattr(module, '_params_init_info'): + update_init_info(module, init_info=self._get_init_info()) + + def _get_init_info(self): + info = f'{self.__class__.__name__}: a={self.a}, b={self.b},' \ + f' mean={self.mean}, std={self.std}, bias={self.bias}' + return info + + +@INITIALIZERS.register_module(name='Uniform') +class UniformInit(BaseInit): + r"""Initialize module parameters with values drawn from the uniform + distribution :math:`\mathcal{U}(a, b)`. + + Args: + a (int | float): the lower bound of the uniform distribution. + Defaults to 0. + b (int | float): the upper bound of the uniform distribution. + Defaults to 1. + bias (int | float): the value to fill the bias. Defaults to 0. + bias_prob (float, optional): the probability for bias initialization. + Defaults to None. + layer (str | list[str], optional): the layer will be initialized. + Defaults to None. 
+ """ + + def __init__(self, a=0, b=1, **kwargs): + super().__init__(**kwargs) + self.a = a + self.b = b + + def __call__(self, module): + + def init(m): + if self.wholemodule: + uniform_init(m, self.a, self.b, self.bias) + else: + layername = m.__class__.__name__ + basesname = _get_bases_name(m) + if len(set(self.layer) & set([layername] + basesname)): + uniform_init(m, self.a, self.b, self.bias) + + module.apply(init) + if hasattr(module, '_params_init_info'): + update_init_info(module, init_info=self._get_init_info()) + + def _get_init_info(self): + info = f'{self.__class__.__name__}: a={self.a},' \ + f' b={self.b}, bias={self.bias}' + return info + + +@INITIALIZERS.register_module(name='Kaiming') +class KaimingInit(BaseInit): + r"""Initialize module parameters with the values according to the method + described in `Delving deep into rectifiers: Surpassing human-level + performance on ImageNet classification - He, K. et al. (2015). + `_ + + Args: + a (int | float): the negative slope of the rectifier used after this + layer (only used with ``'leaky_relu'``). Defaults to 0. + mode (str): either ``'fan_in'`` or ``'fan_out'``. Choosing + ``'fan_in'`` preserves the magnitude of the variance of the weights + in the forward pass. Choosing ``'fan_out'`` preserves the + magnitudes in the backwards pass. Defaults to ``'fan_out'``. + nonlinearity (str): the non-linear function (`nn.functional` name), + recommended to use only with ``'relu'`` or ``'leaky_relu'`` . + Defaults to 'relu'. + bias (int | float): the value to fill the bias. Defaults to 0. + bias_prob (float, optional): the probability for bias initialization. + Defaults to None. + distribution (str): distribution either be ``'normal'`` or + ``'uniform'``. Defaults to ``'normal'``. + layer (str | list[str], optional): the layer will be initialized. + Defaults to None. 
+ """ + + def __init__(self, + a=0, + mode='fan_out', + nonlinearity='relu', + distribution='normal', + **kwargs): + super().__init__(**kwargs) + self.a = a + self.mode = mode + self.nonlinearity = nonlinearity + self.distribution = distribution + + def __call__(self, module): + + def init(m): + if self.wholemodule: + kaiming_init(m, self.a, self.mode, self.nonlinearity, + self.bias, self.distribution) + else: + layername = m.__class__.__name__ + basesname = _get_bases_name(m) + if len(set(self.layer) & set([layername] + basesname)): + kaiming_init(m, self.a, self.mode, self.nonlinearity, + self.bias, self.distribution) + + module.apply(init) + if hasattr(module, '_params_init_info'): + update_init_info(module, init_info=self._get_init_info()) + + def _get_init_info(self): + info = f'{self.__class__.__name__}: a={self.a}, mode={self.mode}, ' \ + f'nonlinearity={self.nonlinearity}, ' \ + f'distribution ={self.distribution}, bias={self.bias}' + return info + + +@INITIALIZERS.register_module(name='Caffe2Xavier') +class Caffe2XavierInit(KaimingInit): + # `XavierFill` in Caffe2 corresponds to `kaiming_uniform_` in PyTorch + # Acknowledgment to FAIR's internal code + def __init__(self, **kwargs): + super().__init__( + a=1, + mode='fan_in', + nonlinearity='leaky_relu', + distribution='uniform', + **kwargs) + + def __call__(self, module): + super().__call__(module) + + +@INITIALIZERS.register_module(name='Pretrained') +class PretrainedInit(object): + """Initialize module by loading a pretrained model. + + Args: + checkpoint (str): the checkpoint file of the pretrained model should + be load. + prefix (str, optional): the prefix of a sub-module in the pretrained + model. it is for loading a part of the pretrained model to + initialize. For example, if we would like to only load the + backbone of a detector model, we can set ``prefix='backbone.'``. + Defaults to None. + map_location (str): map tensors into proper locations. 
+ """ + + def __init__(self, checkpoint, prefix=None, map_location=None): + self.checkpoint = checkpoint + self.prefix = prefix + self.map_location = map_location + + def __call__(self, module): + from mmcv.runner import (_load_checkpoint_with_prefix, load_checkpoint, + load_state_dict) + logger = get_logger('mmcv') + if self.prefix is None: + print_log(f'load model from: {self.checkpoint}', logger=logger) + load_checkpoint( + module, + self.checkpoint, + map_location=self.map_location, + strict=False, + logger=logger) + else: + print_log( + f'load {self.prefix} in model from: {self.checkpoint}', + logger=logger) + state_dict = _load_checkpoint_with_prefix( + self.prefix, self.checkpoint, map_location=self.map_location) + load_state_dict(module, state_dict, strict=False, logger=logger) + + if hasattr(module, '_params_init_info'): + update_init_info(module, init_info=self._get_init_info()) + + def _get_init_info(self): + info = f'{self.__class__.__name__}: load from {self.checkpoint}' + return info + + +def _initialize(module, cfg, wholemodule=False): + func = build_from_cfg(cfg, INITIALIZERS) + # wholemodule flag is for override mode, there is no layer key in override + # and initializer will give init values for the whole module with the name + # in override. 
+ func.wholemodule = wholemodule + func(module) + + +def _initialize_override(module, override, cfg): + if not isinstance(override, (dict, list)): + raise TypeError(f'override must be a dict or a list of dict, \ + but got {type(override)}') + + override = [override] if isinstance(override, dict) else override + + for override_ in override: + + cp_override = copy.deepcopy(override_) + name = cp_override.pop('name', None) + if name is None: + raise ValueError('`override` must contain the key "name",' + f'but got {cp_override}') + # if override only has name key, it means use args in init_cfg + if not cp_override: + cp_override.update(cfg) + # if override has name key and other args except type key, it will + # raise error + elif 'type' not in cp_override.keys(): + raise ValueError( + f'`override` need "type" key, but got {cp_override}') + + if hasattr(module, name): + _initialize(getattr(module, name), cp_override, wholemodule=True) + else: + raise RuntimeError(f'module did not have attribute {name}, ' + f'but init_cfg is {cp_override}.') + + +def initialize(module, init_cfg): + """Initialize a module. + + Args: + module (``torch.nn.Module``): the module will be initialized. + init_cfg (dict | list[dict]): initialization configuration dict to + define initializer. OpenMMLab has implemented 6 initializers + including ``Constant``, ``Xavier``, ``Normal``, ``Uniform``, + ``Kaiming``, and ``Pretrained``. 
+ Example: + >>> module = nn.Linear(2, 3, bias=True) + >>> init_cfg = dict(type='Constant', layer='Linear', val =1 , bias =2) + >>> initialize(module, init_cfg) + + >>> module = nn.Sequential(nn.Conv1d(3, 1, 3), nn.Linear(1,2)) + >>> # define key ``'layer'`` for initializing layer with different + >>> # configuration + >>> init_cfg = [dict(type='Constant', layer='Conv1d', val=1), + dict(type='Constant', layer='Linear', val=2)] + >>> initialize(module, init_cfg) + + >>> # define key``'override'`` to initialize some specific part in + >>> # module + >>> class FooNet(nn.Module): + >>> def __init__(self): + >>> super().__init__() + >>> self.feat = nn.Conv2d(3, 16, 3) + >>> self.reg = nn.Conv2d(16, 10, 3) + >>> self.cls = nn.Conv2d(16, 5, 3) + >>> model = FooNet() + >>> init_cfg = dict(type='Constant', val=1, bias=2, layer='Conv2d', + >>> override=dict(type='Constant', name='reg', val=3, bias=4)) + >>> initialize(model, init_cfg) + + >>> model = ResNet(depth=50) + >>> # Initialize weights with the pretrained model. + >>> init_cfg = dict(type='Pretrained', + checkpoint='torchvision://resnet50') + >>> initialize(model, init_cfg) + + >>> # Initialize weights of a sub-module with the specific part of + >>> # a pretrained model by using "prefix". 
+ >>> url = 'http://download.openmmlab.com/mmdetection/v2.0/retinanet/'\ + >>> 'retinanet_r50_fpn_1x_coco/'\ + >>> 'retinanet_r50_fpn_1x_coco_20200130-c2398f9e.pth' + >>> init_cfg = dict(type='Pretrained', + checkpoint=url, prefix='backbone.') + """ + if not isinstance(init_cfg, (dict, list)): + raise TypeError(f'init_cfg must be a dict or a list of dict, \ + but got {type(init_cfg)}') + + if isinstance(init_cfg, dict): + init_cfg = [init_cfg] + + for cfg in init_cfg: + # should deeply copy the original config because cfg may be used by + # other modules, e.g., one init_cfg shared by multiple bottleneck + # blocks, the expected cfg will be changed after pop and will change + # the initialization behavior of other modules + cp_cfg = copy.deepcopy(cfg) + override = cp_cfg.pop('override', None) + _initialize(module, cp_cfg) + + if override is not None: + cp_cfg.pop('layer', None) + _initialize_override(module, override, cp_cfg) + else: + # All attributes in module have same initialization. + pass + + +def _no_grad_trunc_normal_(tensor: Tensor, mean: float, std: float, a: float, + b: float) -> Tensor: + # Method based on + # https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf + # Modified from + # https://github.com/pytorch/pytorch/blob/master/torch/nn/init.py + def norm_cdf(x): + # Computes standard normal cumulative distribution function + return (1. + math.erf(x / math.sqrt(2.))) / 2. + + if (mean < a - 2 * std) or (mean > b + 2 * std): + warnings.warn( + 'mean is more than 2 std from [a, b] in nn.init.trunc_normal_. ' + 'The distribution of values may be incorrect.', + stacklevel=2) + + with torch.no_grad(): + # Values are generated by using a truncated uniform distribution and + # then using the inverse CDF for the normal distribution. 
+ # Get upper and lower cdf values + lower = norm_cdf((a - mean) / std) + upper = norm_cdf((b - mean) / std) + + # Uniformly fill tensor with values from [lower, upper], then translate + # to [2lower-1, 2upper-1]. + tensor.uniform_(2 * lower - 1, 2 * upper - 1) + + # Use inverse cdf transform for normal distribution to get truncated + # standard normal + tensor.erfinv_() + + # Transform to proper mean, std + tensor.mul_(std * math.sqrt(2.)) + tensor.add_(mean) + + # Clamp to ensure it's in the proper range + tensor.clamp_(min=a, max=b) + return tensor + + +def trunc_normal_(tensor: Tensor, + mean: float = 0., + std: float = 1., + a: float = -2., + b: float = 2.) -> Tensor: + r"""Fills the input Tensor with values drawn from a truncated + normal distribution. The values are effectively drawn from the + normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` + with values outside :math:`[a, b]` redrawn until they are within + the bounds. The method used for generating the random values works + best when :math:`a \leq \text{mean} \leq b`. + + Modified from + https://github.com/pytorch/pytorch/blob/master/torch/nn/init.py + + Args: + tensor (``torch.Tensor``): an n-dimensional `torch.Tensor`. + mean (float): the mean of the normal distribution. + std (float): the standard deviation of the normal distribution. + a (float): the minimum cutoff value. + b (float): the maximum cutoff value. + """ + return _no_grad_trunc_normal_(tensor, mean, std, a, b) diff --git a/mmcv/cnn/vgg.py b/mmcv/cnn/vgg.py index a7f3116..8778b64 100644 --- a/mmcv/cnn/vgg.py +++ b/mmcv/cnn/vgg.py @@ -1,14 +1,12 @@ # Copyright (c) OpenMMLab. All rights reserved. 
import logging -from typing import List, Optional, Sequence, Tuple, Union import torch.nn as nn -from mmengine.model import constant_init, kaiming_init, normal_init -from mmengine.runner import load_checkpoint -from torch import Tensor +from .utils import constant_init, kaiming_init, normal_init -def conv3x3(in_planes: int, out_planes: int, dilation: int = 1) -> nn.Module: + +def conv3x3(in_planes, out_planes, dilation=1): """3x3 convolution with padding.""" return nn.Conv2d( in_planes, @@ -18,12 +16,12 @@ def conv3x3(in_planes: int, out_planes: int, dilation: int = 1) -> nn.Module: dilation=dilation) -def make_vgg_layer(inplanes: int, - planes: int, - num_blocks: int, - dilation: int = 1, - with_bn: bool = False, - ceil_mode: bool = False) -> List[nn.Module]: +def make_vgg_layer(inplanes, + planes, + num_blocks, + dilation=1, + with_bn=False, + ceil_mode=False): layers = [] for _ in range(num_blocks): layers.append(conv3x3(inplanes, planes, dilation)) @@ -61,18 +59,18 @@ class VGG(nn.Module): } def __init__(self, - depth: int, - with_bn: bool = False, - num_classes: int = -1, - num_stages: int = 5, - dilations: Sequence[int] = (1, 1, 1, 1, 1), - out_indices: Sequence[int] = (0, 1, 2, 3, 4), - frozen_stages: int = -1, - bn_eval: bool = True, - bn_frozen: bool = False, - ceil_mode: bool = False, - with_last_pool: bool = True): - super().__init__() + depth, + with_bn=False, + num_classes=-1, + num_stages=5, + dilations=(1, 1, 1, 1, 1), + out_indices=(0, 1, 2, 3, 4), + frozen_stages=-1, + bn_eval=True, + bn_frozen=False, + ceil_mode=False, + with_last_pool=True): + super(VGG, self).__init__() if depth not in self.arch_settings: raise KeyError(f'invalid depth {depth} for vgg') assert num_stages >= 1 and num_stages <= 5 @@ -124,9 +122,10 @@ class VGG(nn.Module): nn.Linear(4096, num_classes), ) - def init_weights(self, pretrained: Optional[str] = None) -> None: + def init_weights(self, pretrained=None): if isinstance(pretrained, str): logger = logging.getLogger() + from 
..runner import load_checkpoint load_checkpoint(self, pretrained, strict=False, logger=logger) elif pretrained is None: for m in self.modules(): @@ -139,7 +138,7 @@ class VGG(nn.Module): else: raise TypeError('pretrained must be a str or None') - def forward(self, x: Tensor) -> Union[Tensor, Tuple[Tensor, ...]]: + def forward(self, x): outs = [] vgg_layers = getattr(self, self.module_name) for i in range(len(self.stage_blocks)): @@ -157,8 +156,8 @@ class VGG(nn.Module): else: return tuple(outs) - def train(self, mode: bool = True) -> None: - super().train(mode) + def train(self, mode=True): + super(VGG, self).train(mode) if self.bn_eval: for m in self.modules(): if isinstance(m, nn.BatchNorm2d): diff --git a/mmcv/engine/__init__.py b/mmcv/engine/__init__.py new file mode 100644 index 0000000..3193b7f --- /dev/null +++ b/mmcv/engine/__init__.py @@ -0,0 +1,8 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .test import (collect_results_cpu, collect_results_gpu, multi_gpu_test, + single_gpu_test) + +__all__ = [ + 'collect_results_cpu', 'collect_results_gpu', 'multi_gpu_test', + 'single_gpu_test' +] diff --git a/mmcv/engine/test.py b/mmcv/engine/test.py new file mode 100644 index 0000000..f236b1c --- /dev/null +++ b/mmcv/engine/test.py @@ -0,0 +1,202 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp +import pickle +import shutil +import tempfile +import time + +import torch +import torch.distributed as dist + +import mmcv +from mmcv.runner import get_dist_info + + +def single_gpu_test(model, data_loader): + """Test model with a single gpu. + + This method tests model with a single gpu and displays test progress bar. + + Args: + model (nn.Module): Model to be tested. + data_loader (nn.Dataloader): Pytorch data loader. + + Returns: + list: The prediction results. 
+ """ + model.eval() + results = [] + dataset = data_loader.dataset + prog_bar = mmcv.ProgressBar(len(dataset)) + for data in data_loader: + with torch.no_grad(): + result = model(return_loss=False, **data) + results.extend(result) + + # Assume result has the same length of batch_size + # refer to https://github.com/open-mmlab/mmcv/issues/985 + batch_size = len(result) + for _ in range(batch_size): + prog_bar.update() + return results + + +def multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=False): + """Test model with multiple gpus. + + This method tests model with multiple gpus and collects the results + under two different modes: gpu and cpu modes. By setting + ``gpu_collect=True``, it encodes results to gpu tensors and use gpu + communication for results collection. On cpu mode it saves the results on + different gpus to ``tmpdir`` and collects them by the rank 0 worker. + + Args: + model (nn.Module): Model to be tested. + data_loader (nn.Dataloader): Pytorch data loader. + tmpdir (str): Path of directory to save the temporary results from + different gpus under cpu mode. + gpu_collect (bool): Option to use either gpu or cpu to collect results. + + Returns: + list: The prediction results. + """ + model.eval() + results = [] + dataset = data_loader.dataset + rank, world_size = get_dist_info() + if rank == 0: + prog_bar = mmcv.ProgressBar(len(dataset)) + time.sleep(2) # This line can prevent deadlock problem in some cases. 
+ for i, data in enumerate(data_loader): + with torch.no_grad(): + result = model(return_loss=False, **data) + results.extend(result) + + if rank == 0: + batch_size = len(result) + batch_size_all = batch_size * world_size + if batch_size_all + prog_bar.completed > len(dataset): + batch_size_all = len(dataset) - prog_bar.completed + for _ in range(batch_size_all): + prog_bar.update() + + # collect results from all ranks + if gpu_collect: + results = collect_results_gpu(results, len(dataset)) + else: + results = collect_results_cpu(results, len(dataset), tmpdir) + return results + + +def collect_results_cpu(result_part, size, tmpdir=None): + """Collect results under cpu mode. + + On cpu mode, this function will save the results on different gpus to + ``tmpdir`` and collect them by the rank 0 worker. + + Args: + result_part (list): Result list containing result parts + to be collected. + size (int): Size of the results, commonly equal to length of + the results. + tmpdir (str | None): temporal directory for collected results to + store. If set to None, it will create a random temporal directory + for it. + + Returns: + list: The collected results. 
+ """ + rank, world_size = get_dist_info() + # create a tmp dir if it is not specified + if tmpdir is None: + MAX_LEN = 512 + # 32 is whitespace + dir_tensor = torch.full((MAX_LEN, ), + 32, + dtype=torch.uint8, + device='cuda') + if rank == 0: + mmcv.mkdir_or_exist('.dist_test') + tmpdir = tempfile.mkdtemp(dir='.dist_test') + tmpdir = torch.tensor( + bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda') + dir_tensor[:len(tmpdir)] = tmpdir + dist.broadcast(dir_tensor, 0) + tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip() + else: + mmcv.mkdir_or_exist(tmpdir) + # dump the part result to the dir + mmcv.dump(result_part, osp.join(tmpdir, f'part_{rank}.pkl')) + dist.barrier() + # collect all parts + if rank != 0: + return None + else: + # load results of all parts from tmp dir + part_list = [] + for i in range(world_size): + part_file = osp.join(tmpdir, f'part_{i}.pkl') + part_result = mmcv.load(part_file) + # When data is severely insufficient, an empty part_result + # on a certain gpu could makes the overall outputs empty. + if part_result: + part_list.append(part_result) + # sort the results + ordered_results = [] + for res in zip(*part_list): + ordered_results.extend(list(res)) + # the dataloader may pad some samples + ordered_results = ordered_results[:size] + # remove tmp dir + shutil.rmtree(tmpdir) + return ordered_results + + +def collect_results_gpu(result_part, size): + """Collect results under gpu mode. + + On gpu mode, this function will encode results to gpu tensors and use gpu + communication for results collection. + + Args: + result_part (list): Result list containing result parts + to be collected. + size (int): Size of the results, commonly equal to length of + the results. + + Returns: + list: The collected results. 
+ """ + rank, world_size = get_dist_info() + # dump result part to tensor with pickle + part_tensor = torch.tensor( + bytearray(pickle.dumps(result_part)), dtype=torch.uint8, device='cuda') + # gather all result part tensor shape + shape_tensor = torch.tensor(part_tensor.shape, device='cuda') + shape_list = [shape_tensor.clone() for _ in range(world_size)] + dist.all_gather(shape_list, shape_tensor) + # padding result part tensor to max length + shape_max = torch.tensor(shape_list).max() + part_send = torch.zeros(shape_max, dtype=torch.uint8, device='cuda') + part_send[:shape_tensor[0]] = part_tensor + part_recv_list = [ + part_tensor.new_zeros(shape_max) for _ in range(world_size) + ] + # gather all result part + dist.all_gather(part_recv_list, part_send) + + if rank == 0: + part_list = [] + for recv, shape in zip(part_recv_list, shape_list): + part_result = pickle.loads(recv[:shape[0]].cpu().numpy().tobytes()) + # When data is severely insufficient, an empty part_result + # on a certain gpu could makes the overall outputs empty. + if part_result: + part_list.append(part_result) + # sort the results + ordered_results = [] + for res in zip(*part_list): + ordered_results.extend(list(res)) + # the dataloader may pad some samples + ordered_results = ordered_results[:size] + return ordered_results diff --git a/mmcv/fileio/__init__.py b/mmcv/fileio/__init__.py new file mode 100644 index 0000000..2051b85 --- /dev/null +++ b/mmcv/fileio/__init__.py @@ -0,0 +1,11 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+from .file_client import BaseStorageBackend, FileClient +from .handlers import BaseFileHandler, JsonHandler, PickleHandler, YamlHandler +from .io import dump, load, register_handler +from .parse import dict_from_file, list_from_file + +__all__ = [ + 'BaseStorageBackend', 'FileClient', 'load', 'dump', 'register_handler', + 'BaseFileHandler', 'JsonHandler', 'PickleHandler', 'YamlHandler', + 'list_from_file', 'dict_from_file' +] diff --git a/mmcv/fileio/file_client.py b/mmcv/fileio/file_client.py new file mode 100644 index 0000000..b2d6228 --- /dev/null +++ b/mmcv/fileio/file_client.py @@ -0,0 +1,1148 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import inspect +import os +import os.path as osp +import re +import tempfile +import warnings +from abc import ABCMeta, abstractmethod +from contextlib import contextmanager +from pathlib import Path +from typing import Iterable, Iterator, Optional, Tuple, Union +from urllib.request import urlopen + +import mmcv +from mmcv.utils.misc import has_method +from mmcv.utils.path import is_filepath + + +class BaseStorageBackend(metaclass=ABCMeta): + """Abstract class of storage backends. + + All backends need to implement two apis: ``get()`` and ``get_text()``. + ``get()`` reads the file as a byte stream and ``get_text()`` reads the file + as texts. + """ + + # a flag to indicate whether the backend can create a symlink for a file + _allow_symlink = False + + @property + def name(self): + return self.__class__.__name__ + + @property + def allow_symlink(self): + return self._allow_symlink + + @abstractmethod + def get(self, filepath): + pass + + @abstractmethod + def get_text(self, filepath): + pass + + +class CephBackend(BaseStorageBackend): + """Ceph storage backend (for internal use). + + Args: + path_mapping (dict|None): path mapping dict from local path to Petrel + path. When ``path_mapping={'src': 'dst'}``, ``src`` in ``filepath`` + will be replaced by ``dst``. Default: None. + + .. 
warning:: + :class:`mmcv.fileio.file_client.CephBackend` will be deprecated, + please use :class:`mmcv.fileio.file_client.PetrelBackend` instead. + """ + + def __init__(self, path_mapping=None): + try: + import ceph + except ImportError: + raise ImportError('Please install ceph to enable CephBackend.') + + warnings.warn( + 'CephBackend will be deprecated, please use PetrelBackend instead') + self._client = ceph.S3Client() + assert isinstance(path_mapping, dict) or path_mapping is None + self.path_mapping = path_mapping + + def get(self, filepath): + filepath = str(filepath) + if self.path_mapping is not None: + for k, v in self.path_mapping.items(): + filepath = filepath.replace(k, v) + value = self._client.Get(filepath) + value_buf = memoryview(value) + return value_buf + + def get_text(self, filepath, encoding=None): + raise NotImplementedError + + +class PetrelBackend(BaseStorageBackend): + """Petrel storage backend (for internal use). + + PetrelBackend supports reading and writing data to multiple clusters. + If the file path contains the cluster name, PetrelBackend will read data + from specified cluster or write data to it. Otherwise, PetrelBackend will + access the default cluster. + + Args: + path_mapping (dict, optional): Path mapping dict from local path to + Petrel path. When ``path_mapping={'src': 'dst'}``, ``src`` in + ``filepath`` will be replaced by ``dst``. Default: None. + enable_mc (bool, optional): Whether to enable memcached support. + Default: True. 
+ + Examples: + >>> filepath1 = 's3://path/of/file' + >>> filepath2 = 'cluster-name:s3://path/of/file' + >>> client = PetrelBackend() + >>> client.get(filepath1) # get data from default cluster + >>> client.get(filepath2) # get data from 'cluster-name' cluster + """ + + def __init__(self, + path_mapping: Optional[dict] = None, + enable_mc: bool = True): + try: + from petrel_client import client + except ImportError: + raise ImportError('Please install petrel_client to enable ' + 'PetrelBackend.') + + self._client = client.Client(enable_mc=enable_mc) + assert isinstance(path_mapping, dict) or path_mapping is None + self.path_mapping = path_mapping + + def _map_path(self, filepath: Union[str, Path]) -> str: + """Map ``filepath`` to a string path whose prefix will be replaced by + :attr:`self.path_mapping`. + + Args: + filepath (str): Path to be mapped. + """ + filepath = str(filepath) + if self.path_mapping is not None: + for k, v in self.path_mapping.items(): + filepath = filepath.replace(k, v) + return filepath + + def _format_path(self, filepath: str) -> str: + """Convert a ``filepath`` to standard format of petrel oss. + + If the ``filepath`` is concatenated by ``os.path.join``, in a Windows + environment, the ``filepath`` will be the format of + 's3://bucket_name\\image.jpg'. By invoking :meth:`_format_path`, the + above ``filepath`` will be converted to 's3://bucket_name/image.jpg'. + + Args: + filepath (str): Path to be formatted. + """ + return re.sub(r'\\+', '/', filepath) + + def get(self, filepath: Union[str, Path]) -> memoryview: + """Read data from a given ``filepath`` with 'rb' mode. + + Args: + filepath (str or Path): Path to read data. + + Returns: + memoryview: A memory view of expected bytes object to avoid + copying. The memoryview object can be converted to bytes by + ``value_buf.tobytes()``. 
+ """ + filepath = self._map_path(filepath) + filepath = self._format_path(filepath) + value = self._client.Get(filepath) + value_buf = memoryview(value) + return value_buf + + def get_text(self, + filepath: Union[str, Path], + encoding: str = 'utf-8') -> str: + """Read data from a given ``filepath`` with 'r' mode. + + Args: + filepath (str or Path): Path to read data. + encoding (str): The encoding format used to open the ``filepath``. + Default: 'utf-8'. + + Returns: + str: Expected text reading from ``filepath``. + """ + return str(self.get(filepath), encoding=encoding) + + def put(self, obj: bytes, filepath: Union[str, Path]) -> None: + """Save data to a given ``filepath``. + + Args: + obj (bytes): Data to be saved. + filepath (str or Path): Path to write data. + """ + filepath = self._map_path(filepath) + filepath = self._format_path(filepath) + self._client.put(filepath, obj) + + def put_text(self, + obj: str, + filepath: Union[str, Path], + encoding: str = 'utf-8') -> None: + """Save data to a given ``filepath``. + + Args: + obj (str): Data to be written. + filepath (str or Path): Path to write data. + encoding (str): The encoding format used to encode the ``obj``. + Default: 'utf-8'. + """ + self.put(bytes(obj, encoding=encoding), filepath) + + def remove(self, filepath: Union[str, Path]) -> None: + """Remove a file. + + Args: + filepath (str or Path): Path to be removed. + """ + if not has_method(self._client, 'delete'): + raise NotImplementedError( + ('Current version of Petrel Python SDK has not supported ' + 'the `delete` method, please use a higher version or dev' + ' branch instead.')) + + filepath = self._map_path(filepath) + filepath = self._format_path(filepath) + self._client.delete(filepath) + + def exists(self, filepath: Union[str, Path]) -> bool: + """Check whether a file path exists. + + Args: + filepath (str or Path): Path to be checked whether exists. + + Returns: + bool: Return ``True`` if ``filepath`` exists, ``False`` otherwise. 
+ """ + if not (has_method(self._client, 'contains') + and has_method(self._client, 'isdir')): + raise NotImplementedError( + ('Current version of Petrel Python SDK has not supported ' + 'the `contains` and `isdir` methods, please use a higher' + 'version or dev branch instead.')) + + filepath = self._map_path(filepath) + filepath = self._format_path(filepath) + return self._client.contains(filepath) or self._client.isdir(filepath) + + def isdir(self, filepath: Union[str, Path]) -> bool: + """Check whether a file path is a directory. + + Args: + filepath (str or Path): Path to be checked whether it is a + directory. + + Returns: + bool: Return ``True`` if ``filepath`` points to a directory, + ``False`` otherwise. + """ + if not has_method(self._client, 'isdir'): + raise NotImplementedError( + ('Current version of Petrel Python SDK has not supported ' + 'the `isdir` method, please use a higher version or dev' + ' branch instead.')) + + filepath = self._map_path(filepath) + filepath = self._format_path(filepath) + return self._client.isdir(filepath) + + def isfile(self, filepath: Union[str, Path]) -> bool: + """Check whether a file path is a file. + + Args: + filepath (str or Path): Path to be checked whether it is a file. + + Returns: + bool: Return ``True`` if ``filepath`` points to a file, ``False`` + otherwise. + """ + if not has_method(self._client, 'contains'): + raise NotImplementedError( + ('Current version of Petrel Python SDK has not supported ' + 'the `contains` method, please use a higher version or ' + 'dev branch instead.')) + + filepath = self._map_path(filepath) + filepath = self._format_path(filepath) + return self._client.contains(filepath) + + def join_path(self, filepath: Union[str, Path], + *filepaths: Union[str, Path]) -> str: + """Concatenate all file paths. + + Args: + filepath (str or Path): Path to be concatenated. + + Returns: + str: The result after concatenation. 
+ """ + filepath = self._format_path(self._map_path(filepath)) + if filepath.endswith('/'): + filepath = filepath[:-1] + formatted_paths = [filepath] + for path in filepaths: + formatted_paths.append(self._format_path(self._map_path(path))) + return '/'.join(formatted_paths) + + @contextmanager + def get_local_path(self, filepath: Union[str, Path]) -> Iterable[str]: + """Download a file from ``filepath`` and return a temporary path. + + ``get_local_path`` is decorated by :meth:`contxtlib.contextmanager`. It + can be called with ``with`` statement, and when exists from the + ``with`` statement, the temporary path will be released. + + Args: + filepath (str | Path): Download a file from ``filepath``. + + Examples: + >>> client = PetrelBackend() + >>> # After existing from the ``with`` clause, + >>> # the path will be removed + >>> with client.get_local_path('s3://path/of/your/file') as path: + ... # do something here + + Yields: + Iterable[str]: Only yield one temporary path. + """ + filepath = self._map_path(filepath) + filepath = self._format_path(filepath) + assert self.isfile(filepath) + try: + f = tempfile.NamedTemporaryFile(delete=False) + f.write(self.get(filepath)) + f.close() + yield f.name + finally: + os.remove(f.name) + + def list_dir_or_file(self, + dir_path: Union[str, Path], + list_dir: bool = True, + list_file: bool = True, + suffix: Optional[Union[str, Tuple[str]]] = None, + recursive: bool = False) -> Iterator[str]: + """Scan a directory to find the interested directories or files in + arbitrary order. + + Note: + Petrel has no concept of directories but it simulates the directory + hierarchy in the filesystem through public prefixes. In addition, + if the returned path ends with '/', it means the path is a public + prefix which is a logical directory. + + Note: + :meth:`list_dir_or_file` returns the path relative to ``dir_path``. 
+ In addition, the returned path of directory will not contains the + suffix '/' which is consistent with other backends. + + Args: + dir_path (str | Path): Path of the directory. + list_dir (bool): List the directories. Default: True. + list_file (bool): List the path of files. Default: True. + suffix (str or tuple[str], optional): File suffix + that we are interested in. Default: None. + recursive (bool): If set to True, recursively scan the + directory. Default: False. + + Yields: + Iterable[str]: A relative path to ``dir_path``. + """ + if not has_method(self._client, 'list'): + raise NotImplementedError( + ('Current version of Petrel Python SDK has not supported ' + 'the `list` method, please use a higher version or dev' + ' branch instead.')) + + dir_path = self._map_path(dir_path) + dir_path = self._format_path(dir_path) + if list_dir and suffix is not None: + raise TypeError( + '`list_dir` should be False when `suffix` is not None') + + if (suffix is not None) and not isinstance(suffix, (str, tuple)): + raise TypeError('`suffix` must be a string or tuple of strings') + + # Petrel's simulated directory hierarchy assumes that directory paths + # should end with `/` + if not dir_path.endswith('/'): + dir_path += '/' + + root = dir_path + + def _list_dir_or_file(dir_path, list_dir, list_file, suffix, + recursive): + for path in self._client.list(dir_path): + # the `self.isdir` is not used here to determine whether path + # is a directory, because `self.isdir` relies on + # `self._client.list` + if path.endswith('/'): # a directory path + next_dir_path = self.join_path(dir_path, path) + if list_dir: + # get the relative path and exclude the last + # character '/' + rel_dir = next_dir_path[len(root):-1] + yield rel_dir + if recursive: + yield from _list_dir_or_file(next_dir_path, list_dir, + list_file, suffix, + recursive) + else: # a file path + absolute_path = self.join_path(dir_path, path) + rel_path = absolute_path[len(root):] + if (suffix is None + or 
rel_path.endswith(suffix)) and list_file: + yield rel_path + + return _list_dir_or_file(dir_path, list_dir, list_file, suffix, + recursive) + + +class MemcachedBackend(BaseStorageBackend): + """Memcached storage backend. + + Attributes: + server_list_cfg (str): Config file for memcached server list. + client_cfg (str): Config file for memcached client. + sys_path (str | None): Additional path to be appended to `sys.path`. + Default: None. + """ + + def __init__(self, server_list_cfg, client_cfg, sys_path=None): + if sys_path is not None: + import sys + sys.path.append(sys_path) + try: + import mc + except ImportError: + raise ImportError( + 'Please install memcached to enable MemcachedBackend.') + + self.server_list_cfg = server_list_cfg + self.client_cfg = client_cfg + self._client = mc.MemcachedClient.GetInstance(self.server_list_cfg, + self.client_cfg) + # mc.pyvector servers as a point which points to a memory cache + self._mc_buffer = mc.pyvector() + + def get(self, filepath): + filepath = str(filepath) + import mc + self._client.Get(filepath, self._mc_buffer) + value_buf = mc.ConvertBuffer(self._mc_buffer) + return value_buf + + def get_text(self, filepath, encoding=None): + raise NotImplementedError + + +class LmdbBackend(BaseStorageBackend): + """Lmdb storage backend. + + Args: + db_path (str): Lmdb database path. + readonly (bool, optional): Lmdb environment parameter. If True, + disallow any write operations. Default: True. + lock (bool, optional): Lmdb environment parameter. If False, when + concurrent access occurs, do not lock the database. Default: False. + readahead (bool, optional): Lmdb environment parameter. If False, + disable the OS filesystem readahead mechanism, which may improve + random read performance when a database is larger than RAM. + Default: False. + + Attributes: + db_path (str): Lmdb database path. 
+ """ + + def __init__(self, + db_path, + readonly=True, + lock=False, + readahead=False, + **kwargs): + try: + import lmdb + except ImportError: + raise ImportError('Please install lmdb to enable LmdbBackend.') + + self.db_path = str(db_path) + self._client = lmdb.open( + self.db_path, + readonly=readonly, + lock=lock, + readahead=readahead, + **kwargs) + + def get(self, filepath): + """Get values according to the filepath. + + Args: + filepath (str | obj:`Path`): Here, filepath is the lmdb key. + """ + filepath = str(filepath) + with self._client.begin(write=False) as txn: + value_buf = txn.get(filepath.encode('ascii')) + return value_buf + + def get_text(self, filepath, encoding=None): + raise NotImplementedError + + +class HardDiskBackend(BaseStorageBackend): + """Raw hard disks storage backend.""" + + _allow_symlink = True + + def get(self, filepath: Union[str, Path]) -> bytes: + """Read data from a given ``filepath`` with 'rb' mode. + + Args: + filepath (str or Path): Path to read data. + + Returns: + bytes: Expected bytes object. + """ + with open(filepath, 'rb') as f: + value_buf = f.read() + return value_buf + + def get_text(self, + filepath: Union[str, Path], + encoding: str = 'utf-8') -> str: + """Read data from a given ``filepath`` with 'r' mode. + + Args: + filepath (str or Path): Path to read data. + encoding (str): The encoding format used to open the ``filepath``. + Default: 'utf-8'. + + Returns: + str: Expected text reading from ``filepath``. + """ + with open(filepath, 'r', encoding=encoding) as f: + value_buf = f.read() + return value_buf + + def put(self, obj: bytes, filepath: Union[str, Path]) -> None: + """Write data to a given ``filepath`` with 'wb' mode. + + Note: + ``put`` will create a directory if the directory of ``filepath`` + does not exist. + + Args: + obj (bytes): Data to be written. + filepath (str or Path): Path to write data. 
+ """ + mmcv.mkdir_or_exist(osp.dirname(filepath)) + with open(filepath, 'wb') as f: + f.write(obj) + + def put_text(self, + obj: str, + filepath: Union[str, Path], + encoding: str = 'utf-8') -> None: + """Write data to a given ``filepath`` with 'w' mode. + + Note: + ``put_text`` will create a directory if the directory of + ``filepath`` does not exist. + + Args: + obj (str): Data to be written. + filepath (str or Path): Path to write data. + encoding (str): The encoding format used to open the ``filepath``. + Default: 'utf-8'. + """ + mmcv.mkdir_or_exist(osp.dirname(filepath)) + with open(filepath, 'w', encoding=encoding) as f: + f.write(obj) + + def remove(self, filepath: Union[str, Path]) -> None: + """Remove a file. + + Args: + filepath (str or Path): Path to be removed. + """ + os.remove(filepath) + + def exists(self, filepath: Union[str, Path]) -> bool: + """Check whether a file path exists. + + Args: + filepath (str or Path): Path to be checked whether exists. + + Returns: + bool: Return ``True`` if ``filepath`` exists, ``False`` otherwise. + """ + return osp.exists(filepath) + + def isdir(self, filepath: Union[str, Path]) -> bool: + """Check whether a file path is a directory. + + Args: + filepath (str or Path): Path to be checked whether it is a + directory. + + Returns: + bool: Return ``True`` if ``filepath`` points to a directory, + ``False`` otherwise. + """ + return osp.isdir(filepath) + + def isfile(self, filepath: Union[str, Path]) -> bool: + """Check whether a file path is a file. + + Args: + filepath (str or Path): Path to be checked whether it is a file. + + Returns: + bool: Return ``True`` if ``filepath`` points to a file, ``False`` + otherwise. + """ + return osp.isfile(filepath) + + def join_path(self, filepath: Union[str, Path], + *filepaths: Union[str, Path]) -> str: + """Concatenate all file paths. + + Join one or more filepath components intelligently. The return value + is the concatenation of filepath and any members of *filepaths. 
+ + Args: + filepath (str or Path): Path to be concatenated. + + Returns: + str: The result of concatenation. + """ + return osp.join(filepath, *filepaths) + + @contextmanager + def get_local_path( + self, filepath: Union[str, Path]) -> Iterable[Union[str, Path]]: + """Only for unified API and do nothing.""" + yield filepath + + def list_dir_or_file(self, + dir_path: Union[str, Path], + list_dir: bool = True, + list_file: bool = True, + suffix: Optional[Union[str, Tuple[str]]] = None, + recursive: bool = False) -> Iterator[str]: + """Scan a directory to find the interested directories or files in + arbitrary order. + + Note: + :meth:`list_dir_or_file` returns the path relative to ``dir_path``. + + Args: + dir_path (str | Path): Path of the directory. + list_dir (bool): List the directories. Default: True. + list_file (bool): List the path of files. Default: True. + suffix (str or tuple[str], optional): File suffix + that we are interested in. Default: None. + recursive (bool): If set to True, recursively scan the + directory. Default: False. + + Yields: + Iterable[str]: A relative path to ``dir_path``. 
+ """ + if list_dir and suffix is not None: + raise TypeError('`suffix` should be None when `list_dir` is True') + + if (suffix is not None) and not isinstance(suffix, (str, tuple)): + raise TypeError('`suffix` must be a string or tuple of strings') + + root = dir_path + + def _list_dir_or_file(dir_path, list_dir, list_file, suffix, + recursive): + for entry in os.scandir(dir_path): + if not entry.name.startswith('.') and entry.is_file(): + rel_path = osp.relpath(entry.path, root) + if (suffix is None + or rel_path.endswith(suffix)) and list_file: + yield rel_path + elif osp.isdir(entry.path): + if list_dir: + rel_dir = osp.relpath(entry.path, root) + yield rel_dir + if recursive: + yield from _list_dir_or_file(entry.path, list_dir, + list_file, suffix, + recursive) + + return _list_dir_or_file(dir_path, list_dir, list_file, suffix, + recursive) + + +class HTTPBackend(BaseStorageBackend): + """HTTP and HTTPS storage bachend.""" + + def get(self, filepath): + value_buf = urlopen(filepath).read() + return value_buf + + def get_text(self, filepath, encoding='utf-8'): + value_buf = urlopen(filepath).read() + return value_buf.decode(encoding) + + @contextmanager + def get_local_path(self, filepath: str) -> Iterable[str]: + """Download a file from ``filepath``. + + ``get_local_path`` is decorated by :meth:`contxtlib.contextmanager`. It + can be called with ``with`` statement, and when exists from the + ``with`` statement, the temporary path will be released. + + Args: + filepath (str): Download a file from ``filepath``. + + Examples: + >>> client = HTTPBackend() + >>> # After existing from the ``with`` clause, + >>> # the path will be removed + >>> with client.get_local_path('http://path/of/your/file') as path: + ... # do something here + """ + try: + f = tempfile.NamedTemporaryFile(delete=False) + f.write(self.get(filepath)) + f.close() + yield f.name + finally: + os.remove(f.name) + + +class FileClient: + """A general file client to access files in different backends. 
+ + The client loads a file or text in a specified backend from its path + and returns it as a binary or text file. There are two ways to choose a + backend, the name of backend and the prefix of path. Although both of them + can be used to choose a storage backend, ``backend`` has a higher priority + that is if they are all set, the storage backend will be chosen by the + backend argument. If they are all `None`, the disk backend will be chosen. + Note that It can also register other backend accessor with a given name, + prefixes, and backend class. In addition, We use the singleton pattern to + avoid repeated object creation. If the arguments are the same, the same + object will be returned. + + Args: + backend (str, optional): The storage backend type. Options are "disk", + "ceph", "memcached", "lmdb", "http" and "petrel". Default: None. + prefix (str, optional): The prefix of the registered storage backend. + Options are "s3", "http", "https". Default: None. + + Examples: + >>> # only set backend + >>> file_client = FileClient(backend='petrel') + >>> # only set prefix + >>> file_client = FileClient(prefix='s3') + >>> # set both backend and prefix but use backend to choose client + >>> file_client = FileClient(backend='petrel', prefix='s3') + >>> # if the arguments are the same, the same object is returned + >>> file_client1 = FileClient(backend='petrel') + >>> file_client1 is file_client + True + + Attributes: + client (:obj:`BaseStorageBackend`): The backend object. 
+ """ + + _backends = { + 'disk': HardDiskBackend, + 'ceph': CephBackend, + 'memcached': MemcachedBackend, + 'lmdb': LmdbBackend, + 'petrel': PetrelBackend, + 'http': HTTPBackend, + } + # This collection is used to record the overridden backends, and when a + # backend appears in the collection, the singleton pattern is disabled for + # that backend, because if the singleton pattern is used, then the object + # returned will be the backend before overwriting + _overridden_backends = set() + _prefix_to_backends = { + 's3': PetrelBackend, + 'http': HTTPBackend, + 'https': HTTPBackend, + } + _overridden_prefixes = set() + + _instances = {} + + def __new__(cls, backend=None, prefix=None, **kwargs): + if backend is None and prefix is None: + backend = 'disk' + if backend is not None and backend not in cls._backends: + raise ValueError( + f'Backend {backend} is not supported. Currently supported ones' + f' are {list(cls._backends.keys())}') + if prefix is not None and prefix not in cls._prefix_to_backends: + raise ValueError( + f'prefix {prefix} is not supported. 
Currently supported ones ' + f'are {list(cls._prefix_to_backends.keys())}') + + # concatenate the arguments to a unique key for determining whether + # objects with the same arguments were created + arg_key = f'{backend}:{prefix}' + for key, value in kwargs.items(): + arg_key += f':{key}:{value}' + + # if a backend was overridden, it will create a new object + if (arg_key in cls._instances + and backend not in cls._overridden_backends + and prefix not in cls._overridden_prefixes): + _instance = cls._instances[arg_key] + else: + # create a new object and put it to _instance + _instance = super().__new__(cls) + if backend is not None: + _instance.client = cls._backends[backend](**kwargs) + else: + _instance.client = cls._prefix_to_backends[prefix](**kwargs) + + cls._instances[arg_key] = _instance + + return _instance + + @property + def name(self): + return self.client.name + + @property + def allow_symlink(self): + return self.client.allow_symlink + + @staticmethod + def parse_uri_prefix(uri: Union[str, Path]) -> Optional[str]: + """Parse the prefix of a uri. + + Args: + uri (str | Path): Uri to be parsed that contains the file prefix. + + Examples: + >>> FileClient.parse_uri_prefix('s3://path/of/your/file') + 's3' + + Returns: + str | None: Return the prefix of uri if the uri contains '://' + else ``None``. + """ + assert is_filepath(uri) + uri = str(uri) + if '://' not in uri: + return None + else: + prefix, _ = uri.split('://') + # In the case of PetrelBackend, the prefix may contains the cluster + # name like clusterName:s3 + if ':' in prefix: + _, prefix = prefix.split(':') + return prefix + + @classmethod + def infer_client(cls, + file_client_args: Optional[dict] = None, + uri: Optional[Union[str, Path]] = None) -> 'FileClient': + """Infer a suitable file client based on the URI and arguments. + + Args: + file_client_args (dict, optional): Arguments to instantiate a + FileClient. Default: None. 
+ uri (str | Path, optional): Uri to be parsed that contains the file + prefix. Default: None. + + Examples: + >>> uri = 's3://path/of/your/file' + >>> file_client = FileClient.infer_client(uri=uri) + >>> file_client_args = {'backend': 'petrel'} + >>> file_client = FileClient.infer_client(file_client_args) + + Returns: + FileClient: Instantiated FileClient object. + """ + assert file_client_args is not None or uri is not None + if file_client_args is None: + file_prefix = cls.parse_uri_prefix(uri) # type: ignore + return cls(prefix=file_prefix) + else: + return cls(**file_client_args) + + @classmethod + def _register_backend(cls, name, backend, force=False, prefixes=None): + if not isinstance(name, str): + raise TypeError('the backend name should be a string, ' + f'but got {type(name)}') + if not inspect.isclass(backend): + raise TypeError( + f'backend should be a class but got {type(backend)}') + if not issubclass(backend, BaseStorageBackend): + raise TypeError( + f'backend {backend} is not a subclass of BaseStorageBackend') + if not force and name in cls._backends: + raise KeyError( + f'{name} is already registered as a storage backend, ' + 'add "force=True" if you want to override it') + + if name in cls._backends and force: + cls._overridden_backends.add(name) + cls._backends[name] = backend + + if prefixes is not None: + if isinstance(prefixes, str): + prefixes = [prefixes] + else: + assert isinstance(prefixes, (list, tuple)) + for prefix in prefixes: + if prefix not in cls._prefix_to_backends: + cls._prefix_to_backends[prefix] = backend + elif (prefix in cls._prefix_to_backends) and force: + cls._overridden_prefixes.add(prefix) + cls._prefix_to_backends[prefix] = backend + else: + raise KeyError( + f'{prefix} is already registered as a storage backend,' + ' add "force=True" if you want to override it') + + @classmethod + def register_backend(cls, name, backend=None, force=False, prefixes=None): + """Register a backend to FileClient. 
+ + This method can be used as a normal class method or a decorator. + + .. code-block:: python + + class NewBackend(BaseStorageBackend): + + def get(self, filepath): + return filepath + + def get_text(self, filepath): + return filepath + + FileClient.register_backend('new', NewBackend) + + or + + .. code-block:: python + + @FileClient.register_backend('new') + class NewBackend(BaseStorageBackend): + + def get(self, filepath): + return filepath + + def get_text(self, filepath): + return filepath + + Args: + name (str): The name of the registered backend. + backend (class, optional): The backend class to be registered, + which must be a subclass of :class:`BaseStorageBackend`. + When this method is used as a decorator, backend is None. + Defaults to None. + force (bool, optional): Whether to override the backend if the name + has already been registered. Defaults to False. + prefixes (str or list[str] or tuple[str], optional): The prefixes + of the registered storage backend. Default: None. + `New in version 1.3.15.` + """ + if backend is not None: + cls._register_backend( + name, backend, force=force, prefixes=prefixes) + return + + def _register(backend_cls): + cls._register_backend( + name, backend_cls, force=force, prefixes=prefixes) + return backend_cls + + return _register + + def get(self, filepath: Union[str, Path]) -> Union[bytes, memoryview]: + """Read data from a given ``filepath`` with 'rb' mode. + + Note: + There are two types of return values for ``get``, one is ``bytes`` + and the other is ``memoryview``. The advantage of using memoryview + is that you can avoid copying, and if you want to convert it to + ``bytes``, you can use ``.tobytes()``. + + Args: + filepath (str or Path): Path to read data. + + Returns: + bytes | memoryview: Expected bytes object or a memory view of the + bytes object. 
+ """ + return self.client.get(filepath) + + def get_text(self, filepath: Union[str, Path], encoding='utf-8') -> str: + """Read data from a given ``filepath`` with 'r' mode. + + Args: + filepath (str or Path): Path to read data. + encoding (str): The encoding format used to open the ``filepath``. + Default: 'utf-8'. + + Returns: + str: Expected text reading from ``filepath``. + """ + return self.client.get_text(filepath, encoding) + + def put(self, obj: bytes, filepath: Union[str, Path]) -> None: + """Write data to a given ``filepath`` with 'wb' mode. + + Note: + ``put`` should create a directory if the directory of ``filepath`` + does not exist. + + Args: + obj (bytes): Data to be written. + filepath (str or Path): Path to write data. + """ + self.client.put(obj, filepath) + + def put_text(self, obj: str, filepath: Union[str, Path]) -> None: + """Write data to a given ``filepath`` with 'w' mode. + + Note: + ``put_text`` should create a directory if the directory of + ``filepath`` does not exist. + + Args: + obj (str): Data to be written. + filepath (str or Path): Path to write data. + encoding (str, optional): The encoding format used to open the + `filepath`. Default: 'utf-8'. + """ + self.client.put_text(obj, filepath) + + def remove(self, filepath: Union[str, Path]) -> None: + """Remove a file. + + Args: + filepath (str, Path): Path to be removed. + """ + self.client.remove(filepath) + + def exists(self, filepath: Union[str, Path]) -> bool: + """Check whether a file path exists. + + Args: + filepath (str or Path): Path to be checked whether exists. + + Returns: + bool: Return ``True`` if ``filepath`` exists, ``False`` otherwise. + """ + return self.client.exists(filepath) + + def isdir(self, filepath: Union[str, Path]) -> bool: + """Check whether a file path is a directory. + + Args: + filepath (str or Path): Path to be checked whether it is a + directory. + + Returns: + bool: Return ``True`` if ``filepath`` points to a directory, + ``False`` otherwise. 
+ """ + return self.client.isdir(filepath) + + def isfile(self, filepath: Union[str, Path]) -> bool: + """Check whether a file path is a file. + + Args: + filepath (str or Path): Path to be checked whether it is a file. + + Returns: + bool: Return ``True`` if ``filepath`` points to a file, ``False`` + otherwise. + """ + return self.client.isfile(filepath) + + def join_path(self, filepath: Union[str, Path], + *filepaths: Union[str, Path]) -> str: + """Concatenate all file paths. + + Join one or more filepath components intelligently. The return value + is the concatenation of filepath and any members of *filepaths. + + Args: + filepath (str or Path): Path to be concatenated. + + Returns: + str: The result of concatenation. + """ + return self.client.join_path(filepath, *filepaths) + + @contextmanager + def get_local_path(self, filepath: Union[str, Path]) -> Iterable[str]: + """Download data from ``filepath`` and write the data to local path. + + ``get_local_path`` is decorated by :meth:`contxtlib.contextmanager`. It + can be called with ``with`` statement, and when exists from the + ``with`` statement, the temporary path will be released. + + Note: + If the ``filepath`` is a local path, just return itself. + + .. warning:: + ``get_local_path`` is an experimental interface that may change in + the future. + + Args: + filepath (str or Path): Path to be read data. + + Examples: + >>> file_client = FileClient(prefix='s3') + >>> with file_client.get_local_path('s3://bucket/abc.jpg') as path: + ... # do something here + + Yields: + Iterable[str]: Only yield one path. + """ + with self.client.get_local_path(str(filepath)) as local_path: + yield local_path + + def list_dir_or_file(self, + dir_path: Union[str, Path], + list_dir: bool = True, + list_file: bool = True, + suffix: Optional[Union[str, Tuple[str]]] = None, + recursive: bool = False) -> Iterator[str]: + """Scan a directory to find the interested directories or files in + arbitrary order. 
+ + Note: + :meth:`list_dir_or_file` returns the path relative to ``dir_path``. + + Args: + dir_path (str | Path): Path of the directory. + list_dir (bool): List the directories. Default: True. + list_file (bool): List the path of files. Default: True. + suffix (str or tuple[str], optional): File suffix + that we are interested in. Default: None. + recursive (bool): If set to True, recursively scan the + directory. Default: False. + + Yields: + Iterable[str]: A relative path to ``dir_path``. + """ + yield from self.client.list_dir_or_file(dir_path, list_dir, list_file, + suffix, recursive) diff --git a/mmcv/fileio/handlers/__init__.py b/mmcv/fileio/handlers/__init__.py new file mode 100644 index 0000000..aa24d91 --- /dev/null +++ b/mmcv/fileio/handlers/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .base import BaseFileHandler +from .json_handler import JsonHandler +from .pickle_handler import PickleHandler +from .yaml_handler import YamlHandler + +__all__ = ['BaseFileHandler', 'JsonHandler', 'PickleHandler', 'YamlHandler'] diff --git a/mmcv/fileio/handlers/base.py b/mmcv/fileio/handlers/base.py new file mode 100644 index 0000000..288878b --- /dev/null +++ b/mmcv/fileio/handlers/base.py @@ -0,0 +1,30 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from abc import ABCMeta, abstractmethod + + +class BaseFileHandler(metaclass=ABCMeta): + # `str_like` is a flag to indicate whether the type of file object is + # str-like object or bytes-like object. Pickle only processes bytes-like + # objects but json only processes str-like object. If it is str-like + # object, `StringIO` will be used to process the buffer. 
+ str_like = True + + @abstractmethod + def load_from_fileobj(self, file, **kwargs): + pass + + @abstractmethod + def dump_to_fileobj(self, obj, file, **kwargs): + pass + + @abstractmethod + def dump_to_str(self, obj, **kwargs): + pass + + def load_from_path(self, filepath, mode='r', **kwargs): + with open(filepath, mode) as f: + return self.load_from_fileobj(f, **kwargs) + + def dump_to_path(self, obj, filepath, mode='w', **kwargs): + with open(filepath, mode) as f: + self.dump_to_fileobj(obj, f, **kwargs) diff --git a/mmcv/fileio/handlers/json_handler.py b/mmcv/fileio/handlers/json_handler.py new file mode 100644 index 0000000..18d4f15 --- /dev/null +++ b/mmcv/fileio/handlers/json_handler.py @@ -0,0 +1,36 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import json + +import numpy as np + +from .base import BaseFileHandler + + +def set_default(obj): + """Set default json values for non-serializable values. + + It helps convert ``set``, ``range`` and ``np.ndarray`` data types to list. + It also converts ``np.generic`` (including ``np.int32``, ``np.float32``, + etc.) into plain numbers of plain python built-in types. + """ + if isinstance(obj, (set, range)): + return list(obj) + elif isinstance(obj, np.ndarray): + return obj.tolist() + elif isinstance(obj, np.generic): + return obj.item() + raise TypeError(f'{type(obj)} is unsupported for json dump') + + +class JsonHandler(BaseFileHandler): + + def load_from_fileobj(self, file): + return json.load(file) + + def dump_to_fileobj(self, obj, file, **kwargs): + kwargs.setdefault('default', set_default) + json.dump(obj, file, **kwargs) + + def dump_to_str(self, obj, **kwargs): + kwargs.setdefault('default', set_default) + return json.dumps(obj, **kwargs) diff --git a/mmcv/fileio/handlers/pickle_handler.py b/mmcv/fileio/handlers/pickle_handler.py new file mode 100644 index 0000000..b37c79b --- /dev/null +++ b/mmcv/fileio/handlers/pickle_handler.py @@ -0,0 +1,28 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import pickle + +from .base import BaseFileHandler + + +class PickleHandler(BaseFileHandler): + + str_like = False + + def load_from_fileobj(self, file, **kwargs): + return pickle.load(file, **kwargs) + + def load_from_path(self, filepath, **kwargs): + return super(PickleHandler, self).load_from_path( + filepath, mode='rb', **kwargs) + + def dump_to_str(self, obj, **kwargs): + kwargs.setdefault('protocol', 2) + return pickle.dumps(obj, **kwargs) + + def dump_to_fileobj(self, obj, file, **kwargs): + kwargs.setdefault('protocol', 2) + pickle.dump(obj, file, **kwargs) + + def dump_to_path(self, obj, filepath, **kwargs): + super(PickleHandler, self).dump_to_path( + obj, filepath, mode='wb', **kwargs) diff --git a/mmcv/fileio/handlers/yaml_handler.py b/mmcv/fileio/handlers/yaml_handler.py new file mode 100644 index 0000000..c5aa2ee --- /dev/null +++ b/mmcv/fileio/handlers/yaml_handler.py @@ -0,0 +1,24 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import yaml + +try: + from yaml import CLoader as Loader, CDumper as Dumper +except ImportError: + from yaml import Loader, Dumper + +from .base import BaseFileHandler # isort:skip + + +class YamlHandler(BaseFileHandler): + + def load_from_fileobj(self, file, **kwargs): + kwargs.setdefault('Loader', Loader) + return yaml.load(file, **kwargs) + + def dump_to_fileobj(self, obj, file, **kwargs): + kwargs.setdefault('Dumper', Dumper) + yaml.dump(obj, file, **kwargs) + + def dump_to_str(self, obj, **kwargs): + kwargs.setdefault('Dumper', Dumper) + return yaml.dump(obj, **kwargs) diff --git a/mmcv/fileio/io.py b/mmcv/fileio/io.py new file mode 100644 index 0000000..aaefde5 --- /dev/null +++ b/mmcv/fileio/io.py @@ -0,0 +1,151 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+from io import BytesIO, StringIO +from pathlib import Path + +from ..utils import is_list_of, is_str +from .file_client import FileClient +from .handlers import BaseFileHandler, JsonHandler, PickleHandler, YamlHandler + +file_handlers = { + 'json': JsonHandler(), + 'yaml': YamlHandler(), + 'yml': YamlHandler(), + 'pickle': PickleHandler(), + 'pkl': PickleHandler() +} + + +def load(file, file_format=None, file_client_args=None, **kwargs): + """Load data from json/yaml/pickle files. + + This method provides a unified api for loading data from serialized files. + + Note: + In v1.3.16 and later, ``load`` supports loading data from serialized + files those can be storaged in different backends. + + Args: + file (str or :obj:`Path` or file-like object): Filename or a file-like + object. + file_format (str, optional): If not specified, the file format will be + inferred from the file extension, otherwise use the specified one. + Currently supported formats include "json", "yaml/yml" and + "pickle/pkl". + file_client_args (dict, optional): Arguments to instantiate a + FileClient. See :class:`mmcv.fileio.FileClient` for details. + Default: None. + + Examples: + >>> load('/path/of/your/file') # file is storaged in disk + >>> load('https://path/of/your/file') # file is storaged in Internet + >>> load('s3://path/of/your/file') # file is storaged in petrel + + Returns: + The content from the file. 
+ """ + if isinstance(file, Path): + file = str(file) + if file_format is None and is_str(file): + file_format = file.split('.')[-1] + if file_format not in file_handlers: + raise TypeError(f'Unsupported format: {file_format}') + + handler = file_handlers[file_format] + if is_str(file): + file_client = FileClient.infer_client(file_client_args, file) + if handler.str_like: + with StringIO(file_client.get_text(file)) as f: + obj = handler.load_from_fileobj(f, **kwargs) + else: + with BytesIO(file_client.get(file)) as f: + obj = handler.load_from_fileobj(f, **kwargs) + elif hasattr(file, 'read'): + obj = handler.load_from_fileobj(file, **kwargs) + else: + raise TypeError('"file" must be a filepath str or a file-object') + return obj + + +def dump(obj, file=None, file_format=None, file_client_args=None, **kwargs): + """Dump data to json/yaml/pickle strings or files. + + This method provides a unified api for dumping data as strings or to files, + and also supports custom arguments for each file format. + + Note: + In v1.3.16 and later, ``dump`` supports dumping data as strings or to + files which is saved to different backends. + + Args: + obj (any): The python object to be dumped. + file (str or :obj:`Path` or file-like object, optional): If not + specified, then the object is dumped to a str, otherwise to a file + specified by the filename or file-like object. + file_format (str, optional): Same as :func:`load`. + file_client_args (dict, optional): Arguments to instantiate a + FileClient. See :class:`mmcv.fileio.FileClient` for details. + Default: None. + + Examples: + >>> dump('hello world', '/path/of/your/file') # disk + >>> dump('hello world', 's3://path/of/your/file') # ceph or petrel + + Returns: + bool: True for success, False otherwise. 
+ """ + if isinstance(file, Path): + file = str(file) + if file_format is None: + if is_str(file): + file_format = file.split('.')[-1] + elif file is None: + raise ValueError( + 'file_format must be specified since file is None') + if file_format not in file_handlers: + raise TypeError(f'Unsupported format: {file_format}') + + handler = file_handlers[file_format] + if file is None: + return handler.dump_to_str(obj, **kwargs) + elif is_str(file): + file_client = FileClient.infer_client(file_client_args, file) + if handler.str_like: + with StringIO() as f: + handler.dump_to_fileobj(obj, f, **kwargs) + file_client.put_text(f.getvalue(), file) + else: + with BytesIO() as f: + handler.dump_to_fileobj(obj, f, **kwargs) + file_client.put(f.getvalue(), file) + elif hasattr(file, 'write'): + handler.dump_to_fileobj(obj, file, **kwargs) + else: + raise TypeError('"file" must be a filename str or a file-object') + + +def _register_handler(handler, file_formats): + """Register a handler for some file extensions. + + Args: + handler (:obj:`BaseFileHandler`): Handler to be registered. + file_formats (str or list[str]): File formats to be handled by this + handler. + """ + if not isinstance(handler, BaseFileHandler): + raise TypeError( + f'handler must be a child of BaseFileHandler, not {type(handler)}') + if isinstance(file_formats, str): + file_formats = [file_formats] + if not is_list_of(file_formats, str): + raise TypeError('file_formats must be a str or a list of str') + for ext in file_formats: + file_handlers[ext] = handler + + +def register_handler(file_formats, **kwargs): + + def wrap(cls): + _register_handler(cls(**kwargs), file_formats) + return cls + + return wrap diff --git a/mmcv/fileio/parse.py b/mmcv/fileio/parse.py new file mode 100644 index 0000000..f60f0d6 --- /dev/null +++ b/mmcv/fileio/parse.py @@ -0,0 +1,97 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+ +from io import StringIO + +from .file_client import FileClient + + +def list_from_file(filename, + prefix='', + offset=0, + max_num=0, + encoding='utf-8', + file_client_args=None): + """Load a text file and parse the content as a list of strings. + + Note: + In v1.3.16 and later, ``list_from_file`` supports loading a text file + which can be storaged in different backends and parsing the content as + a list for strings. + + Args: + filename (str): Filename. + prefix (str): The prefix to be inserted to the beginning of each item. + offset (int): The offset of lines. + max_num (int): The maximum number of lines to be read, + zeros and negatives mean no limitation. + encoding (str): Encoding used to open the file. Default utf-8. + file_client_args (dict, optional): Arguments to instantiate a + FileClient. See :class:`mmcv.fileio.FileClient` for details. + Default: None. + + Examples: + >>> list_from_file('/path/of/your/file') # disk + ['hello', 'world'] + >>> list_from_file('s3://path/of/your/file') # ceph or petrel + ['hello', 'world'] + + Returns: + list[str]: A list of strings. + """ + cnt = 0 + item_list = [] + file_client = FileClient.infer_client(file_client_args, filename) + with StringIO(file_client.get_text(filename, encoding)) as f: + for _ in range(offset): + f.readline() + for line in f: + if 0 < max_num <= cnt: + break + item_list.append(prefix + line.rstrip('\n\r')) + cnt += 1 + return item_list + + +def dict_from_file(filename, + key_type=str, + encoding='utf-8', + file_client_args=None): + """Load a text file and parse the content as a dict. + + Each line of the text file will be two or more columns split by + whitespaces or tabs. The first column will be parsed as dict keys, and + the following columns will be parsed as dict values. + + Note: + In v1.3.16 and later, ``dict_from_file`` supports loading a text file + which can be storaged in different backends and parsing the content as + a dict. + + Args: + filename(str): Filename. 
+ key_type(type): Type of the dict keys. str is user by default and + type conversion will be performed if specified. + encoding (str): Encoding used to open the file. Default utf-8. + file_client_args (dict, optional): Arguments to instantiate a + FileClient. See :class:`mmcv.fileio.FileClient` for details. + Default: None. + + Examples: + >>> dict_from_file('/path/of/your/file') # disk + {'key1': 'value1', 'key2': 'value2'} + >>> dict_from_file('s3://path/of/your/file') # ceph or petrel + {'key1': 'value1', 'key2': 'value2'} + + Returns: + dict: The parsed contents. + """ + mapping = {} + file_client = FileClient.infer_client(file_client_args, filename) + with StringIO(file_client.get_text(filename, encoding)) as f: + for line in f: + items = line.rstrip('\n').split() + assert len(items) >= 2 + key = key_type(items[0]) + val = items[1:] if len(items) > 2 else items[1] + mapping[key] = val + return mapping diff --git a/mmcv/image/__init__.py b/mmcv/image/__init__.py index 92ecec4..d0051d6 100644 --- a/mmcv/image/__init__.py +++ b/mmcv/image/__init__.py @@ -9,10 +9,10 @@ from .geometric import (cutout, imcrop, imflip, imflip_, impad, from .io import imfrombytes, imread, imwrite, supported_backends, use_backend from .misc import tensor2imgs from .photometric import (adjust_brightness, adjust_color, adjust_contrast, - adjust_hue, adjust_lighting, adjust_sharpness, - auto_contrast, clahe, imdenormalize, imequalize, - iminvert, imnormalize, imnormalize_, lut_transform, - posterize, solarize) + adjust_lighting, adjust_sharpness, auto_contrast, + clahe, imdenormalize, imequalize, iminvert, + imnormalize, imnormalize_, lut_transform, posterize, + solarize) __all__ = [ 'bgr2gray', 'bgr2hls', 'bgr2hsv', 'bgr2rgb', 'gray2bgr', 'gray2rgb', @@ -24,6 +24,5 @@ __all__ = [ 'solarize', 'rgb2ycbcr', 'bgr2ycbcr', 'ycbcr2rgb', 'ycbcr2bgr', 'tensor2imgs', 'imshear', 'imtranslate', 'adjust_color', 'imequalize', 'adjust_brightness', 'adjust_contrast', 'lut_transform', 'clahe', - 
'adjust_sharpness', 'auto_contrast', 'cutout', 'adjust_lighting', - 'adjust_hue' + 'adjust_sharpness', 'auto_contrast', 'cutout', 'adjust_lighting' ] diff --git a/mmcv/image/colorspace.py b/mmcv/image/colorspace.py index 08f9952..8145339 100644 --- a/mmcv/image/colorspace.py +++ b/mmcv/image/colorspace.py @@ -1,11 +1,9 @@ # Copyright (c) OpenMMLab. All rights reserved. -from typing import Callable, Union - import cv2 import numpy as np -def imconvert(img: np.ndarray, src: str, dst: str) -> np.ndarray: +def imconvert(img, src, dst): """Convert an image from the src colorspace to dst colorspace. Args: @@ -21,7 +19,7 @@ def imconvert(img: np.ndarray, src: str, dst: str) -> np.ndarray: return out_img -def bgr2gray(img: np.ndarray, keepdim: bool = False) -> np.ndarray: +def bgr2gray(img, keepdim=False): """Convert a BGR image to grayscale image. Args: @@ -38,7 +36,7 @@ def bgr2gray(img: np.ndarray, keepdim: bool = False) -> np.ndarray: return out_img -def rgb2gray(img: np.ndarray, keepdim: bool = False) -> np.ndarray: +def rgb2gray(img, keepdim=False): """Convert a RGB image to grayscale image. Args: @@ -55,7 +53,7 @@ def rgb2gray(img: np.ndarray, keepdim: bool = False) -> np.ndarray: return out_img -def gray2bgr(img: np.ndarray) -> np.ndarray: +def gray2bgr(img): """Convert a grayscale image to BGR image. Args: @@ -69,7 +67,7 @@ def gray2bgr(img: np.ndarray) -> np.ndarray: return out_img -def gray2rgb(img: np.ndarray) -> np.ndarray: +def gray2rgb(img): """Convert a grayscale image to RGB image. Args: @@ -83,7 +81,7 @@ def gray2rgb(img: np.ndarray) -> np.ndarray: return out_img -def _convert_input_type_range(img: np.ndarray) -> np.ndarray: +def _convert_input_type_range(img): """Convert the type and range of the input image. It converts the input image to np.float32 type and range of [0, 1]. 
@@ -111,8 +109,7 @@ def _convert_input_type_range(img: np.ndarray) -> np.ndarray: return img -def _convert_output_type_range( - img: np.ndarray, dst_type: Union[np.uint8, np.float32]) -> np.ndarray: +def _convert_output_type_range(img, dst_type): """Convert the type and range of the image according to dst_type. It converts the image to desired type and range. If `dst_type` is np.uint8, @@ -143,7 +140,7 @@ def _convert_output_type_range( return img.astype(dst_type) -def rgb2ycbcr(img: np.ndarray, y_only: bool = False) -> np.ndarray: +def rgb2ycbcr(img, y_only=False): """Convert a RGB image to YCbCr image. This function produces the same results as Matlab's `rgb2ycbcr` function. @@ -163,7 +160,7 @@ def rgb2ycbcr(img: np.ndarray, y_only: bool = False) -> np.ndarray: Returns: ndarray: The converted YCbCr image. The output image has the same type - and range as input image. + and range as input image. """ img_type = img.dtype img = _convert_input_type_range(img) @@ -177,7 +174,7 @@ def rgb2ycbcr(img: np.ndarray, y_only: bool = False) -> np.ndarray: return out_img -def bgr2ycbcr(img: np.ndarray, y_only: bool = False) -> np.ndarray: +def bgr2ycbcr(img, y_only=False): """Convert a BGR image to YCbCr image. The bgr version of rgb2ycbcr. @@ -197,7 +194,7 @@ def bgr2ycbcr(img: np.ndarray, y_only: bool = False) -> np.ndarray: Returns: ndarray: The converted YCbCr image. The output image has the same type - and range as input image. + and range as input image. """ img_type = img.dtype img = _convert_input_type_range(img) @@ -211,7 +208,7 @@ def bgr2ycbcr(img: np.ndarray, y_only: bool = False) -> np.ndarray: return out_img -def ycbcr2rgb(img: np.ndarray) -> np.ndarray: +def ycbcr2rgb(img): """Convert a YCbCr image to RGB image. This function produces the same results as Matlab's ycbcr2rgb function. @@ -230,7 +227,7 @@ def ycbcr2rgb(img: np.ndarray) -> np.ndarray: Returns: ndarray: The converted RGB image. The output image has the same type - and range as input image. 
+ and range as input image. """ img_type = img.dtype img = _convert_input_type_range(img) * 255 @@ -243,7 +240,7 @@ def ycbcr2rgb(img: np.ndarray) -> np.ndarray: return out_img -def ycbcr2bgr(img: np.ndarray) -> np.ndarray: +def ycbcr2bgr(img): """Convert a YCbCr image to BGR image. The bgr version of ycbcr2rgb. @@ -262,7 +259,7 @@ def ycbcr2bgr(img: np.ndarray) -> np.ndarray: Returns: ndarray: The converted BGR image. The output image has the same type - and range as input image. + and range as input image. """ img_type = img.dtype img = _convert_input_type_range(img) * 255 @@ -275,11 +272,11 @@ def ycbcr2bgr(img: np.ndarray) -> np.ndarray: return out_img -def convert_color_factory(src: str, dst: str) -> Callable: +def convert_color_factory(src, dst): code = getattr(cv2, f'COLOR_{src.upper()}2{dst.upper()}') - def convert_color(img: np.ndarray) -> np.ndarray: + def convert_color(img): out_img = cv2.cvtColor(img, code) return out_img diff --git a/mmcv/image/geometric.py b/mmcv/image/geometric.py index f35299b..cf97c20 100644 --- a/mmcv/image/geometric.py +++ b/mmcv/image/geometric.py @@ -1,11 +1,10 @@ # Copyright (c) OpenMMLab. All rights reserved. import numbers -from typing import List, Optional, Tuple, Union, no_type_check import cv2 import numpy as np -from mmengine.utils import to_2tuple +from ..utils import to_2tuple from .io import imread_backend try: @@ -14,10 +13,7 @@ except ImportError: Image = None -def _scale_size( - size: Tuple[int, int], - scale: Union[float, int, tuple], -) -> Tuple[int, int]: +def _scale_size(size, scale): """Rescale a size by a ratio. 
Args: @@ -41,47 +37,23 @@ cv2_interp_codes = { 'lanczos': cv2.INTER_LANCZOS4 } -cv2_border_modes = { - 'constant': cv2.BORDER_CONSTANT, - 'replicate': cv2.BORDER_REPLICATE, - 'reflect': cv2.BORDER_REFLECT, - 'wrap': cv2.BORDER_WRAP, - 'reflect_101': cv2.BORDER_REFLECT_101, - 'transparent': cv2.BORDER_TRANSPARENT, - 'isolated': cv2.BORDER_ISOLATED -} - -# Pillow >=v9.1.0 use a slightly different naming scheme for filters. -# Set pillow_interp_codes according to the naming scheme used. if Image is not None: - if hasattr(Image, 'Resampling'): - pillow_interp_codes = { - 'nearest': Image.Resampling.NEAREST, - 'bilinear': Image.Resampling.BILINEAR, - 'bicubic': Image.Resampling.BICUBIC, - 'box': Image.Resampling.BOX, - 'lanczos': Image.Resampling.LANCZOS, - 'hamming': Image.Resampling.HAMMING - } - else: - pillow_interp_codes = { - 'nearest': Image.NEAREST, - 'bilinear': Image.BILINEAR, - 'bicubic': Image.BICUBIC, - 'box': Image.BOX, - 'lanczos': Image.LANCZOS, - 'hamming': Image.HAMMING - } - - -def imresize( - img: np.ndarray, - size: Tuple[int, int], - return_scale: bool = False, - interpolation: str = 'bilinear', - out: Optional[np.ndarray] = None, - backend: Optional[str] = None -) -> Union[Tuple[np.ndarray, float, float], np.ndarray]: + pillow_interp_codes = { + 'nearest': Image.NEAREST, + 'bilinear': Image.BILINEAR, + 'bicubic': Image.BICUBIC, + 'box': Image.BOX, + 'lanczos': Image.LANCZOS, + 'hamming': Image.HAMMING + } + + +def imresize(img, + size, + return_scale=False, + interpolation='bilinear', + out=None, + backend=None): """Resize image to a given size. Args: @@ -98,7 +70,7 @@ def imresize( Returns: tuple | ndarray: (`resized_img`, `w_scale`, `h_scale`) or - `resized_img`. + `resized_img`. 
""" h, w = img.shape[:2] if backend is None: @@ -123,18 +95,15 @@ def imresize( return resized_img, w_scale, h_scale -@no_type_check -def imresize_to_multiple( - img: np.ndarray, - divisor: Union[int, Tuple[int, int]], - size: Union[int, Tuple[int, int], None] = None, - scale_factor: Union[float, Tuple[float, float], None] = None, - keep_ratio: bool = False, - return_scale: bool = False, - interpolation: str = 'bilinear', - out: Optional[np.ndarray] = None, - backend: Optional[str] = None -) -> Union[Tuple[np.ndarray, float, float], np.ndarray]: +def imresize_to_multiple(img, + divisor, + size=None, + scale_factor=None, + keep_ratio=False, + return_scale=False, + interpolation='bilinear', + out=None, + backend=None): """Resize image according to a given size or scale factor and then rounds up the the resized or rescaled image size to the nearest value that can be divided by the divisor. @@ -161,7 +130,7 @@ def imresize_to_multiple( Returns: tuple | ndarray: (`resized_img`, `w_scale`, `h_scale`) or - `resized_img`. + `resized_img`. """ h, w = img.shape[:2] if size is not None and scale_factor is not None: @@ -176,7 +145,7 @@ def imresize_to_multiple( size = _scale_size((w, h), scale_factor) divisor = to_2tuple(divisor) - size = tuple(int(np.ceil(s / d)) * d for s, d in zip(size, divisor)) + size = tuple([int(np.ceil(s / d)) * d for s, d in zip(size, divisor)]) resized_img, w_scale, h_scale = imresize( img, size, @@ -190,13 +159,11 @@ def imresize_to_multiple( return resized_img -def imresize_like( - img: np.ndarray, - dst_img: np.ndarray, - return_scale: bool = False, - interpolation: str = 'bilinear', - backend: Optional[str] = None -) -> Union[Tuple[np.ndarray, float, float], np.ndarray]: +def imresize_like(img, + dst_img, + return_scale=False, + interpolation='bilinear', + backend=None): """Resize image to the same size of a given image. 
Args: @@ -208,15 +175,13 @@ def imresize_like( Returns: tuple or ndarray: (`resized_img`, `w_scale`, `h_scale`) or - `resized_img`. + `resized_img`. """ h, w = dst_img.shape[:2] return imresize(img, (w, h), return_scale, interpolation, backend=backend) -def rescale_size(old_size: tuple, - scale: Union[float, int, tuple], - return_scale: bool = False) -> tuple: +def rescale_size(old_size, scale, return_scale=False): """Calculate the new size to be rescaled to. Args: @@ -253,13 +218,11 @@ def rescale_size(old_size: tuple, return new_size -def imrescale( - img: np.ndarray, - scale: Union[float, Tuple[int, int]], - return_scale: bool = False, - interpolation: str = 'bilinear', - backend: Optional[str] = None -) -> Union[np.ndarray, Tuple[np.ndarray, float]]: +def imrescale(img, + scale, + return_scale=False, + interpolation='bilinear', + backend=None): """Resize image while keeping the aspect ratio. Args: @@ -286,7 +249,7 @@ def imrescale( return rescaled_img -def imflip(img: np.ndarray, direction: str = 'horizontal') -> np.ndarray: +def imflip(img, direction='horizontal'): """Flip an image horizontally or vertically. Args: @@ -306,7 +269,7 @@ def imflip(img: np.ndarray, direction: str = 'horizontal') -> np.ndarray: return np.flip(img, axis=(0, 1)) -def imflip_(img: np.ndarray, direction: str = 'horizontal') -> np.ndarray: +def imflip_(img, direction='horizontal'): """Inplace flip an image horizontally or vertically. Args: @@ -326,33 +289,30 @@ def imflip_(img: np.ndarray, direction: str = 'horizontal') -> np.ndarray: return cv2.flip(img, -1, img) -def imrotate(img: np.ndarray, - angle: float, - center: Optional[Tuple[float, float]] = None, - scale: float = 1.0, - border_value: int = 0, - interpolation: str = 'bilinear', - auto_bound: bool = False, - border_mode: str = 'constant') -> np.ndarray: +def imrotate(img, + angle, + center=None, + scale=1.0, + border_value=0, + interpolation='bilinear', + auto_bound=False): """Rotate an image. 
Args: - img (np.ndarray): Image to be rotated. + img (ndarray): Image to be rotated. angle (float): Rotation angle in degrees, positive values mean clockwise rotation. center (tuple[float], optional): Center point (w, h) of the rotation in the source image. If not specified, the center of the image will be used. scale (float): Isotropic scale factor. - border_value (int): Border value used in case of a constant border. - Defaults to 0. + border_value (int): Border value. interpolation (str): Same as :func:`resize`. auto_bound (bool): Whether to adjust the image size to cover the whole rotated image. - border_mode (str): Pixel extrapolation method. Defaults to 'constant'. Returns: - np.ndarray: The rotated image. + ndarray: The rotated image. """ if center is not None and auto_bound: raise ValueError('`auto_bound` conflicts with `center`') @@ -375,12 +335,11 @@ def imrotate(img: np.ndarray, img, matrix, (w, h), flags=cv2_interp_codes[interpolation], - borderMode=cv2_border_modes[border_mode], borderValue=border_value) return rotated -def bbox_clip(bboxes: np.ndarray, img_shape: Tuple[int, int]) -> np.ndarray: +def bbox_clip(bboxes, img_shape): """Clip bboxes to fit the image shape. Args: @@ -398,9 +357,7 @@ def bbox_clip(bboxes: np.ndarray, img_shape: Tuple[int, int]) -> np.ndarray: return clipped_bboxes -def bbox_scaling(bboxes: np.ndarray, - scale: float, - clip_shape: Optional[Tuple[int, int]] = None) -> np.ndarray: +def bbox_scaling(bboxes, scale, clip_shape=None): """Scaling bboxes w.r.t the box center. Args: @@ -426,12 +383,7 @@ def bbox_scaling(bboxes: np.ndarray, return scaled_bboxes -def imcrop( - img: np.ndarray, - bboxes: np.ndarray, - scale: float = 1.0, - pad_fill: Union[float, list, None] = None -) -> Union[np.ndarray, List[np.ndarray]]: +def imcrop(img, bboxes, scale=1.0, pad_fill=None): """Crop image patches. 3 steps: scale the bboxes -> clip bboxes -> crop and pad. @@ -440,7 +392,7 @@ def imcrop( img (ndarray): Image to be cropped. 
bboxes (ndarray): Shape (k, 4) or (4, ), location of cropped bboxes. scale (float, optional): Scale ratio of bboxes, the default value - 1.0 means no scaling. + 1.0 means no padding. pad_fill (Number | list[Number]): Value to be filled for padding. Default: None, which means no padding. @@ -464,12 +416,10 @@ def imcrop( patch = img[y1:y2 + 1, x1:x2 + 1, ...] else: _x1, _y1, _x2, _y2 = tuple(scaled_bboxes[i, :]) - patch_h = _y2 - _y1 + 1 - patch_w = _x2 - _x1 + 1 if chn == 1: - patch_shape = (patch_h, patch_w) + patch_shape = (_y2 - _y1 + 1, _x2 - _x1 + 1) else: - patch_shape = (patch_h, patch_w, chn) # type: ignore + patch_shape = (_y2 - _y1 + 1, _x2 - _x1 + 1, chn) patch = np.array( pad_fill, dtype=img.dtype) * np.ones( patch_shape, dtype=img.dtype) @@ -487,12 +437,12 @@ def imcrop( return patches -def impad(img: np.ndarray, +def impad(img, *, - shape: Optional[Tuple[int, int]] = None, - padding: Union[int, tuple, None] = None, - pad_val: Union[float, List] = 0, - padding_mode: str = 'constant') -> np.ndarray: + shape=None, + padding=None, + pad_val=0, + padding_mode='constant'): """Pad the given image to a certain shape or pad on all sides with specified padding mode and padding value. @@ -512,16 +462,16 @@ def impad(img: np.ndarray, reflect or symmetric. Default: constant. - constant: pads with a constant value, this value is specified - with pad_val. + with pad_val. - edge: pads with the last value at the edge of the image. - - reflect: pads with reflection of image without repeating the last - value on the edge. For example, padding [1, 2, 3, 4] with 2 - elements on both sides in reflect mode will result in - [3, 2, 1, 2, 3, 4, 3, 2]. - - symmetric: pads with reflection of image repeating the last value - on the edge. For example, padding [1, 2, 3, 4] with 2 elements on - both sides in symmetric mode will result in - [2, 1, 1, 2, 3, 4, 4, 3] + - reflect: pads with reflection of image without repeating the + last value on the edge. 
For example, padding [1, 2, 3, 4] + with 2 elements on both sides in reflect mode will result + in [3, 2, 1, 2, 3, 4, 3, 2]. + - symmetric: pads with reflection of image repeating the last + value on the edge. For example, padding [1, 2, 3, 4] with + 2 elements on both sides in symmetric mode will result in + [2, 1, 1, 2, 3, 4, 4, 3] Returns: ndarray: The padded image. @@ -529,9 +479,7 @@ def impad(img: np.ndarray, assert (shape is not None) ^ (padding is not None) if shape is not None: - width = max(shape[1] - img.shape[1], 0) - height = max(shape[0] - img.shape[0], 0) - padding = (0, 0, width, height) + padding = (0, 0, shape[1] - img.shape[1], shape[0] - img.shape[0]) # check pad_val if isinstance(pad_val, tuple): @@ -571,9 +519,7 @@ def impad(img: np.ndarray, return img -def impad_to_multiple(img: np.ndarray, - divisor: int, - pad_val: Union[float, List] = 0) -> np.ndarray: +def impad_to_multiple(img, divisor, pad_val=0): """Pad an image to ensure each edge to be multiple to some number. Args: @@ -589,9 +535,7 @@ def impad_to_multiple(img: np.ndarray, return impad(img, shape=(pad_h, pad_w), pad_val=pad_val) -def cutout(img: np.ndarray, - shape: Union[int, Tuple[int, int]], - pad_val: Union[int, float, tuple] = 0) -> np.ndarray: +def cutout(img, shape, pad_val=0): """Randomly cut out a rectangle from the original img. Args: @@ -635,7 +579,7 @@ def cutout(img: np.ndarray, if img.ndim == 2: patch_shape = (y2 - y1, x2 - x1) else: - patch_shape = (y2 - y1, x2 - x1, channels) # type: ignore + patch_shape = (y2 - y1, x2 - x1, channels) img_cutout = img.copy() patch = np.array( @@ -646,8 +590,7 @@ def cutout(img: np.ndarray, return img_cutout -def _get_shear_matrix(magnitude: Union[int, float], - direction: str = 'horizontal') -> np.ndarray: +def _get_shear_matrix(magnitude, direction='horizontal'): """Generate the shear matrix for transformation. 
Args: @@ -665,11 +608,11 @@ def _get_shear_matrix(magnitude: Union[int, float], return shear_matrix -def imshear(img: np.ndarray, - magnitude: Union[int, float], - direction: str = 'horizontal', - border_value: Union[int, Tuple[int, int]] = 0, - interpolation: str = 'bilinear') -> np.ndarray: +def imshear(img, + magnitude, + direction='horizontal', + border_value=0, + interpolation='bilinear'): """Shear an image. Args: @@ -693,7 +636,7 @@ def imshear(img: np.ndarray, elif img.ndim == 3: channels = img.shape[-1] if isinstance(border_value, int): - border_value = tuple([border_value] * channels) # type: ignore + border_value = tuple([border_value] * channels) elif isinstance(border_value, tuple): assert len(border_value) == channels, \ 'Expected the num of elements in tuple equals the channels' \ @@ -711,13 +654,12 @@ def imshear(img: np.ndarray, # greater than 3 (e.g. shearing masks whose channels large # than 3) will raise TypeError in `cv2.warpAffine`. # Here simply slice the first 3 values in `border_value`. - borderValue=border_value[:3], # type: ignore + borderValue=border_value[:3], flags=cv2_interp_codes[interpolation]) return sheared -def _get_translate_matrix(offset: Union[int, float], - direction: str = 'horizontal') -> np.ndarray: +def _get_translate_matrix(offset, direction='horizontal'): """Generate the translate matrix. Args: @@ -735,11 +677,11 @@ def _get_translate_matrix(offset: Union[int, float], return translate_matrix -def imtranslate(img: np.ndarray, - offset: Union[int, float], - direction: str = 'horizontal', - border_value: Union[int, tuple] = 0, - interpolation: str = 'bilinear') -> np.ndarray: +def imtranslate(img, + offset, + direction='horizontal', + border_value=0, + interpolation='bilinear'): """Translate an image. Args: diff --git a/mmcv/image/io.py b/mmcv/image/io.py index e10d443..d47aaa8 100644 --- a/mmcv/image/io.py +++ b/mmcv/image/io.py @@ -1,16 +1,14 @@ # Copyright (c) OpenMMLab. All rights reserved. 
import io import os.path as osp -import warnings from pathlib import Path -from typing import Optional, Union import cv2 -import mmengine.fileio as fileio import numpy as np from cv2 import (IMREAD_COLOR, IMREAD_GRAYSCALE, IMREAD_IGNORE_ORIENTATION, IMREAD_UNCHANGED) -from mmengine.utils import is_filepath, is_str + +from mmcv.utils import check_file_exist, is_str, mkdir_or_exist try: from turbojpeg import TJCS_RGB, TJPF_BGR, TJPF_GRAY, TurboJPEG @@ -42,7 +40,7 @@ imread_flags = { imread_backend = 'cv2' -def use_backend(backend: str) -> None: +def use_backend(backend): """Select a backend for image decoding. Args: @@ -68,7 +66,7 @@ def use_backend(backend: str) -> None: raise ImportError('`tifffile` is not installed') -def _jpegflag(flag: str = 'color', channel_order: str = 'bgr'): +def _jpegflag(flag='color', channel_order='bgr'): channel_order = channel_order.lower() if channel_order not in ['rgb', 'bgr']: raise ValueError('channel order must be either "rgb" or "bgr"') @@ -84,9 +82,7 @@ def _jpegflag(flag: str = 'color', channel_order: str = 'bgr'): raise ValueError('flag must be "color" or "grayscale"') -def _pillow2array(img, - flag: str = 'color', - channel_order: str = 'bgr') -> np.ndarray: +def _pillow2array(img, flag='color', channel_order='bgr'): """Convert a pillow image to numpy array. Args: @@ -141,13 +137,7 @@ def _pillow2array(img, return array -def imread(img_or_path: Union[np.ndarray, str, Path], - flag: str = 'color', - channel_order: str = 'bgr', - backend: Optional[str] = None, - file_client_args: Optional[dict] = None, - *, - backend_args: Optional[dict] = None) -> np.ndarray: +def imread(img_or_path, flag='color', channel_order='bgr', backend=None): """Read an image. Args: @@ -167,117 +157,78 @@ def imread(img_or_path: Union[np.ndarray, str, Path], `cv2`, `pillow`, `turbojpeg`, `tifffile`, `None`. If backend is None, the global imread_backend specified by ``mmcv.use_backend()`` will be used. Default: None. 
- file_client_args (dict, optional): Arguments to instantiate a - FileClient. See :class:`mmengine.fileio.FileClient` for details. - Default: None. It will be deprecated in future. Please use - ``backend_args`` instead. - Deprecated in version 2.0.0rc4. - backend_args (dict, optional): Instantiates the corresponding file - backend. It may contain `backend` key to specify the file - backend. If it contains, the file backend corresponding to this - value will be used and initialized with the remaining values, - otherwise the corresponding file backend will be selected - based on the prefix of the file path. Defaults to None. - New in version 2.0.0rc4. Returns: ndarray: Loaded image array. - - Examples: - >>> import mmcv - >>> img_path = '/path/to/img.jpg' - >>> img = mmcv.imread(img_path) - >>> img = mmcv.imread(img_path, flag='color', channel_order='rgb', - ... backend='cv2') - >>> img = mmcv.imread(img_path, flag='color', channel_order='bgr', - ... backend='pillow') - >>> s3_img_path = 's3://bucket/img.jpg' - >>> # infer the file backend by the prefix s3 - >>> img = mmcv.imread(s3_img_path) - >>> # manually set the file backend petrel - >>> img = mmcv.imread(s3_img_path, backend_args={ - ... 'backend': 'petrel'}) - >>> http_img_path = 'http://path/to/img.jpg' - >>> img = mmcv.imread(http_img_path) - >>> img = mmcv.imread(http_img_path, backend_args={ - ... 'backend': 'http'}) """ - if file_client_args is not None: - warnings.warn( - '"file_client_args" will be deprecated in future. ' - 'Please use "backend_args" instead', DeprecationWarning) - if backend_args is not None: - raise ValueError( - '"file_client_args" and "backend_args" cannot be set at the ' - 'same time.') + if backend is None: + backend = imread_backend + if backend not in supported_backends: + raise ValueError(f'backend: {backend} is not supported. 
Supported ' + "backends are 'cv2', 'turbojpeg', 'pillow'") if isinstance(img_or_path, Path): img_or_path = str(img_or_path) if isinstance(img_or_path, np.ndarray): return img_or_path elif is_str(img_or_path): - if file_client_args is not None: - file_client = fileio.FileClient.infer_client( - file_client_args, img_or_path) - img_bytes = file_client.get(img_or_path) + check_file_exist(img_or_path, + f'img file does not exist: {img_or_path}') + if backend == 'turbojpeg': + with open(img_or_path, 'rb') as in_file: + img = jpeg.decode(in_file.read(), + _jpegflag(flag, channel_order)) + if img.shape[-1] == 1: + img = img[:, :, 0] + return img + elif backend == 'pillow': + img = Image.open(img_or_path) + img = _pillow2array(img, flag, channel_order) + return img + elif backend == 'tifffile': + img = tifffile.imread(img_or_path) + return img else: - img_bytes = fileio.get(img_or_path, backend_args=backend_args) - return imfrombytes(img_bytes, flag, channel_order, backend) + flag = imread_flags[flag] if is_str(flag) else flag + img = cv2.imread(img_or_path, flag) + if flag == IMREAD_COLOR and channel_order == 'rgb': + cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img) + return img else: raise TypeError('"img" must be a numpy array or a str or ' 'a pathlib.Path object') -def imfrombytes(content: bytes, - flag: str = 'color', - channel_order: str = 'bgr', - backend: Optional[str] = None) -> np.ndarray: +def imfrombytes(content, flag='color', channel_order='bgr', backend=None): """Read an image from bytes. Args: content (bytes): Image bytes got from files or other streams. flag (str): Same as :func:`imread`. - channel_order (str): The channel order of the output, candidates - are 'bgr' and 'rgb'. Default to 'bgr'. backend (str | None): The image decoding backend type. Options are - `cv2`, `pillow`, `turbojpeg`, `tifffile`, `None`. If backend is - None, the global imread_backend specified by ``mmcv.use_backend()`` - will be used. Default: None. + `cv2`, `pillow`, `turbojpeg`, `None`. 
If backend is None, the + global imread_backend specified by ``mmcv.use_backend()`` will be + used. Default: None. Returns: ndarray: Loaded image array. - - Examples: - >>> img_path = '/path/to/img.jpg' - >>> with open(img_path, 'rb') as f: - >>> img_buff = f.read() - >>> img = mmcv.imfrombytes(img_buff) - >>> img = mmcv.imfrombytes(img_buff, flag='color', channel_order='rgb') - >>> img = mmcv.imfrombytes(img_buff, backend='pillow') - >>> img = mmcv.imfrombytes(img_buff, backend='cv2') """ if backend is None: backend = imread_backend if backend not in supported_backends: - raise ValueError( - f'backend: {backend} is not supported. Supported ' - "backends are 'cv2', 'turbojpeg', 'pillow', 'tifffile'") + raise ValueError(f'backend: {backend} is not supported. Supported ' + "backends are 'cv2', 'turbojpeg', 'pillow'") if backend == 'turbojpeg': - img = jpeg.decode( # type: ignore - content, _jpegflag(flag, channel_order)) + img = jpeg.decode(content, _jpegflag(flag, channel_order)) if img.shape[-1] == 1: img = img[:, :, 0] return img elif backend == 'pillow': - with io.BytesIO(content) as buff: - img = Image.open(buff) - img = _pillow2array(img, flag, channel_order) - return img - elif backend == 'tifffile': - with io.BytesIO(content) as buff: - img = tifffile.imread(buff) + buff = io.BytesIO(content) + img = Image.open(buff) + img = _pillow2array(img, flag, channel_order) return img else: img_np = np.frombuffer(content, np.uint8) @@ -288,77 +239,20 @@ def imfrombytes(content: bytes, return img -def imwrite(img: np.ndarray, - file_path: str, - params: Optional[list] = None, - auto_mkdir: Optional[bool] = None, - file_client_args: Optional[dict] = None, - *, - backend_args: Optional[dict] = None) -> bool: +def imwrite(img, file_path, params=None, auto_mkdir=True): """Write image to file. - Warning: - The parameter `auto_mkdir` will be deprecated in the future and every - file clients will make directory automatically. - Args: img (ndarray): Image array to be written. 
file_path (str): Image file path. params (None or list): Same as opencv :func:`imwrite` interface. auto_mkdir (bool): If the parent folder of `file_path` does not exist, - whether to create it automatically. It will be deprecated. - file_client_args (dict, optional): Arguments to instantiate a - FileClient. See :class:`mmengine.fileio.FileClient` for details. - Default: None. It will be deprecated in future. Please use - ``backend_args`` instead. - Deprecated in version 2.0.0rc4. - backend_args (dict, optional): Instantiates the corresponding file - backend. It may contain `backend` key to specify the file - backend. If it contains, the file backend corresponding to this - value will be used and initialized with the remaining values, - otherwise the corresponding file backend will be selected - based on the prefix of the file path. Defaults to None. - New in version 2.0.0rc4. + whether to create it automatically. Returns: bool: Successful or not. - - Examples: - >>> # write to hard disk client - >>> ret = mmcv.imwrite(img, '/path/to/img.jpg') - >>> # infer the file backend by the prefix s3 - >>> ret = mmcv.imwrite(img, 's3://bucket/img.jpg') - >>> # manually set the file backend petrel - >>> ret = mmcv.imwrite(img, 's3://bucket/img.jpg', backend_args={ - ... 'backend': 'petrel'}) """ - if file_client_args is not None: - warnings.warn( - '"file_client_args" will be deprecated in future. ' - 'Please use "backend_args" instead', DeprecationWarning) - if backend_args is not None: - raise ValueError( - '"file_client_args" and "backend_args" cannot be set at the ' - 'same time.') - - assert is_filepath(file_path) - file_path = str(file_path) - if auto_mkdir is not None: - warnings.warn( - 'The parameter `auto_mkdir` will be deprecated in the future and ' - 'every file clients will make directory automatically.') - - img_ext = osp.splitext(file_path)[-1] - # Encode image according to image suffix. 
- # For example, if image path is '/path/your/img.jpg', the encode - # format is '.jpg'. - flag, img_buff = cv2.imencode(img_ext, img, params) - - if file_client_args is not None: - file_client = fileio.FileClient.infer_client(file_client_args, - file_path) - file_client.put(img_buff.tobytes(), file_path) - else: - fileio.put(img_buff.tobytes(), file_path, backend_args=backend_args) - - return flag + if auto_mkdir: + dir_name = osp.abspath(osp.dirname(file_path)) + mkdir_or_exist(dir_name) + return cv2.imwrite(file_path, img, params) diff --git a/mmcv/image/misc.py b/mmcv/image/misc.py index e923cad..dfc4a9c 100644 --- a/mmcv/image/misc.py +++ b/mmcv/image/misc.py @@ -1,6 +1,4 @@ # Copyright (c) OpenMMLab. All rights reserved. -from typing import Optional - import numpy as np import mmcv @@ -11,24 +9,18 @@ except ImportError: torch = None -def tensor2imgs(tensor, - mean: Optional[tuple] = None, - std: Optional[tuple] = None, - to_rgb: bool = True) -> list: - """Convert tensor to 3-channel images or 1-channel gray images. +def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True): + """Convert tensor to 3-channel images. Args: tensor (torch.Tensor): Tensor that contains multiple images, shape ( - N, C, H, W). :math:`C` can be either 3 or 1. - mean (tuple[float], optional): Mean of images. If None, - (0, 0, 0) will be used for tensor with 3-channel, - while (0, ) for tensor with 1-channel. Defaults to None. - std (tuple[float], optional): Standard deviation of images. If None, - (1, 1, 1) will be used for tensor with 3-channel, - while (1, ) for tensor with 1-channel. Defaults to None. + N, C, H, W). + mean (tuple[float], optional): Mean of images. Defaults to (0, 0, 0). + std (tuple[float], optional): Standard deviation of images. + Defaults to (1, 1, 1). to_rgb (bool, optional): Whether the tensor was converted to RGB format in the first place. If so, convert it back to BGR. - For the tensor with 1 channel, it must be False. Defaults to True. 
+ Defaults to True. Returns: list[np.ndarray]: A list that contains multiple images. @@ -37,14 +29,8 @@ def tensor2imgs(tensor, if torch is None: raise RuntimeError('pytorch is not installed') assert torch.is_tensor(tensor) and tensor.ndim == 4 - channels = tensor.size(1) - assert channels in [1, 3] - if mean is None: - mean = (0, ) * channels - if std is None: - std = (1, ) * channels - assert (channels == len(mean) == len(std) == 3) or \ - (channels == len(mean) == len(std) == 1 and not to_rgb) + assert len(mean) == 3 + assert len(std) == 3 num_imgs = tensor.size(0) mean = np.array(mean, dtype=np.float32) diff --git a/mmcv/image/photometric.py b/mmcv/image/photometric.py index 12cbb90..5085d01 100644 --- a/mmcv/image/photometric.py +++ b/mmcv/image/photometric.py @@ -1,14 +1,9 @@ # Copyright (c) OpenMMLab. All rights reserved. -import warnings -from typing import Optional - import cv2 import numpy as np -from mmengine.utils import is_tuple_of -from PIL import Image, ImageEnhance +from ..utils import is_tuple_of from .colorspace import bgr2gray, gray2bgr -from .io import imread_backend def imnormalize(img, mean, std, to_rgb=True): @@ -102,7 +97,7 @@ def posterize(img, bits): return img -def adjust_color(img, alpha=1, beta=None, gamma=0, backend=None): +def adjust_color(img, alpha=1, beta=None, gamma=0): r"""It blends the source image and its gray image: .. math:: @@ -115,41 +110,22 @@ def adjust_color(img, alpha=1, beta=None, gamma=0, backend=None): If None, it's assigned the value (1 - `alpha`). gamma (int | float): Scalar added to each sum. Same as :func:`cv2.addWeighted`. Default 0. - backend (str | None): The image processing backend type. Options are - `cv2`, `pillow`, `None`. If backend is None, the global - ``imread_backend`` specified by ``mmcv.use_backend()`` will be - used. Defaults to None. Returns: ndarray: Colored image which has the same size and dtype as input. 
""" - if backend is None: - backend = imread_backend - if backend not in ['cv2', 'pillow']: - raise ValueError(f'backend: {backend} is not supported.' - f"Supported backends are 'cv2', 'pillow'") - - if backend == 'pillow': - assert img.dtype == np.uint8, 'Pillow backend only support uint8 type' - warnings.warn("Only use 'alpha' for pillow backend.") - # Image.fromarray defaultly supports RGB, not BGR. - pil_image = Image.fromarray(img[..., ::-1], mode='RGB') - enhancer = ImageEnhance.Color(pil_image) - pil_image = enhancer.enhance(alpha) - return np.array(pil_image, dtype=img.dtype)[..., ::-1] - else: - gray_img = bgr2gray(img) - gray_img = np.tile(gray_img[..., None], [1, 1, 3]) - if beta is None: - beta = 1 - alpha - colored_img = cv2.addWeighted(img, alpha, gray_img, beta, gamma) - if not colored_img.dtype == np.uint8: - # Note when the dtype of `img` is not the default `np.uint8` - # (e.g. np.float32), the value in `colored_img` got from cv2 - # is not guaranteed to be in range [0, 255], so here clip - # is needed. - colored_img = np.clip(colored_img, 0, 255) - return colored_img.astype(img.dtype) + gray_img = bgr2gray(img) + gray_img = np.tile(gray_img[..., None], [1, 1, 3]) + if beta is None: + beta = 1 - alpha + colored_img = cv2.addWeighted(img, alpha, gray_img, beta, gamma) + if not colored_img.dtype == np.uint8: + # Note when the dtype of `img` is not the default `np.uint8` + # (e.g. np.float32), the value in `colored_img` got from cv2 + # is not guaranteed to be in range [0, 255], so here clip + # is needed. + colored_img = np.clip(colored_img, 0, 255) + return colored_img def imequalize(img): @@ -197,7 +173,7 @@ def imequalize(img): return equalized_img.astype(img.dtype) -def adjust_brightness(img, factor=1., backend=None): +def adjust_brightness(img, factor=1.): """Adjust image brightness. This function controls the brightness of an image. 
An @@ -214,40 +190,22 @@ def adjust_brightness(img, factor=1., backend=None): Factor 1.0 returns the original image, lower factors mean less color (brightness, contrast, etc), and higher values more. Default 1. - backend (str | None): The image processing backend type. Options are - `cv2`, `pillow`, `None`. If backend is None, the global - ``imread_backend`` specified by ``mmcv.use_backend()`` will be - used. Defaults to None. Returns: ndarray: The brightened image. """ - if backend is None: - backend = imread_backend - if backend not in ['cv2', 'pillow']: - raise ValueError(f'backend: {backend} is not supported.' - f"Supported backends are 'cv2', 'pillow'") - - if backend == 'pillow': - assert img.dtype == np.uint8, 'Pillow backend only support uint8 type' - # Image.fromarray defaultly supports RGB, not BGR. - pil_image = Image.fromarray(img[..., ::-1], mode='RGB') - enhancer = ImageEnhance.Brightness(pil_image) - pil_image = enhancer.enhance(factor) - return np.array(pil_image, dtype=img.dtype)[..., ::-1] - else: - degenerated = np.zeros_like(img) - # Note manually convert the dtype to np.float32, to - # achieve as close results as PIL.ImageEnhance.Brightness. - # Set beta=1-factor, and gamma=0 - brightened_img = cv2.addWeighted( - img.astype(np.float32), factor, degenerated.astype(np.float32), - 1 - factor, 0) - brightened_img = np.clip(brightened_img, 0, 255) - return brightened_img.astype(img.dtype) - - -def adjust_contrast(img, factor=1., backend=None): + degenerated = np.zeros_like(img) + # Note manually convert the dtype to np.float32, to + # achieve as close results as PIL.ImageEnhance.Brightness. + # Set beta=1-factor, and gamma=0 + brightened_img = cv2.addWeighted( + img.astype(np.float32), factor, degenerated.astype(np.float32), + 1 - factor, 0) + brightened_img = np.clip(brightened_img, 0, 255) + return brightened_img.astype(img.dtype) + + +def adjust_contrast(img, factor=1.): """Adjust image contrast. This function controls the contrast of an image. 
An @@ -261,38 +219,20 @@ def adjust_contrast(img, factor=1., backend=None): Args: img (ndarray): Image to be contrasted. BGR order. factor (float): Same as :func:`mmcv.adjust_brightness`. - backend (str | None): The image processing backend type. Options are - `cv2`, `pillow`, `None`. If backend is None, the global - ``imread_backend`` specified by ``mmcv.use_backend()`` will be - used. Defaults to None. Returns: ndarray: The contrasted image. """ - if backend is None: - backend = imread_backend - if backend not in ['cv2', 'pillow']: - raise ValueError(f'backend: {backend} is not supported.' - f"Supported backends are 'cv2', 'pillow'") - - if backend == 'pillow': - assert img.dtype == np.uint8, 'Pillow backend only support uint8 type' - # Image.fromarray defaultly supports RGB, not BGR. - pil_image = Image.fromarray(img[..., ::-1], mode='RGB') - enhancer = ImageEnhance.Contrast(pil_image) - pil_image = enhancer.enhance(factor) - return np.array(pil_image, dtype=img.dtype)[..., ::-1] - else: - gray_img = bgr2gray(img) - hist = np.histogram(gray_img, 256, (0, 255))[0] - mean = round(np.sum(gray_img) / np.sum(hist)) - degenerated = (np.ones_like(img[..., 0]) * mean).astype(img.dtype) - degenerated = gray2bgr(degenerated) - contrasted_img = cv2.addWeighted( - img.astype(np.float32), factor, degenerated.astype(np.float32), - 1 - factor, 0) - contrasted_img = np.clip(contrasted_img, 0, 255) - return contrasted_img.astype(img.dtype) + gray_img = bgr2gray(img) + hist = np.histogram(gray_img, 256, (0, 255))[0] + mean = round(np.sum(gray_img) / np.sum(hist)) + degenerated = (np.ones_like(img[..., 0]) * mean).astype(img.dtype) + degenerated = gray2bgr(degenerated) + contrasted_img = cv2.addWeighted( + img.astype(np.float32), factor, degenerated.astype(np.float32), + 1 - factor, 0) + contrasted_img = np.clip(contrasted_img, 0, 255) + return contrasted_img.astype(img.dtype) def auto_contrast(img, cutoff=0): @@ -486,76 +426,3 @@ def clahe(img, clip_limit=40.0, tile_grid_size=(8, 
8)): clahe = cv2.createCLAHE(clip_limit, tile_grid_size) return clahe.apply(np.array(img, dtype=np.uint8)) - - -def adjust_hue(img: np.ndarray, - hue_factor: float, - backend: Optional[str] = None) -> np.ndarray: - """Adjust hue of an image. - - The image hue is adjusted by converting the image to HSV and cyclically - shifting the intensities in the hue channel (H). The image is then - converted back to original image mode. - - `hue_factor` is the amount of shift in H channel and must be in the - interval `[-0.5, 0.5]`. - - Modified from - https://github.com/pytorch/vision/blob/main/torchvision/ - transforms/functional.py - - Args: - img (ndarray): Image to be adjusted. - hue_factor (float): How much to shift the hue channel. Should be in - [-0.5, 0.5]. 0.5 and -0.5 give complete reversal of hue channel in - HSV space in positive and negative direction respectively. - 0 means no shift. Therefore, both -0.5 and 0.5 will give an image - with complementary colors while 0 gives the original image. - backend (str | None): The image processing backend type. Options are - `cv2`, `pillow`, `None`. If backend is None, the global - ``imread_backend`` specified by ``mmcv.use_backend()`` will be - used. Defaults to None. - - Returns: - ndarray: Hue adjusted image. - """ - if backend is None: - backend = imread_backend - if backend not in ['cv2', 'pillow']: - raise ValueError(f'backend: {backend} is not supported.' - f"Supported backends are 'cv2', 'pillow'") - - if not (-0.5 <= hue_factor <= 0.5): - raise ValueError(f'hue_factor:{hue_factor} is not in [-0.5, 0.5].') - if not (isinstance(img, np.ndarray) and (img.ndim in {2, 3})): - raise TypeError('img should be ndarray with dim=[2 or 3].') - - if backend == 'pillow': - assert img.dtype == np.uint8, 'Pillow backend only support uint8 type' - # Image.fromarray defaultly supports RGB, not BGR. 
- pil_image = Image.fromarray(img[..., ::-1], mode='RGB') - input_mode = pil_image.mode - if input_mode in {'L', '1', 'I', 'F'}: - return pil_image - - h, s, v = pil_image.convert('HSV').split() - - np_h = np.array(h, dtype=np.uint8) - # uint8 addition take cares of rotation across boundaries - with np.errstate(over='ignore'): - np_h += np.uint8(hue_factor * 255) - h = Image.fromarray(np_h, 'L') - - pil_image = Image.merge('HSV', (h, s, v)).convert(input_mode) - return np.array(pil_image, dtype=img.dtype)[..., ::-1] - else: - dtype = img.dtype - img = img.astype(np.uint8) - hsv_img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV_FULL) - h, s, v = cv2.split(hsv_img) - h = h.astype(np.uint8) - # uint8 addition take cares of rotation across boundaries - with np.errstate(over='ignore'): - h += np.uint8(hue_factor * 255) - hsv_img = cv2.merge([h, s, v]) - return cv2.cvtColor(hsv_img, cv2.COLOR_HSV2BGR_FULL).astype(dtype) diff --git a/mmcv/model_zoo/deprecated.json b/mmcv/model_zoo/deprecated.json new file mode 100644 index 0000000..25cf6f2 --- /dev/null +++ b/mmcv/model_zoo/deprecated.json @@ -0,0 +1,6 @@ +{ + "resnet50_caffe": "detectron/resnet50_caffe", + "resnet50_caffe_bgr": "detectron2/resnet50_caffe_bgr", + "resnet101_caffe": "detectron/resnet101_caffe", + "resnet101_caffe_bgr": "detectron2/resnet101_caffe_bgr" +} diff --git a/mmcv/model_zoo/mmcls.json b/mmcv/model_zoo/mmcls.json new file mode 100644 index 0000000..c073a41 --- /dev/null +++ b/mmcv/model_zoo/mmcls.json @@ -0,0 +1,59 @@ +{ + "vgg11": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg11_batch256_imagenet_20210208-4271cd6c.pth", + "vgg13": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg13_batch256_imagenet_20210208-4d1d6080.pth", + "vgg16": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg16_batch256_imagenet_20210208-db26f1a5.pth", + "vgg19": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg19_batch256_imagenet_20210208-e6920e4a.pth", + "vgg11_bn": 
"https://download.openmmlab.com/mmclassification/v0/vgg/vgg11_bn_batch256_imagenet_20210207-f244902c.pth", + "vgg13_bn": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg13_bn_batch256_imagenet_20210207-1a8b7864.pth", + "vgg16_bn": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg16_bn_batch256_imagenet_20210208-7e55cd29.pth", + "vgg19_bn": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg19_bn_batch256_imagenet_20210208-da620c4f.pth", + "resnet18": "https://download.openmmlab.com/mmclassification/v0/resnet/resnet18_8xb32_in1k_20210831-fbbb1da6.pth", + "resnet34": "https://download.openmmlab.com/mmclassification/v0/resnet/resnet34_8xb32_in1k_20210831-f257d4e6.pth", + "resnet50": "https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb32_in1k_20210831-ea4938fc.pth", + "resnet101": "https://download.openmmlab.com/mmclassification/v0/resnet/resnet101_8xb32_in1k_20210831-539c63f8.pth", + "resnet152": "https://download.openmmlab.com/mmclassification/v0/resnet/resnet152_8xb32_in1k_20210901-4d7582fa.pth", + "resnet50_v1d": "https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d50_b32x8_imagenet_20210531-db14775a.pth", + "resnet101_v1d": "https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d101_b32x8_imagenet_20210531-6e13bcd3.pth", + "resnet152_v1d": "https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d152_b32x8_imagenet_20210531-278cf22a.pth", + "resnext50_32x4d": "https://download.openmmlab.com/mmclassification/v0/resnext/resnext50_32x4d_b32x8_imagenet_20210429-56066e27.pth", + "resnext101_32x4d": "https://download.openmmlab.com/mmclassification/v0/resnext/resnext101_32x4d_b32x8_imagenet_20210506-e0fa3dd5.pth", + "resnext101_32x8d": "https://download.openmmlab.com/mmclassification/v0/resnext/resnext101_32x8d_b32x8_imagenet_20210506-23a247d5.pth", + "resnext152_32x4d": "https://download.openmmlab.com/mmclassification/v0/resnext/resnext152_32x4d_b32x8_imagenet_20210524-927787be.pth", + 
"se-resnet50": "https://download.openmmlab.com/mmclassification/v0/se-resnet/se-resnet50_batch256_imagenet_20200804-ae206104.pth", + "se-resnet101": "https://download.openmmlab.com/mmclassification/v0/se-resnet/se-resnet101_batch256_imagenet_20200804-ba5b51d4.pth", + "resnest50": "https://download.openmmlab.com/mmclassification/v0/resnest/resnest50_imagenet_converted-1ebf0afe.pth", + "resnest101": "https://download.openmmlab.com/mmclassification/v0/resnest/resnest101_imagenet_converted-032caa52.pth", + "resnest200": "https://download.openmmlab.com/mmclassification/v0/resnest/resnest200_imagenet_converted-581a60f2.pth", + "resnest269": "https://download.openmmlab.com/mmclassification/v0/resnest/resnest269_imagenet_converted-59930960.pth", + "shufflenet_v1": "https://download.openmmlab.com/mmclassification/v0/shufflenet_v1/shufflenet_v1_batch1024_imagenet_20200804-5d6cec73.pth", + "shufflenet_v2": "https://download.openmmlab.com/mmclassification/v0/shufflenet_v2/shufflenet_v2_batch1024_imagenet_20200812-5bf4721e.pth", + "mobilenet_v2": "https://download.openmmlab.com/mmclassification/v0/mobilenet_v2/mobilenet_v2_batch256_imagenet_20200708-3b2dc3af.pth", + "mobilenet_v3_small": "https://download.openmmlab.com/mmclassification/v0/mobilenet_v3/convert/mobilenet_v3_small-8427ecf0.pth", + "mobilenet_v3_large": "https://download.openmmlab.com/mmclassification/v0/mobilenet_v3/convert/mobilenet_v3_large-3ea3c186.pth", + "repvgg_A0": "https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-A0_3rdparty_4xb64-coslr-120e_in1k_20210909-883ab98c.pth", + "repvgg_A1": "https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-A1_3rdparty_4xb64-coslr-120e_in1k_20210909-24003a24.pth", + "repvgg_A2": "https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-A2_3rdparty_4xb64-coslr-120e_in1k_20210909-97d7695a.pth", + "repvgg_B0": "https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-B0_3rdparty_4xb64-coslr-120e_in1k_20210909-446375f4.pth", + 
"repvgg_B1": "https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-B1_3rdparty_4xb64-coslr-120e_in1k_20210909-750cdf67.pth", + "repvgg_B1g2": "https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-B1g2_3rdparty_4xb64-coslr-120e_in1k_20210909-344f6422.pth", + "repvgg_B1g4": "https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-B1g4_3rdparty_4xb64-coslr-120e_in1k_20210909-d4c1a642.pth", + "repvgg_B2": "https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-B2_3rdparty_4xb64-coslr-120e_in1k_20210909-bd6b937c.pth", + "repvgg_B2g4": "https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-B2g4_3rdparty_4xb64-autoaug-lbs-mixup-coslr-200e_in1k_20210909-7b7955f0.pth", + "repvgg_B3": "https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-B3_3rdparty_4xb64-autoaug-lbs-mixup-coslr-200e_in1k_20210909-dda968bf.pth", + "repvgg_B3g4": "https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-B3g4_3rdparty_4xb64-autoaug-lbs-mixup-coslr-200e_in1k_20210909-4e54846a.pth", + "repvgg_D2se": "https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-D2se_3rdparty_4xb64-autoaug-lbs-mixup-coslr-200e_in1k_20210909-cf3139b7.pth", + "res2net101_w26": "https://download.openmmlab.com/mmclassification/v0/res2net/res2net101-w26-s4_3rdparty_8xb32_in1k_20210927-870b6c36.pth", + "res2net50_w14": "https://download.openmmlab.com/mmclassification/v0/res2net/res2net50-w14-s8_3rdparty_8xb32_in1k_20210927-bc967bf1.pth", + "res2net50_w26": "https://download.openmmlab.com/mmclassification/v0/res2net/res2net50-w26-s8_3rdparty_8xb32_in1k_20210927-f547a94b.pth", + "swin_tiny": "https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_tiny_224_b16x64_300e_imagenet_20210616_090925-66df6be6.pth", + "swin_small": "https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_small_224_b16x64_300e_imagenet_20210615_110219-7f9d988b.pth", + "swin_base": 
"https://download.openmmlab.com/mmclassification/v0/swin-transformer/convert/swin_base_patch4_window7_224_22kto1k-f967f799.pth", + "swin_large": "https://download.openmmlab.com/mmclassification/v0/swin-transformer/convert/swin_large_patch4_window7_224_22kto1k-5f0996db.pth", + "t2t_vit_t_14": "https://download.openmmlab.com/mmclassification/v0/t2t-vit/t2t-vit-t-14_3rdparty_8xb64_in1k_20210928-b7c09b62.pth", + "t2t_vit_t_19": "https://download.openmmlab.com/mmclassification/v0/t2t-vit/t2t-vit-t-19_3rdparty_8xb64_in1k_20210928-7f1478d5.pth", + "t2t_vit_t_24": "https://download.openmmlab.com/mmclassification/v0/t2t-vit/t2t-vit-t-24_3rdparty_8xb64_in1k_20210928-fe95a61b.pth", + "tnt_small": "https://download.openmmlab.com/mmclassification/v0/tnt/tnt-small-p16_3rdparty_in1k_20210903-c56ee7df.pth", + "vit_base_p16": "https://download.openmmlab.com/mmclassification/v0/vit/finetune/vit-base-p16_in21k-pre-3rdparty_ft-64xb64_in1k-384_20210928-98e8652b.pth", + "vit_base_p32": "https://download.openmmlab.com/mmclassification/v0/vit/finetune/vit-base-p32_in21k-pre-3rdparty_ft-64xb64_in1k-384_20210928-9cea8599.pth", + "vit_large_p16": "https://download.openmmlab.com/mmclassification/v0/vit/finetune/vit-large-p16_in21k-pre-3rdparty_ft-64xb64_in1k-384_20210928-b20ba619.pth" +} diff --git a/mmcv/model_zoo/open_mmlab.json b/mmcv/model_zoo/open_mmlab.json new file mode 100644 index 0000000..8311db4 --- /dev/null +++ b/mmcv/model_zoo/open_mmlab.json @@ -0,0 +1,50 @@ +{ + "vgg16_caffe": "https://download.openmmlab.com/pretrain/third_party/vgg16_caffe-292e1171.pth", + "detectron/resnet50_caffe": "https://download.openmmlab.com/pretrain/third_party/resnet50_caffe-788b5fa3.pth", + "detectron2/resnet50_caffe": "https://download.openmmlab.com/pretrain/third_party/resnet50_msra-5891d200.pth", + "detectron/resnet101_caffe": "https://download.openmmlab.com/pretrain/third_party/resnet101_caffe-3ad79236.pth", + "detectron2/resnet101_caffe": 
"https://download.openmmlab.com/pretrain/third_party/resnet101_msra-6cc46731.pth", + "detectron2/resnext101_32x8d": "https://download.openmmlab.com/pretrain/third_party/resnext101_32x8d-1516f1aa.pth", + "resnext50_32x4d": "https://download.openmmlab.com/pretrain/third_party/resnext50-32x4d-0ab1a123.pth", + "resnext101_32x4d": "https://download.openmmlab.com/pretrain/third_party/resnext101_32x4d-a5af3160.pth", + "resnext101_64x4d": "https://download.openmmlab.com/pretrain/third_party/resnext101_64x4d-ee2c6f71.pth", + "contrib/resnet50_gn": "https://download.openmmlab.com/pretrain/third_party/resnet50_gn_thangvubk-ad1730dd.pth", + "detectron/resnet50_gn": "https://download.openmmlab.com/pretrain/third_party/resnet50_gn-9186a21c.pth", + "detectron/resnet101_gn": "https://download.openmmlab.com/pretrain/third_party/resnet101_gn-cac0ab98.pth", + "jhu/resnet50_gn_ws": "https://download.openmmlab.com/pretrain/third_party/resnet50_gn_ws-15beedd8.pth", + "jhu/resnet101_gn_ws": "https://download.openmmlab.com/pretrain/third_party/resnet101_gn_ws-3e3c308c.pth", + "jhu/resnext50_32x4d_gn_ws": "https://download.openmmlab.com/pretrain/third_party/resnext50_32x4d_gn_ws-0d87ac85.pth", + "jhu/resnext101_32x4d_gn_ws": "https://download.openmmlab.com/pretrain/third_party/resnext101_32x4d_gn_ws-34ac1a9e.pth", + "jhu/resnext50_32x4d_gn": "https://download.openmmlab.com/pretrain/third_party/resnext50_32x4d_gn-c7e8b754.pth", + "jhu/resnext101_32x4d_gn": "https://download.openmmlab.com/pretrain/third_party/resnext101_32x4d_gn-ac3bb84e.pth", + "msra/hrnetv2_w18_small": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w18_small-b5a04e21.pth", + "msra/hrnetv2_w18": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w18-00eb2006.pth", + "msra/hrnetv2_w32": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w32-dc9eeb4f.pth", + "msra/hrnetv2_w40": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w40-ed0b031c.pth", + "msra/hrnetv2_w48": 
"https://download.openmmlab.com/pretrain/third_party/hrnetv2_w48-d2186c55.pth", + "bninception_caffe": "https://download.openmmlab.com/pretrain/third_party/bn_inception_caffe-ed2e8665.pth", + "kin400/i3d_r50_f32s2_k400": "https://download.openmmlab.com/pretrain/third_party/i3d_r50_f32s2_k400-2c57e077.pth", + "kin400/nl3d_r50_f32s2_k400": "https://download.openmmlab.com/pretrain/third_party/nl3d_r50_f32s2_k400-fa7e7caa.pth", + "res2net101_v1d_26w_4s": "https://download.openmmlab.com/pretrain/third_party/res2net101_v1d_26w_4s_mmdetv2-f0a600f9.pth", + "regnetx_400mf": "https://download.openmmlab.com/pretrain/third_party/regnetx_400mf-a5b10d96.pth", + "regnetx_800mf": "https://download.openmmlab.com/pretrain/third_party/regnetx_800mf-1f4be4c7.pth", + "regnetx_1.6gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_1.6gf-5791c176.pth", + "regnetx_3.2gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_3.2gf-c2599b0f.pth", + "regnetx_4.0gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_4.0gf-a88f671e.pth", + "regnetx_6.4gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_6.4gf-006af45d.pth", + "regnetx_8.0gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_8.0gf-3c68abe7.pth", + "regnetx_12gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_12gf-4c2a3350.pth", + "resnet18_v1c": "https://download.openmmlab.com/pretrain/third_party/resnet18_v1c-b5776b93.pth", + "resnet50_v1c": "https://download.openmmlab.com/pretrain/third_party/resnet50_v1c-2cccc1ad.pth", + "resnet101_v1c": "https://download.openmmlab.com/pretrain/third_party/resnet101_v1c-e67eebb6.pth", + "mmedit/vgg16": "https://download.openmmlab.com/mmediting/third_party/vgg_state_dict.pth", + "mmedit/res34_en_nomixup": "https://download.openmmlab.com/mmediting/third_party/model_best_resnet34_En_nomixup.pth", + "mmedit/mobilenet_v2": "https://download.openmmlab.com/mmediting/third_party/mobilenet_v2.pth", + 
"contrib/mobilenet_v3_large": "https://download.openmmlab.com/pretrain/third_party/mobilenet_v3_large-bc2c3fd3.pth", + "contrib/mobilenet_v3_small": "https://download.openmmlab.com/pretrain/third_party/mobilenet_v3_small-47085aa1.pth", + "resnest50": "https://download.openmmlab.com/pretrain/third_party/resnest50_d2-7497a55b.pth", + "resnest101": "https://download.openmmlab.com/pretrain/third_party/resnest101_d2-f3b931b2.pth", + "resnest200": "https://download.openmmlab.com/pretrain/third_party/resnest200_d2-ca88e41f.pth", + "darknet53": "https://download.openmmlab.com/pretrain/third_party/darknet53-a628ea1b.pth", + "mmdet/mobilenet_v2": "https://download.openmmlab.com/mmdetection/v2.0/third_party/mobilenet_v2_batch256_imagenet-ff34753d.pth" +} diff --git a/mmcv/onnx/__init__.py b/mmcv/onnx/__init__.py new file mode 100644 index 0000000..0d7eb5b --- /dev/null +++ b/mmcv/onnx/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .info import is_custom_op_loaded +from .symbolic import register_extra_symbolics + +__all__ = ['register_extra_symbolics', 'is_custom_op_loaded'] diff --git a/mmcv/onnx/info.py b/mmcv/onnx/info.py new file mode 100644 index 0000000..e599973 --- /dev/null +++ b/mmcv/onnx/info.py @@ -0,0 +1,21 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import os + +import torch + + +def is_custom_op_loaded(): + flag = False + try: + from ..tensorrt import is_tensorrt_plugin_loaded + flag = is_tensorrt_plugin_loaded() + except (ImportError, ModuleNotFoundError): + pass + if not flag: + try: + from ..ops import get_onnxruntime_op_path + ort_lib_path = get_onnxruntime_op_path() + flag = os.path.exists(ort_lib_path) + except (ImportError, ModuleNotFoundError): + pass + return flag or torch.__version__ == 'parrots' diff --git a/mmcv/onnx/onnx_utils/__init__.py b/mmcv/onnx/onnx_utils/__init__.py new file mode 100644 index 0000000..ef101fe --- /dev/null +++ b/mmcv/onnx/onnx_utils/__init__.py @@ -0,0 +1 @@ +# Copyright (c) OpenMMLab. All rights reserved. diff --git a/mmcv/onnx/onnx_utils/symbolic_helper.py b/mmcv/onnx/onnx_utils/symbolic_helper.py new file mode 100644 index 0000000..a9a31eb --- /dev/null +++ b/mmcv/onnx/onnx_utils/symbolic_helper.py @@ -0,0 +1,331 @@ +# Copyright (c) OpenMMLab. All rights reserved. +"""Modified from https://github.com/pytorch/pytorch.""" +import warnings +from functools import wraps +from sys import maxsize + +import torch +import torch.onnx +# This import monkey-patches graph manipulation methods on Graph, used for the +# ONNX symbolics +import torch.onnx.utils +from torch._C import ListType + +# --------------------------------------------------------------------------------- +# Helper functions +# --------------------------------------------------------------------------------- + +# Save some builtins as locals, because we'll shadown them below +_sum = sum + + +def _parse_arg(value, desc): + if desc == 'none': + return value + if desc == 'v' or not _is_value(value): + return value + if value.node().mustBeNone(): + return None + if value.node().kind() == 'onnx::Constant': + tval = value.node()['value'] + if desc == 'i': + return int(tval) + elif desc == 'f': + return float(tval) + elif desc == 'b': + return bool(tval) + elif desc == 's': + return str(tval) + elif desc == 't': + return 
tval + elif desc == 'is': + return [int(v) for v in tval] + elif desc == 'fs': + return [float(v) for v in tval] + else: + raise RuntimeError( + "ONNX symbolic doesn't know to interpret Constant node") + elif value.node().kind() == 'prim::ListConstruct': + if desc == 'is': + for v in value.node().inputs(): + if v.node().kind() != 'onnx::Constant': + raise RuntimeError( + "Failed to export an ONNX attribute '" + + v.node().kind() + + "', since it's not constant, please try to make " + 'things (e.g., kernel size) static if possible') + return [int(v.node()['value']) for v in value.node().inputs()] + else: + raise RuntimeError( + "ONNX symbolic doesn't know to interpret ListConstruct node") + + raise RuntimeError('Unexpected node type: {}'.format(value.node().kind())) + + +def _maybe_get_const(value, desc): + if _is_value(value) and value.node().kind() == 'onnx::Constant': + return _parse_arg(value, desc) + return value + + +def _maybe_get_scalar(value): + value_t = _maybe_get_const(value, 't') + if isinstance(value_t, torch.Tensor) and value_t.shape == (): + return value_t + return value + + +def _get_const(value, desc, arg_name): + if _is_value(value) and value.node().kind() not in ('onnx::Constant', + 'prim::Constant'): + raise RuntimeError('ONNX symbolic expected a constant' + ' value of the {} argument, got `{}`'.format( + arg_name, value)) + return _parse_arg(value, desc) + + +def _unpack_list(list_value): + list_node = list_value.node() + assert list_node.kind() == 'prim::ListConstruct' + return list(list_node.inputs()) + + +# Check if list_value is output from prim::ListConstruct +# This is usually called before _unpack_list to ensure the list can be +# unpacked. 
+def _is_packed_list(list_value): + return _is_value( + list_value) and list_value.node().kind() == 'prim::ListConstruct' + + +def parse_args(*arg_descriptors): + + def decorator(fn): + fn._arg_descriptors = arg_descriptors + + def wrapper(g, *args): + # some args may be optional, so the length may be smaller + assert len(arg_descriptors) >= len(args) + args = [ + _parse_arg(arg, arg_desc) + for arg, arg_desc in zip(args, arg_descriptors) + ] + return fn(g, *args) + + # In Python 2 functools.wraps chokes on partially applied functions, so + # we need this as a workaround + try: + wrapper = wraps(fn)(wrapper) + except Exception: + pass + return wrapper + + return decorator + + +def _scalar(x): + """Convert a scalar tensor into a Python value.""" + assert x.numel() == 1 + return x.item() + + +def _if_scalar_type_as(g, self, tensor): + """Convert self into the same type of tensor, as necessary.""" + if isinstance(self, torch._C.Value): + return self + + scalar_type = tensor.type().scalarType() + if scalar_type: + ty = scalar_type.lower() + return getattr(self, ty)() + + return self + + +def _is_none(x): + return x.node().mustBeNone() + + +def _is_value(x): + return isinstance(x, torch._C.Value) + + +def _is_tensor_list(x): + return x.type().isSubtypeOf(ListType.ofTensors()) + + +def _unimplemented(op, msg): + warnings.warn('ONNX export failed on ' + op + ' because ' + msg + + ' not supported') + + +def _try_get_scalar_type(*args): + for arg in args: + try: + return arg.type().scalarType() + except RuntimeError: + pass + return None + + +def _topk_helper(g, input, k, dim, largest=True, sorted=False, out=None): + if out is not None: + _unimplemented('TopK', 'Out parameter is not supported') + if not _is_value(k): + k = g.op('Constant', value_t=torch.tensor([k], dtype=torch.int64)) + else: + k = g.op('Reshape', k, g.op('Constant', value_t=torch.tensor([1]))) + return g.op( + 'TopK', + input, + k, + axis_i=dim, + largest_i=largest, + sorted_i=sorted, + outputs=2) + + +def 
_slice_helper(g, + input, + axes, + starts, + ends, + steps=None, + dynamic_slice=False): + # TODO(ruobing): add support for opset<10 + from torch.onnx.symbolic_opset10 import _slice + return _slice(g, input, axes, starts, ends, steps, dynamic_slice) + + +def _unsqueeze_helper(g, input, dim): + from torch.onnx.symbolic_opset9 import unsqueeze + return unsqueeze(g, input, dim) + + +def _interpolate_size_to_scales(g, input, output_size, dim): + output_size = _maybe_get_const(output_size, 'is') + if _is_value(output_size): + offset = 2 + offsets = g.op( + 'Constant', value_t=torch.ones(offset, dtype=torch.float32)) + dividend = g.op( + 'Cast', output_size, to_i=cast_pytorch_to_onnx['Float']) + divisor = _slice_helper( + g, g.op('Shape', input), axes=[0], ends=[maxsize], starts=[offset]) + divisor = g.op('Cast', divisor, to_i=cast_pytorch_to_onnx['Float']) + scale_dims = g.op('Div', dividend, divisor) + scales = g.op('Concat', offsets, scale_dims, axis_i=0) + else: + scales_constant = [ + 1. 
if i < 2 else float(output_size[-(dim - i)]) / + float(input.type().sizes()[-(dim - i)]) for i in range(0, dim) + ] + scales = g.op( + 'Constant', + value_t=torch.tensor(scales_constant, dtype=torch.float32)) + return scales + + +def _interpolate_get_scales_if_available(g, scales): + if len(scales) == 0: + return None + # scales[0] is NoneType in Pytorch == 1.5.1 + # scales[0] is TensorType with sizes = [] in Pytorch == 1.6.0 + # scales[0] is ListType in Pytorch == 1.7.0 + # scales[0] is TensorType with sizes = [2] in Pytorch == 1.8.0 + scale_desc = 'fs' if scales[0].type().kind() == 'ListType' or ( + scales[0].type().kind() == 'TensorType' and + (sum(scales[0].type().sizes()) > 1)) else 'f' + available_scales = _maybe_get_const( + scales[0], scale_desc) != -1 and not _is_none(scales[0]) + + if not available_scales: + return None + + offsets = g.op('Constant', value_t=torch.ones(2, dtype=torch.float32)) + if scale_desc == 'fs': + scales_list = g.op( + 'Constant', + value_t=torch.tensor(_maybe_get_const(scales[0], scale_desc))) + # modify to support PyTorch==1.7.0 + # https://github.com/pytorch/pytorch/blob/75ee5756715e7161314ce037474843b68f69fc04/torch/onnx/symbolic_helper.py#L375 # noqa: E501 + scales = g.op('Concat', offsets, scales_list, axis_i=0) + else: + # for PyTorch < 1.7.0 + scales_list = [] + for scale in scales: + unsqueezed_scale = _unsqueeze_helper(g, scale, 0) + # ONNX only supports float for the scales. double -> float. 
+ unsqueezed_scale = g.op( + 'Cast', unsqueezed_scale, to_i=cast_pytorch_to_onnx['Float']) + scales_list.append(unsqueezed_scale) + scales = g.op('Concat', offsets, *scales_list, axis_i=0) + return scales + + +def _get_interpolate_attributes(g, mode, args): + if mode == 'nearest': + align_corners = None + scales = args[0:] + else: + align_corners = args[0] + scales = args[1:] + scales = _interpolate_get_scales_if_available(g, scales) + return scales, align_corners + + +def _interpolate_get_scales(g, scale_factor, dim): + offsets = g.op('Constant', value_t=torch.ones(2, dtype=torch.float32)) + if isinstance(scale_factor.type(), torch._C.ListType): + return g.op('Concat', offsets, scale_factor, axis_i=0) + else: + scale_factor = _unsqueeze_helper(g, scale_factor, 0) + scale_factor = g.op( + 'Cast', scale_factor, to_i=cast_pytorch_to_onnx['Float']) + scales = [scale_factor for i in range(dim - 2)] + scale_factor = g.op('Concat', offsets, *scales, axis_i=0) + return scale_factor + + +def _size_helper(g, self, dim): + full_shape = g.op('Shape', self) + from torch.onnx.symbolic_opset9 import select + return select(g, full_shape, g.op('Constant', value_t=torch.tensor([0])), + dim) + + +def _avgpool_helper(tuple_fn, padding, kernel_size, stride, divisor_override, + name): + if divisor_override and divisor_override.node().kind() != 'prim::Constant': + return _unimplemented(name, 'divisor_override') + if not stride: + stride = kernel_size + padding = tuple(tuple_fn(padding)) + return padding + + +# Metaprogram symbolics for each ATen native specialized cast operator. +# For e.g. 
we specify a function named `_cast_uint8_t` that instantiates an +# ONNX cast node with `to` attribute 'UINT8' +# +# TODO: remove these once we support Type's in the JIT IR and we can once again +# use the unified toType operator +cast_pytorch_to_onnx = { + 'Byte': torch.onnx.TensorProtoDataType.UINT8, + 'Char': torch.onnx.TensorProtoDataType.INT8, + 'Double': torch.onnx.TensorProtoDataType.DOUBLE, + 'Float': torch.onnx.TensorProtoDataType.FLOAT, + 'Half': torch.onnx.TensorProtoDataType.FLOAT16, + 'Int': torch.onnx.TensorProtoDataType.INT32, + 'Long': torch.onnx.TensorProtoDataType.INT64, + 'Short': torch.onnx.TensorProtoDataType.INT16, + 'Bool': torch.onnx.TensorProtoDataType.BOOL, + 'ComplexFloat': torch.onnx.TensorProtoDataType.COMPLEX64, + 'ComplexDouble': torch.onnx.TensorProtoDataType.COMPLEX128, + 'Undefined': torch.onnx.TensorProtoDataType.UNDEFINED, +} + +# Global set to store the list of quantized operators in the network. +# This is currently only used in the conversion of quantized ops from PT +# -> C2 via ONNX. +_quantized_ops = set() diff --git a/mmcv/onnx/symbolic.py b/mmcv/onnx/symbolic.py new file mode 100644 index 0000000..94cc1c6 --- /dev/null +++ b/mmcv/onnx/symbolic.py @@ -0,0 +1,496 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+"""Modified from https://github.com/pytorch/pytorch.""" +import os + +import numpy as np +import torch +from torch.nn.modules.utils import _pair, _single, _triple +from torch.onnx.symbolic_helper import parse_args +from torch.onnx.symbolic_registry import register_op + +from .onnx_utils import symbolic_helper as sym_help + + +def _interpolate(name, dim, interpolate_mode): + + def symbolic_fn(g, input, output_size, *args): + scales, align_corners = sym_help._get_interpolate_attributes( + g, interpolate_mode, args) + align_corners = sym_help._maybe_get_scalar(align_corners) + transformation_mode = 'asymmetric' \ + if interpolate_mode == 'nearest' \ + else 'align_corners' if align_corners else 'pytorch_half_pixel' + empty_tensor = g.op( + 'Constant', value_t=torch.tensor([], dtype=torch.float32)) + + if scales is None: + if 'ONNX_BACKEND' in os.environ and os.environ[ + 'ONNX_BACKEND'] == 'TensorRT': + input_size = input.type().sizes() + # slice the first two dim + input_size = input_size[:2] + # convert output_size to int type + output_size = sym_help._maybe_get_const(output_size, 'is') + input_size.extend(output_size) + output_size = g.op( + 'Constant', + value_t=torch.tensor(input_size, dtype=torch.int64)) + else: + input_size = g.op('Shape', input) + input_size_beg = sym_help._slice_helper( + g, input_size, axes=[0], ends=[2], starts=[0]) + output_size = g.op( + 'Cast', + output_size, + to_i=sym_help.cast_pytorch_to_onnx['Long']) + output_size = g.op( + 'Concat', input_size_beg, output_size, axis_i=0) + scales = g.op( + 'Constant', value_t=torch.tensor([], dtype=torch.float32)) + return g.op( + 'Resize', + input, + empty_tensor, + # roi only takes effect with + # coordinate_transformation_mode="tf_crop_and_resize" + scales, # scales is not needed since we are sending out_size + output_size, + coordinate_transformation_mode_s=transformation_mode, + cubic_coeff_a_f=-0.75, # only valid when mode="cubic" + mode_s=interpolate_mode, # nearest, linear, or cubic + 
nearest_mode_s='floor') # only valid when mode="nearest" + else: + return g.op( + 'Resize', + input, + empty_tensor, + # roi only takes effect with + # coordinate_transformation_mode="tf_crop_and_resize" + scales, # scales is not needed since we are sending out_size + coordinate_transformation_mode_s=transformation_mode, + cubic_coeff_a_f=-0.75, # only valid when mode="cubic" + mode_s=interpolate_mode, # nearest, linear, or cubic + nearest_mode_s='floor') # only valid when mode="nearest" + + return symbolic_fn + + +upsample_nearest1d = _interpolate('upsample_nearest1d', 3, 'nearest') +upsample_nearest2d = _interpolate('upsample_nearest2d', 4, 'nearest') +upsample_nearest3d = _interpolate('upsample_nearest3d', 5, 'nearest') +upsample_linear1d = _interpolate('upsample_linear1d', 3, 'linear') +upsample_bilinear2d = _interpolate('upsample_bilinear2d', 4, 'linear') +upsample_trilinear3d = _interpolate('upsample_trilinear3d', 5, 'linear') +upsample_bicubic2d = _interpolate('upsample_bicubic2d', 4, 'cubic') + + +@parse_args('v', 'v', 'i', 'i', 'i', 'none') +def topk(g, self, k, dim, largest, sorted, out=None): + return sym_help._topk_helper( + g, self, k, dim, largest=largest, sorted=sorted, out=out) + + +def masked_select(g, self, mask): + from torch.onnx.symbolic_opset9 import expand_as, nonzero + index = nonzero(g, expand_as(g, mask, self)) + return g.op('GatherND', self, index) + + +def _prepare_onnx_paddings(g, dim, pad): + pad_len = torch.onnx.symbolic_opset9.size( + g, pad, g.op('Constant', value_t=torch.tensor([0]))) + # Set extension = [0] * (dim * 2 - len(pad)) + extension = g.op( + 'Sub', + g.op('Mul', + g.op('Constant', value_t=torch.tensor(dim, dtype=torch.int64)), + g.op('Constant', value_t=torch.tensor(2, dtype=torch.int64))), + pad_len) + pad = g.op('Cast', pad, to_i=sym_help.cast_pytorch_to_onnx['Long']) + paddings = g.op( + 'Concat', + pad, + g.op( + 'ConstantOfShape', + extension, + value_t=torch.tensor([0], dtype=torch.int64)), + axis_i=0) + paddings = 
g.op('Reshape', paddings, + g.op('Constant', value_t=torch.tensor([-1, 2]))) + paddings = g.op( + 'Transpose', + torch.onnx.symbolic_opset10.flip(g, paddings, [0]), + perm_i=[1, 0]) + paddings = g.op('Reshape', paddings, + g.op('Constant', value_t=torch.tensor([-1]))) + padding_c = g.op( + 'Cast', paddings, to_i=sym_help.cast_pytorch_to_onnx['Long']) + return padding_c + + +def constant_pad_nd(g, input, padding, value=None): + mode = 'constant' + value = sym_help._maybe_get_scalar(value) + value = sym_help._if_scalar_type_as(g, value, input) + pad = _prepare_onnx_paddings(g, input.type().dim(), padding) + return g.op('Pad', input, pad, value, mode_s=mode) + + +def reflection_pad(g, input, padding): + mode = 'reflect' + paddings = _prepare_onnx_paddings(g, input.type().dim(), padding) + return g.op('Pad', input, paddings, mode_s=mode) + + +reflection_pad1d = reflection_pad +reflection_pad2d = reflection_pad +reflection_pad3d = reflection_pad + + +def _avg_pool(name, tuple_fn): + + @parse_args('v', 'is', 'is', 'is', 'i', 'i', 'none') + def symbolic_fn(g, + input, + kernel_size, + stride, + padding, + ceil_mode, + count_include_pad, + divisor_override=None): + padding = sym_help._avgpool_helper(tuple_fn, padding, kernel_size, + stride, divisor_override, name) + if not stride: + stride = kernel_size + if count_include_pad: + input = g.op( + 'Pad', + input, + g.op( + 'Constant', + value_t=torch.tensor(((0, ) * 2 + padding) * 2)), + mode_s='constant') + padding = (0, ) * len(padding) + output = g.op( + 'AveragePool', + input, + kernel_shape_i=tuple_fn(kernel_size), + strides_i=tuple_fn(stride), + pads_i=padding * 2, + ceil_mode_i=ceil_mode) + return output + + return symbolic_fn + + +avg_pool1d = _avg_pool('avg_pool1d', _single) +avg_pool2d = _avg_pool('avg_pool2d', _pair) +avg_pool3d = _avg_pool('avg_pool3d', _triple) + + +def _get_im2col_indices_along_dim(g, input_d, kernel_size_d, dilation_d, + padding_d, stride_d): + # Input is always 4-D (N, C, H, W) + # Calculate 
indices of sliding blocks along spatial dimension + # Slide kernel over input each dim d: + # each dimension d ranges from 0 to + # input[d]+2xpadding[d]-dilation[d]x(kernel_size[d]-1) + # with steps = stride + + blocks_d = g.op('Add', input_d, + g.op('Constant', value_t=torch.tensor(padding_d * 2))) + blocks_d = g.op( + 'Sub', blocks_d, + g.op( + 'Constant', + value_t=torch.tensor(dilation_d * (kernel_size_d - 1)))) + + # Stride kernel over input and find starting indices along dim d + blocks_d_indices = g.op('Range', g.op('Constant', value_t=torch.tensor(0)), + blocks_d, + g.op('Constant', value_t=torch.tensor(stride_d))) + + # Apply dilation on kernel and find its indices along dim d + kernel_grid = np.arange(0, kernel_size_d * dilation_d, dilation_d) + kernel_grid = g.op('Constant', value_t=torch.tensor([kernel_grid])) + + # Broadcast and add kernel staring positions (indices) with + # kernel_grid along dim d, to get block indices along dim d + blocks_d_indices = g.op( + 'Unsqueeze', blocks_d_indices, axes_i=[0]) # Reshape to [1, -1] + kernel_mask = g.op('Reshape', kernel_grid, + g.op('Constant', value_t=torch.tensor([-1, 1]))) + block_mask = g.op('Add', blocks_d_indices, kernel_mask) + + return block_mask + + +def _get_im2col_padded_input(g, input, padding_h, padding_w): + # Input is always 4-D tensor (N, C, H, W) + # Padding tensor has the following format: (padding_h, padding_w) + # Reshape the padding to follow ONNX format: + # (dim1_begin, dim2_begin,...,dim1_end, dim2_end,...) 
+ pad = g.op( + 'Constant', value_t=torch.LongTensor([0, 0, padding_h, padding_w] * 2)) + return g.op('Pad', input, pad) + + +def _get_im2col_output_shape(g, input, kernel_h, kernel_w): + batch_dim = size(g, input, g.op('Constant', value_t=torch.tensor(0))) + channel_dim = size(g, input, g.op('Constant', value_t=torch.tensor(1))) + channel_unfolded = g.op( + 'Mul', channel_dim, + g.op('Constant', value_t=torch.tensor(kernel_h * kernel_w))) + + return g.op( + 'Concat', + g.op('Unsqueeze', batch_dim, axes_i=[0]), + g.op('Unsqueeze', channel_unfolded, axes_i=[0]), + g.op('Constant', value_t=torch.tensor([-1])), + axis_i=0) + + +def size(g, self, dim=None): + if dim is None: + return g.op('Shape', self) + return sym_help._size_helper(g, self, dim) + + +@parse_args('v', 'is', 'is', 'is', 'is') +def im2col(g, input, kernel_size, dilation, padding, stride): + # Input is always 4-D tensor (N, C, H, W) + # All other args are int[2] + + input_h = size(g, input, g.op('Constant', value_t=torch.tensor(2))) + input_w = size(g, input, g.op('Constant', value_t=torch.tensor(3))) + + stride_h, stride_w = stride[0], stride[1] + padding_h, padding_w = padding[0], padding[1] + dilation_h, dilation_w = dilation[0], dilation[1] + kernel_h, kernel_w = kernel_size[0], kernel_size[1] + + blocks_row_indices = _get_im2col_indices_along_dim(g, input_h, kernel_h, + dilation_h, padding_h, + stride_h) + blocks_col_indices = _get_im2col_indices_along_dim(g, input_w, kernel_w, + dilation_w, padding_w, + stride_w) + + output_shape = _get_im2col_output_shape(g, input, kernel_h, kernel_w) + padded_input = _get_im2col_padded_input(g, input, padding_h, padding_w) + + output = g.op('Gather', padded_input, blocks_row_indices, axis_i=2) + output = g.op('Gather', output, blocks_col_indices, axis_i=4) + output = g.op('Transpose', output, perm_i=[0, 1, 2, 4, 3, 5]) + return g.op('Reshape', output, output_shape) + + +@parse_args('v', 'i') +def one_hot(g, self, num_classes): + values = g.op('Constant', 
value_t=torch.LongTensor([0, 1])) + depth = g.op('Constant', value_t=torch.LongTensor([num_classes])) + return g.op('OneHot', self, depth, values, axis_i=-1) + + +@parse_args('v', 'i', 'none') +def softmax(g, input, dim, dtype=None): + input_dim = input.type().dim() + if input_dim: + # TODO: remove this as onnx opset 11 spec allows negative axes + if dim < 0: + dim = input_dim + dim + if input_dim == dim + 1: + softmax = g.op('Softmax', input, axis_i=dim) + if dtype and dtype.node().kind() != 'prim::Constant': + parsed_dtype = sym_help._get_const(dtype, 'i', 'dtype') + softmax = g.op( + 'Cast', + softmax, + to_i=sym_help.scalar_type_to_onnx[parsed_dtype]) + return softmax + + max_value = g.op('ReduceMax', input, axes_i=[dim], keepdims_i=1) + input = g.op('Sub', input, max_value) + exp = g.op('Exp', input) + sum = g.op('ReduceSum', exp, axes_i=[dim]) + softmax = g.op('Div', exp, sum) + if dtype and dtype.node().kind() != 'prim::Constant': + parsed_dtype = sym_help._get_const(dtype, 'i', 'dtype') + softmax = g.op( + 'Cast', softmax, to_i=sym_help.scalar_type_to_onnx[parsed_dtype]) + return softmax + + +def _adaptive_pool(name, type, tuple_fn, fn=None): + + @parse_args('v', 'is') + def symbolic_fn(g, input, output_size): + if output_size == [1] * len(output_size) and type == 'AveragePool': + return g.op('GlobalAveragePool', input) + if not input.isCompleteTensor(): + if output_size == [1] * len(output_size): + return g.op('GlobalMaxPool', input), None + raise NotImplementedError( + '[Adaptive pool]:input size not accessible') + dim = input.type().sizes()[2:] + if output_size == [1] * len(output_size) and type == 'MaxPool': + return g.op('GlobalMaxPool', input), None + + # compute stride = floor(input_size / output_size) + s = [int(dim[i] / output_size[i]) for i in range(0, len(dim))] + + # compute kernel_size = input_size - (output_size - 1) * stride + k = [dim[i] - (output_size[i] - 1) * s[i] for i in range(0, len(dim))] + + # call max_poolxd_with_indices to get 
indices in the output + if type == 'MaxPool': + return fn(g, input, k, k, (0, ) * len(dim), (1, ) * len(dim), + False) + output = g.op( + type, + input, + kernel_shape_i=tuple_fn(k), + strides_i=tuple_fn(s), + ceil_mode_i=False) + return output + + return symbolic_fn + + +adaptive_avg_pool1d = _adaptive_pool('adaptive_avg_pool1d', 'AveragePool', + _single) +adaptive_avg_pool2d = _adaptive_pool('adaptive_avg_pool2d', 'AveragePool', + _pair) +adaptive_avg_pool3d = _adaptive_pool('adaptive_avg_pool3d', 'AveragePool', + _triple) + + +def new_full(g, + self, + size, + fill_value, + dtype, + layout, + device, + pin_memory=False): + from torch.onnx.symbolic_opset9 import full + if dtype is None and self.isCompleteTensor(): + dtype = self.type().scalarType() + dtype = sym_help.scalar_type_to_onnx.index( + sym_help.cast_pytorch_to_onnx[dtype]) + return full(g, size, fill_value, dtype, layout, device, pin_memory) + + +@parse_args('v', 'v', 'i', 'i', 'i') +def grid_sampler(g, + input, + grid, + interpolation_mode, + padding_mode, + align_corners=False): + return g.op( + 'mmcv::grid_sampler', + input, + grid, + interpolation_mode_i=interpolation_mode, + padding_mode_i=padding_mode, + align_corners_i=align_corners) + + +@parse_args('v', 'i') +def cummax(g, input, dim): + return g.op('mmcv::cummax', input, dim_i=dim, outputs=2) + + +@parse_args('v', 'i') +def cummin(g, input, dim): + return g.op('mmcv::cummin', input, dim_i=dim, outputs=2) + + +@parse_args('v', 'v', 'is') +def roll(g, input, shifts, dims): + from torch.onnx.symbolic_opset9 import squeeze + from packaging import version + input_shape = g.op('Shape', input) + + need_flatten = len(dims) == 0 + # If dims is not specified, the tensor will be flattened before + # rolling and then restored to the original shape. 
+ if need_flatten: + resize_shape = input_shape + input = g.op('Reshape', input, + g.op('Constant', value_t=torch.LongTensor([1, -1]))) + input_shape = g.op('Shape', input) + dims = [1] + + for index, dim in enumerate(dims): + end_size = sym_help._slice_helper( + g, input_shape, axes=[0], ends=[dim + 1], starts=[dim]) + shift_size = sym_help._slice_helper( + g, shifts, axes=[0], ends=[index + 1], starts=[index]) + slice_size = g.op('Sub', end_size, shift_size) + + # Can not use Mod because tensorrt does not support + div_size = g.op('Div', slice_size, end_size) + slice_size = g.op('Sub', slice_size, g.op('Mul', end_size, div_size)) + + if version.parse(torch.__version__) >= version.parse('1.7.0'): + # add dim=0 for pytorch 1.9.0 + end_size = squeeze(g, end_size, 0) + slice_size = squeeze(g, slice_size, 0) + else: + end_size = g.op('Squeeze', end_size) + slice_size = g.op('Squeeze', slice_size) + dim = torch.LongTensor([dim]) + + input_slice0 = sym_help._slice_helper( + g, + input, + axes=dim, + starts=torch.LongTensor([0]), + ends=slice_size, + dynamic_slice=True) + input_slice1 = sym_help._slice_helper( + g, + input, + axes=dim, + ends=end_size, + starts=slice_size, + dynamic_slice=True) + + input = g.op('Concat', input_slice1, input_slice0, axis_i=dim) + + if need_flatten: + input = g.op('Reshape', input, resize_shape) + + return input + + +def register_extra_symbolics(opset=11): + register_op('one_hot', one_hot, '', opset) + register_op('im2col', im2col, '', opset) + register_op('topk', topk, '', opset) + register_op('softmax', softmax, '', opset) + register_op('constant_pad_nd', constant_pad_nd, '', opset) + register_op('reflection_pad1d', reflection_pad1d, '', opset) + register_op('reflection_pad2d', reflection_pad2d, '', opset) + register_op('reflection_pad3d', reflection_pad3d, '', opset) + register_op('avg_pool1d', avg_pool1d, '', opset) + register_op('avg_pool2d', avg_pool2d, '', opset) + register_op('avg_pool3d', avg_pool3d, '', opset) + 
register_op('adaptive_avg_pool1d', adaptive_avg_pool1d, '', opset) + register_op('adaptive_avg_pool2d', adaptive_avg_pool2d, '', opset) + register_op('adaptive_avg_pool3d', adaptive_avg_pool3d, '', opset) + register_op('masked_select', masked_select, '', opset) + register_op('upsample_nearest1d', upsample_nearest1d, '', opset) + register_op('upsample_nearest2d', upsample_nearest2d, '', opset) + register_op('upsample_nearest3d', upsample_nearest3d, '', opset) + register_op('upsample_linear1d', upsample_linear1d, '', opset) + register_op('upsample_bilinear2d', upsample_bilinear2d, '', opset) + register_op('upsample_trilinear3d', upsample_trilinear3d, '', opset) + register_op('upsample_bicubic2d', upsample_bicubic2d, '', opset) + register_op('new_full', new_full, '', opset) + register_op('grid_sampler', grid_sampler, '', opset) + register_op('cummax', cummax, '', opset) + register_op('cummin', cummin, '', opset) + register_op('roll', roll, '', opset) diff --git a/mmcv/ops/__init__.py b/mmcv/ops/__init__.py old mode 100755 new mode 100644 index cffbd23..999e090 --- a/mmcv/ops/__init__.py +++ b/mmcv/ops/__init__.py @@ -1,19 +1,12 @@ # Copyright (c) OpenMMLab. All rights reserved. 
-from .active_rotated_filter import active_rotated_filter from .assign_score_withk import assign_score_withk from .ball_query import ball_query from .bbox import bbox_overlaps -from .bezier_align import BezierAlign, bezier_align -from .bias_act import bias_act from .border_align import BorderAlign, border_align -from .box_iou_quadri import box_iou_quadri from .box_iou_rotated import box_iou_rotated from .carafe import CARAFE, CARAFENaive, CARAFEPack, carafe, carafe_naive from .cc_attention import CrissCrossAttention -from .chamfer_distance import chamfer_distance from .contour_expand import contour_expand -from .conv2d_gradfix import conv2d, conv_transpose2d -from .convex_iou import convex_giou, convex_iou from .corner_pool import CornerPool from .correlation import Correlation from .deform_conv import DeformConv2d, DeformConv2dPack, deform_conv2d @@ -23,8 +16,6 @@ from .deprecated_wrappers import Conv2d_deprecated as Conv2d from .deprecated_wrappers import ConvTranspose2d_deprecated as ConvTranspose2d from .deprecated_wrappers import Linear_deprecated as Linear from .deprecated_wrappers import MaxPool2d_deprecated as MaxPool2d -from .diff_iou_rotated import diff_iou_rotated_2d, diff_iou_rotated_3d -from .filtered_lrelu import filtered_lrelu from .focal_loss import (SigmoidFocalLoss, SoftmaxFocalLoss, sigmoid_focal_loss, softmax_focal_loss) from .furthest_point_sample import (furthest_point_sample, @@ -32,46 +23,35 @@ from .furthest_point_sample import (furthest_point_sample, from .fused_bias_leakyrelu import FusedBiasLeakyReLU, fused_bias_leakyrelu from .gather_points import gather_points from .group_points import GroupAll, QueryAndGroup, grouping_operation -from .info import get_compiler_version, get_compiling_cuda_version -from .iou3d import (boxes_iou3d, boxes_iou_bev, boxes_overlap_bev, nms3d, - nms3d_normal, nms_bev, nms_normal_bev) +from .info import (get_compiler_version, get_compiling_cuda_version, + get_onnxruntime_op_path) +from .iou3d import 
boxes_iou_bev, nms_bev, nms_normal_bev from .knn import knn from .masked_conv import MaskedConv2d, masked_conv2d -from .min_area_polygons import min_area_polygons from .modulated_deform_conv import (ModulatedDeformConv2d, ModulatedDeformConv2dPack, modulated_deform_conv2d) from .multi_scale_deform_attn import MultiScaleDeformableAttention -from .nms import batched_nms, nms, nms_match, nms_quadri, nms_rotated, soft_nms +from .nms import batched_nms, nms, nms_match, nms_rotated, soft_nms from .pixel_group import pixel_group from .point_sample import (SimpleRoIAlign, point_sample, rel_roi_point_to_rel_img_point) from .points_in_boxes import (points_in_boxes_all, points_in_boxes_cpu, points_in_boxes_part) -from .points_in_polygons import points_in_polygons from .points_sampler import PointsSampler -from .prroi_pool import PrRoIPool, prroi_pool from .psa_mask import PSAMask -from .riroi_align_rotated import RiRoIAlignRotated, riroi_align_rotated from .roi_align import RoIAlign, roi_align from .roi_align_rotated import RoIAlignRotated, roi_align_rotated from .roi_pool import RoIPool, roi_pool from .roiaware_pool3d import RoIAwarePool3d from .roipoint_pool3d import RoIPointPool3d -from .rotated_feature_align import rotated_feature_align from .saconv import SAConv2d from .scatter_points import DynamicScatter, dynamic_scatter -from .sparse_conv import (SparseConv2d, SparseConv3d, SparseConvTranspose2d, - SparseConvTranspose3d, SparseInverseConv2d, - SparseInverseConv3d, SubMConv2d, SubMConv3d) -from .sparse_modules import SparseModule, SparseSequential -from .sparse_pool import SparseMaxPool2d, SparseMaxPool3d -from .sparse_structure import SparseConvTensor, scatter_nd from .sync_bn import SyncBatchNorm from .three_interpolate import three_interpolate from .three_nn import three_nn from .tin_shift import TINShift, tin_shift -from .upfirdn2d import filter2d, upfirdn2d, upsample2d +from .upfirdn2d import upfirdn2d from .voxelize import Voxelization, voxelization __all__ = [ 
@@ -80,32 +60,22 @@ __all__ = [ 'deform_conv2d', 'DeformRoIPool', 'DeformRoIPoolPack', 'ModulatedDeformRoIPoolPack', 'deform_roi_pool', 'SigmoidFocalLoss', 'SoftmaxFocalLoss', 'sigmoid_focal_loss', 'softmax_focal_loss', - 'get_compiler_version', 'get_compiling_cuda_version', 'MaskedConv2d', - 'masked_conv2d', 'ModulatedDeformConv2d', 'ModulatedDeformConv2dPack', + 'get_compiler_version', 'get_compiling_cuda_version', + 'get_onnxruntime_op_path', 'MaskedConv2d', 'masked_conv2d', + 'ModulatedDeformConv2d', 'ModulatedDeformConv2dPack', 'modulated_deform_conv2d', 'batched_nms', 'nms', 'soft_nms', 'nms_match', 'RoIAlign', 'roi_align', 'RoIPool', 'roi_pool', 'SyncBatchNorm', 'Conv2d', 'ConvTranspose2d', 'Linear', 'MaxPool2d', 'CrissCrossAttention', 'PSAMask', 'point_sample', 'rel_roi_point_to_rel_img_point', 'SimpleRoIAlign', 'SAConv2d', 'TINShift', 'tin_shift', 'assign_score_withk', - 'box_iou_rotated', 'box_iou_quadri', 'RoIPointPool3d', 'nms_rotated', - 'knn', 'ball_query', 'upfirdn2d', 'FusedBiasLeakyReLU', - 'fused_bias_leakyrelu', 'rotated_feature_align', 'RiRoIAlignRotated', - 'riroi_align_rotated', 'RoIAlignRotated', 'roi_align_rotated', - 'pixel_group', 'QueryAndGroup', 'GroupAll', 'grouping_operation', - 'contour_expand', 'three_nn', 'three_interpolate', - 'MultiScaleDeformableAttention', 'BorderAlign', 'border_align', - 'gather_points', 'furthest_point_sample', 'nms_quadri', + 'box_iou_rotated', 'RoIPointPool3d', 'nms_rotated', 'knn', 'ball_query', + 'upfirdn2d', 'FusedBiasLeakyReLU', 'fused_bias_leakyrelu', + 'RoIAlignRotated', 'roi_align_rotated', 'pixel_group', 'QueryAndGroup', + 'GroupAll', 'grouping_operation', 'contour_expand', 'three_nn', + 'three_interpolate', 'MultiScaleDeformableAttention', 'BorderAlign', + 'border_align', 'gather_points', 'furthest_point_sample', 'furthest_point_sample_with_dist', 'PointsSampler', 'Correlation', - 'boxes_iou3d', 'boxes_iou_bev', 'boxes_overlap_bev', 'nms_bev', - 'nms_normal_bev', 'nms3d', 'nms3d_normal', 
'Voxelization', 'voxelization', - 'dynamic_scatter', 'DynamicScatter', 'RoIAwarePool3d', 'SparseConv2d', - 'SparseConv3d', 'SparseConvTranspose2d', 'SparseConvTranspose3d', - 'SparseInverseConv2d', 'SparseInverseConv3d', 'SubMConv2d', 'SubMConv3d', - 'SparseModule', 'SparseSequential', 'SparseMaxPool2d', 'SparseMaxPool3d', - 'SparseConvTensor', 'scatter_nd', 'points_in_boxes_part', - 'points_in_boxes_cpu', 'points_in_boxes_all', 'points_in_polygons', - 'min_area_polygons', 'active_rotated_filter', 'convex_iou', 'convex_giou', - 'diff_iou_rotated_2d', 'diff_iou_rotated_3d', 'chamfer_distance', - 'PrRoIPool', 'prroi_pool', 'bias_act', 'filtered_lrelu', 'conv2d', - 'conv_transpose2d', 'filter2d', 'upsample2d', 'BezierAlign', 'bezier_align' + 'boxes_iou_bev', 'nms_bev', 'nms_normal_bev', 'Voxelization', + 'voxelization', 'dynamic_scatter', 'DynamicScatter', 'RoIAwarePool3d', + 'points_in_boxes_part', 'points_in_boxes_cpu', 'points_in_boxes_all' ] diff --git a/mmcv/ops/active_rotated_filter.py b/mmcv/ops/active_rotated_filter.py deleted file mode 100644 index b8ba43d..0000000 --- a/mmcv/ops/active_rotated_filter.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from typing import Tuple - -import torch -from torch.autograd import Function -from torch.autograd.function import once_differentiable - -from ..utils import ext_loader - -ext_module = ext_loader.load_ext( - '_ext', - ['active_rotated_filter_forward', 'active_rotated_filter_backward']) - - -class ActiveRotatedFilterFunction(Function): - """Encoding the orientation information and generating orientation- - sensitive features. - - The details are described in the paper `Align Deep Features for Oriented - Object Detection _`. - """ - - @staticmethod - def forward(ctx, input: torch.Tensor, - indices: torch.Tensor) -> torch.Tensor: - """ - Args: - input (torch.Tensor): Input features with shape - [num_output_planes, num_input_planes, num_orientations, H, W]. 
- indices (torch.Tensor): Indices with shape - [num_orientations, H, W, num_rotations]. - - Returns: - torch.Tensor: Refined features with shape [num_output_planes * - num_rotations, num_input_planes * num_orientations, H, W]. - """ - ctx.save_for_backward(input, indices) - op, ip, o, h, w = input.size() - o, h, w, r = indices.size() - output = input.new_zeros((op * r, ip * o, h, w)) - ext_module.active_rotated_filter_forward(input, indices, output) - - return output - - @staticmethod - @once_differentiable - def backward(ctx, grad_out: torch.Tensor) -> Tuple[torch.Tensor, None]: - """ - Args: - grad_output (torch.Tensor): The gradient of output features - with shape [num_output_planes * num_rotations, - num_input_planes * num_orientations, H, W]. - - Returns: - torch.Tensor: The gradient of input features with shape - [num_output_planes, num_input_planes, num_orientations, H, W]. - """ - input, indices = ctx.saved_tensors - grad_in = torch.zeros_like(input) - ext_module.active_rotated_filter_backward(grad_out, indices, grad_in) - return grad_in, None - - -active_rotated_filter = ActiveRotatedFilterFunction.apply diff --git a/mmcv/ops/assign_score_withk.py b/mmcv/ops/assign_score_withk.py index deca089..4906ada 100644 --- a/mmcv/ops/assign_score_withk.py +++ b/mmcv/ops/assign_score_withk.py @@ -1,6 +1,3 @@ -from typing import Tuple - -import torch from torch.autograd import Function from ..utils import ext_loader @@ -30,11 +27,11 @@ class AssignScoreWithK(Function): @staticmethod def forward(ctx, - scores: torch.Tensor, - point_features: torch.Tensor, - center_features: torch.Tensor, - knn_idx: torch.Tensor, - aggregate: str = 'sum') -> torch.Tensor: + scores, + point_features, + center_features, + knn_idx, + aggregate='sum'): """ Args: scores (torch.Tensor): (B, npoint, K, M), predicted scores to @@ -81,20 +78,15 @@ class AssignScoreWithK(Function): return output @staticmethod - def backward( - ctx, grad_out: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, 
torch.Tensor, None, None]: + def backward(ctx, grad_out): """ Args: grad_out (torch.Tensor): (B, out_dim, npoint, K) Returns: - tuple[torch.Tensor]: A tuple contains five elements. The first one - is the gradient of ``scores`` whose shape is (B, npoint, K, M). The - second is the gradient of ``point_features`` whose shape is - (B, N, M, out_dim). The third is the gradient of - ``center_features`` with the shape of (B, N, M, out_dim). The last - two are ``None``. + grad_scores (torch.Tensor): (B, npoint, K, M) + grad_point_features (torch.Tensor): (B, N, M, out_dim) + grad_center_features (torch.Tensor): (B, N, M, out_dim) """ _, point_features, center_features, scores, knn_idx = ctx.saved_tensors diff --git a/mmcv/ops/ball_query.py b/mmcv/ops/ball_query.py index a89b36b..d046684 100644 --- a/mmcv/ops/ball_query.py +++ b/mmcv/ops/ball_query.py @@ -1,86 +1,54 @@ # Copyright (c) OpenMMLab. All rights reserved. -from typing import Optional, Tuple - import torch from torch.autograd import Function from ..utils import ext_loader -ext_module = ext_loader.load_ext( - '_ext', ['ball_query_forward', 'stack_ball_query_forward']) +ext_module = ext_loader.load_ext('_ext', ['ball_query_forward']) class BallQuery(Function): """Find nearby points in spherical space.""" @staticmethod - def forward( - ctx, - min_radius: float, - max_radius: float, - sample_num: int, - xyz: torch.Tensor, - center_xyz: torch.Tensor, - xyz_batch_cnt: Optional[torch.Tensor] = None, - center_xyz_batch_cnt: Optional[torch.Tensor] = None - ) -> torch.Tensor: + def forward(ctx, min_radius: float, max_radius: float, sample_num: int, + xyz: torch.Tensor, center_xyz: torch.Tensor) -> torch.Tensor: """ Args: min_radius (float): minimum radius of the balls. max_radius (float): maximum radius of the balls. sample_num (int): maximum number of features in the balls. - xyz (torch.Tensor): (B, N, 3) xyz coordinates of the features, - or staked input (N1 + N2 ..., 3). 
- center_xyz (torch.Tensor): (B, npoint, 3) centers of the ball - query, or staked input (M1 + M2 ..., 3). - xyz_batch_cnt: (batch_size): Stacked input xyz coordinates nums in - each batch, just like (N1, N2, ...). Defaults to None. - New in version 1.7.0. - center_xyz_batch_cnt: (batch_size): Stacked centers coordinates - nums in each batch, just line (M1, M2, ...). Defaults to None. - New in version 1.7.0. + xyz (Tensor): (B, N, 3) xyz coordinates of the features. + center_xyz (Tensor): (B, npoint, 3) centers of the ball query. Returns: - torch.Tensor: (B, npoint, nsample) tensor with the indices of the - features that form the query balls. + Tensor: (B, npoint, nsample) tensor with the indices of + the features that form the query balls. """ assert center_xyz.is_contiguous() assert xyz.is_contiguous() assert min_radius < max_radius - if xyz_batch_cnt is not None and center_xyz_batch_cnt is not None: - assert xyz_batch_cnt.dtype == torch.int - assert center_xyz_batch_cnt.dtype == torch.int - idx = center_xyz.new_zeros((center_xyz.shape[0], sample_num), - dtype=torch.int32) - ext_module.stack_ball_query_forward( - center_xyz, - center_xyz_batch_cnt, - xyz, - xyz_batch_cnt, - idx, - max_radius=max_radius, - nsample=sample_num, - ) - else: - B, N, _ = xyz.size() - npoint = center_xyz.size(1) - idx = xyz.new_zeros(B, npoint, sample_num, dtype=torch.int32) - ext_module.ball_query_forward( - center_xyz, - xyz, - idx, - b=B, - n=N, - m=npoint, - min_radius=min_radius, - max_radius=max_radius, - nsample=sample_num) + + B, N, _ = xyz.size() + npoint = center_xyz.size(1) + idx = xyz.new_zeros(B, npoint, sample_num, dtype=torch.int) + + ext_module.ball_query_forward( + center_xyz, + xyz, + idx, + b=B, + n=N, + m=npoint, + min_radius=min_radius, + max_radius=max_radius, + nsample=sample_num) if torch.__version__ != 'parrots': ctx.mark_non_differentiable(idx) return idx @staticmethod - def backward(ctx, a=None) -> Tuple[None, None, None, None]: + def backward(ctx, a=None): 
return None, None, None, None diff --git a/mmcv/ops/bbox.py b/mmcv/ops/bbox.py index 4ba93d6..0c4d58b 100644 --- a/mmcv/ops/bbox.py +++ b/mmcv/ops/bbox.py @@ -1,57 +1,10 @@ # Copyright (c) OpenMMLab. All rights reserved. -import torch - from ..utils import ext_loader ext_module = ext_loader.load_ext('_ext', ['bbox_overlaps']) -def _bbox_overlaps_cpu(bboxes1: torch.Tensor, - bboxes2: torch.Tensor, - mode: str = 'iou', - aligned: bool = False, - offset: int = 0) -> torch.Tensor: - assert mode in ['iou', 'iof'] - - if aligned: - lt = torch.max(bboxes1[:, :2], bboxes2[:, :2]) # [rows, 2] - rb = torch.min(bboxes1[:, 2:], bboxes2[:, 2:]) # [rows, 2] - - wh = (rb - lt + offset).clamp(min=0) # [rows, 2] - overlap = wh[:, 0] * wh[:, 1] - area1 = (bboxes1[:, 2] - bboxes1[:, 0] + offset) * ( - bboxes1[:, 3] - bboxes1[:, 1] + offset) - - if mode == 'iou': - area2 = (bboxes2[:, 2] - bboxes2[:, 0] + offset) * ( - bboxes2[:, 3] - bboxes2[:, 1] + offset) - ious = overlap / (area1 + area2 - overlap) - else: - ious = overlap / area1 - else: - lt = torch.max(bboxes1[:, None, :2], bboxes2[:, :2]) # [rows, cols, 2] - rb = torch.min(bboxes1[:, None, 2:], bboxes2[:, 2:]) # [rows, cols, 2] - - wh = (rb - lt + offset).clamp(min=0) # [rows, cols, 2] - overlap = wh[:, :, 0] * wh[:, :, 1] - area1 = (bboxes1[:, 2] - bboxes1[:, 0] + offset) * ( - bboxes1[:, 3] - bboxes1[:, 1] + offset) - - if mode == 'iou': - area2 = (bboxes2[:, 2] - bboxes2[:, 0] + offset) * ( - bboxes2[:, 3] - bboxes2[:, 1] + offset) - ious = overlap / (area1[:, None] + area2 - overlap) - else: - ious = overlap / (area1[:, None]) - - return ious - - -def bbox_overlaps(bboxes1: torch.Tensor, - bboxes2: torch.Tensor, - mode: str = 'iou', - aligned: bool = False, - offset: int = 0) -> torch.Tensor: +def bbox_overlaps(bboxes1, bboxes2, mode='iou', aligned=False, offset=0): """Calculate overlap between two set of bboxes. 
If ``aligned`` is ``False``, then calculate the ious between each bbox @@ -59,16 +12,14 @@ def bbox_overlaps(bboxes1: torch.Tensor, bboxes1 and bboxes2. Args: - bboxes1 (torch.Tensor): shape (m, 4) in format or - empty. - bboxes2 (torch.Tensor): shape (n, 4) in format or - empty. If aligned is ``True``, then m and n must be equal. + bboxes1 (Tensor): shape (m, 4) in format or empty. + bboxes2 (Tensor): shape (n, 4) in format or empty. + If aligned is ``True``, then m and n must be equal. mode (str): "iou" (intersection over union) or iof (intersection over foreground). Returns: - torch.Tensor: Return the ious betweens boxes. If ``aligned`` is - ``False``, the shape of ious is (m, n) else (m, 1). + ious(Tensor): shape (m, n) if aligned == False else shape (m, 1) Example: >>> bboxes1 = torch.FloatTensor([ @@ -106,17 +57,16 @@ def bbox_overlaps(bboxes1: torch.Tensor, rows = bboxes1.size(0) cols = bboxes2.size(0) - if aligned: assert rows == cols - ious = bboxes1.new_zeros(rows) - else: - ious = bboxes1.new_zeros((rows, cols)) if rows * cols == 0: - return ious + return bboxes1.new(rows, 1) if aligned else bboxes1.new(rows, cols) + if aligned: + ious = bboxes1.new_zeros(rows) + else: + ious = bboxes1.new_zeros((rows, cols)) ext_module.bbox_overlaps( bboxes1, bboxes2, ious, mode=mode_flag, aligned=aligned, offset=offset) - return ious diff --git a/mmcv/ops/bezier_align.py b/mmcv/ops/bezier_align.py deleted file mode 100644 index 6db7f5c..0000000 --- a/mmcv/ops/bezier_align.py +++ /dev/null @@ -1,137 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. 
-from typing import Tuple, Union - -import torch -import torch.nn as nn -from torch.autograd import Function -from torch.autograd.function import once_differentiable -from torch.nn.modules.utils import _pair - -from ..utils import ext_loader - -ext_module = ext_loader.load_ext( - '_ext', ['bezier_align_forward', 'bezier_align_backward']) - - -class BezierAlignFunction(Function): - - @staticmethod - def forward(ctx, - input: torch.Tensor, - beziers: torch.Tensor, - output_size: Union[int, Tuple[int, int]], - spatial_scale: Union[int, float] = 1.0, - sampling_ratio: int = 0, - aligned: bool = True) -> torch.Tensor: - ctx.output_size = _pair(output_size) - ctx.spatial_scale = spatial_scale - ctx.input_shape = input.size() - ctx.sampling_ratio = sampling_ratio - ctx.aligned = aligned - - assert beziers.size(1) == 17 - output_shape = (beziers.size(0), input.size(1), ctx.output_size[0], - ctx.output_size[1]) - output = input.new_zeros(output_shape) - ext_module.bezier_align_forward( - input, - beziers, - output, - aligned_height=ctx.output_size[0], - aligned_width=ctx.output_size[1], - spatial_scale=ctx.spatial_scale, - sampling_ratio=ctx.sampling_ratio, - aligned=ctx.aligned) - - ctx.save_for_backward(beziers) - return output - - @staticmethod - @once_differentiable - def backward(ctx, grad_output: torch.Tensor): - beziers = ctx.saved_tensors[0] - grad_input = grad_output.new_zeros(ctx.input_shape) - grad_output = grad_output.contiguous() - ext_module.bezier_align_backward( - grad_output, - beziers, - grad_input, - aligned_height=ctx.output_size[0], - aligned_width=ctx.output_size[1], - spatial_scale=ctx.spatial_scale, - sampling_ratio=ctx.sampling_ratio, - aligned=ctx.aligned) - return grad_input, None, None, None, None, None - - -bezier_align = BezierAlignFunction.apply - - -class BezierAlign(nn.Module): - """Bezier align pooling layer. 
- - Args: - output_size (tuple): h, w - spatial_scale (float): scale the input boxes by this number - sampling_ratio (int): number of inputs samples to take for each - output sample. 0 to take samples densely for current models. - aligned (bool): if False, use the legacy implementation in - MMDetection. If True, align the results more perfectly. - - Note: - The implementation of BezierAlign is modified from - https://github.com/aim-uofa/AdelaiDet - - The meaning of aligned=True: - - Given a continuous coordinate c, its two neighboring pixel - indices (in our pixel model) are computed by floor(c - 0.5) and - ceil(c - 0.5). For example, c=1.3 has pixel neighbors with discrete - indices [0] and [1] (which are sampled from the underlying signal - at continuous coordinates 0.5 and 1.5). But the original roi_align - (aligned=False) does not subtract the 0.5 when computing - neighboring pixel indices and therefore it uses pixels with a - slightly incorrect alignment (relative to our pixel model) when - performing bilinear interpolation. - - With `aligned=True`, - we first appropriately scale the ROI and then shift it by -0.5 - prior to calling roi_align. This produces the correct neighbors; - - The difference does not make a difference to the model's - performance if ROIAlign is used together with conv layers. - """ - - def __init__( - self, - output_size: Tuple, - spatial_scale: Union[int, float], - sampling_ratio: int, - aligned: bool = True, - ) -> None: - super().__init__() - - self.output_size = _pair(output_size) - self.spatial_scale = float(spatial_scale) - self.sampling_ratio = int(sampling_ratio) - self.aligned = aligned - - def forward(self, input: torch.Tensor, - beziers: torch.Tensor) -> torch.Tensor: - """BezierAlign forward. - - Args: - inputs (Tensor): input features. - beziers (Tensor): beziers for align. 
- """ - return bezier_align(input, beziers, self.output_size, - self.spatial_scale, self.sampling_ratio, - self.aligned) - - def __repr__(self): - s = self.__class__.__name__ - s += f'(output_size={self.output_size}, ' - s += f'spatial_scale={self.spatial_scale})' - s += f'sampling_ratio={self.sampling_ratio})' - s += f'aligned={self.aligned})' - return s diff --git a/mmcv/ops/bias_act.py b/mmcv/ops/bias_act.py deleted file mode 100644 index 3dfa557..0000000 --- a/mmcv/ops/bias_act.py +++ /dev/null @@ -1,375 +0,0 @@ -# Modified from -# https://github.com/NVlabs/stylegan3/blob/main/torch_utils/ops/bias_act.py - -# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# NVIDIA CORPORATION and its licensors retain all intellectual property -# and proprietary rights in and to this software, related documentation -# and any modifications thereto. Any use, reproduction, disclosure or -# distribution of this software and related documentation without an express -# license agreement from NVIDIA CORPORATION is strictly prohibited. 
- -# source: https://github.com/open-mmlab/mmediting/blob/dev-1.x/mmedit/models/editors/stylegan3/stylegan3_ops/ops/bias_act.py # noqa -"""Custom PyTorch ops for efficient bias and activation.""" - -from typing import Any, Dict, Optional, Union - -import numpy as np -import torch - -from ..utils import ext_loader - -ext_module = ext_loader.load_ext('_ext', ['bias_act']) - - -class EasyDict(dict): - """Convenience class that behaves like a dict but allows access with the - attribute syntax.""" - - def __getattr__(self, name: str) -> Any: - try: - return self[name] - except KeyError: - raise AttributeError(name) - - def __setattr__(self, name: str, value: Any) -> None: - self[name] = value - - def __delattr__(self, name: str) -> None: - del self[name] - - -activation_funcs = { - 'linear': - EasyDict( - func=lambda x, **_: x, - def_alpha=0, - def_gain=1, - cuda_idx=1, - ref='', - has_2nd_grad=False), - 'relu': - EasyDict( - func=lambda x, **_: torch.nn.functional.relu(x), - def_alpha=0, - def_gain=np.sqrt(2), - cuda_idx=2, - ref='y', - has_2nd_grad=False), - 'lrelu': - EasyDict( - func=lambda x, alpha, **_: torch.nn.functional.leaky_relu(x, alpha), - def_alpha=0.2, - def_gain=np.sqrt(2), - cuda_idx=3, - ref='y', - has_2nd_grad=False), - 'tanh': - EasyDict( - func=lambda x, **_: torch.tanh(x), - def_alpha=0, - def_gain=1, - cuda_idx=4, - ref='y', - has_2nd_grad=True), - 'sigmoid': - EasyDict( - func=lambda x, **_: torch.sigmoid(x), - def_alpha=0, - def_gain=1, - cuda_idx=5, - ref='y', - has_2nd_grad=True), - 'elu': - EasyDict( - func=lambda x, **_: torch.nn.functional.elu(x), - def_alpha=0, - def_gain=1, - cuda_idx=6, - ref='y', - has_2nd_grad=True), - 'selu': - EasyDict( - func=lambda x, **_: torch.nn.functional.selu(x), - def_alpha=0, - def_gain=1, - cuda_idx=7, - ref='y', - has_2nd_grad=True), - 'softplus': - EasyDict( - func=lambda x, **_: torch.nn.functional.softplus(x), - def_alpha=0, - def_gain=1, - cuda_idx=8, - ref='y', - has_2nd_grad=True), - 'swish': - 
EasyDict( - func=lambda x, **_: torch.sigmoid(x) * x, - def_alpha=0, - def_gain=np.sqrt(2), - cuda_idx=9, - ref='x', - has_2nd_grad=True), -} - -_null_tensor = torch.empty([0]) - - -def bias_act(input: torch.Tensor, - bias: Optional[torch.Tensor] = None, - dim: int = 1, - act: str = 'linear', - alpha: Optional[Union[float, int]] = None, - gain: Optional[float] = None, - clamp: Optional[float] = None, - use_custom_op: bool = True): - r"""Fused bias and activation function. - - Adds `bias` to activation tensor `input`, and evaluates activation - function `act`, and scales the result by `gain`. Each of the steps is - optional. - - In most cases, the fused op is considerably more efficient than performing - the same calculation using standard PyTorch ops. It supports first and - second order gradients, but not third order gradients. - - Args: - input (torch.Tensor): Input activation tensor. Can be of any shape. - bias (torch.Tensor): Bias vector, or `None` to disable. - Must be a 1D tensor of the same type as `input`. The shape must - be known, and it must match the dimension of `input` corresponding - to `dim`. Defaults to None. - dim (int): The dimension in `input` corresponding to the elements of - `bias`. The value of `dim` is ignored if `b` is not specified. - Defaults to 1. - act (str): Name of the activation function to evaluate, or `"linear"` - to disable. Can be e.g. "relu", "lrelu", "tanh", "sigmoid", - "swish", etc. See `activation_funcs` for a full list. `None` is not - allowed. Defaults to `linear`. - alpha (float or int): Shape parameter for the activation - function, or `None` to use the default. Defaults to None. - gain (float): Scaling factor for the output tensor, or `None` - to use default. See `activation_funcs` for the default scaling of - each activation function. If unsure, consider specifying 1. - Defaults to None. - clamp (float): Clamp the output values to `[-clamp, +clamp]`, - or `None` to disable the clamping (default). Defaults to None. 
- use_custom_op (bool): Whether to use customized op. - Defaults to True. - - Returns: - torch.Tensor: Tensor of the same shape and datatype as `input`. - """ - assert isinstance(input, torch.Tensor) - if use_custom_op and input.is_cuda: - return _bias_act_cuda( - dim=dim, act=act, alpha=alpha, gain=gain, - clamp=clamp).apply(input, bias) - return _bias_act_ref( - input=input, - bias=bias, - dim=dim, - act=act, - alpha=alpha, - gain=gain, - clamp=clamp) - - -def _bias_act_ref(input: torch.Tensor, - bias: Optional[torch.Tensor] = None, - dim: int = 1, - act: str = 'linear', - alpha: Optional[Union[float, int]] = None, - gain: Optional[float] = None, - clamp: Optional[float] = None): - """Slow reference implementation of `bias_act()` using standard PyTorch - ops. - - Adds `bias` to activation tensor `input`, and evaluates activation - function `act`, and scales the result by `gain`. Each of the steps is - optional. - - In most cases, the fused op is considerably more efficient than performing - the same calculation using standard PyTorch ops. It supports first and - second order gradients, but not third order gradients. - - Args: - input (torch.Tensor): Input activation tensor. Can be of any shape. - bias (torch.Tensor): Bias vector, or `None` to disable. - Must be a 1D tensor of the same type as `input`. The shape must - be known, and it must match the dimension of `input` corresponding - to `dim`. Defaults to None. - dim (int): The dimension in `input` corresponding to the elements of - `bias`. The value of `dim` is ignored if `b` is not specified. - Defaults to 1. - act (str): Name of the activation function to evaluate, or `"linear"` - to disable. Can be e.g. "relu", "lrelu", "tanh", "sigmoid", - "swish", etc. See `activation_funcs` for a full list. `None` is not - allowed. Defaults to `linear`. - alpha (float or int): Shape parameter for the activation - function, or `None` to use the default. Defaults to None. 
- gain (float): Scaling factor for the output tensor, or `None` - to use default. See `activation_funcs` for the default scaling of - each activation function. If unsure, consider specifying 1. - Defaults to None. - clamp (float): Clamp the output values to - `[-clamp, +clamp]`, or `None` to disable the clamping (default). - Defaults to None. - - Returns: - torch.Tensor: Tensor of the same shape and datatype as `input`. - """ - assert isinstance(input, torch.Tensor) - assert clamp is None or clamp >= 0 - spec = activation_funcs[act] - alpha = float(alpha if alpha is not None else spec.def_alpha) - gain = float(gain if gain is not None else spec.def_gain) - clamp = float(clamp if clamp is not None else -1) - - # Add bias. - if bias is not None: - assert isinstance(bias, torch.Tensor) and bias.ndim == 1 - assert 0 <= dim < input.ndim - assert bias.shape[0] == input.shape[dim] - input = input + bias.reshape( - [-1 if i == dim else 1 for i in range(input.ndim)]) - - # Evaluate activation function. - alpha = float(alpha) - output = spec.func(input, alpha=alpha) - - # Scale by gain. - gain = float(gain) - if gain != 1: - output = output * gain - - # Clamp. - if clamp >= 0: - # pylint: disable=invalid-unary-operand-type - output = output.clamp(-clamp, clamp) - return output - - -_bias_act_cuda_cache: Dict = dict() - - -def _bias_act_cuda(dim: int = 1, - act: str = 'linear', - alpha: Optional[Union[float, int]] = None, - gain: Optional[float] = None, - clamp: Optional[float] = None): - """"Fast CUDA implementation of `bias_act()` using custom ops. - - Args: - dim (int): The dimension in `x` corresponding to the elements of `b`. - The value of `dim` is ignored if `b` is not specified. - Defaults to 1. - act (str): Name of the activation function to evaluate, or `"linear"` - to disable. Can be e.g. "relu", "lrelu", "tanh", "sigmoid", - "swish", etc. See `activation_funcs` for a full list. `None` is not - allowed. Defaults to `linear`. 
- alpha (float | int): Shape parameter for the activation - function, or `None` to use the default. Defaults to None. - gain (float): Scaling factor for the output tensor, or `None` - to use default. See `activation_funcs` for the default scaling of - each activation function. If unsure, consider specifying 1. - Defaults to None. - clamp (float): Clamp the output values to `[-clamp, +clamp]`, - or `None` to disable the clamping (default). Defaults to None. - - Returns: - torch.Tensor: Tensor of the same shape and datatype as `x`. - """ - # Parse arguments. - assert clamp is None or clamp >= 0 - spec = activation_funcs[act] - alpha = float(alpha if alpha is not None else spec.def_alpha) - gain = float(gain if gain is not None else spec.def_gain) - clamp = float(clamp if clamp is not None else -1) - - # Lookup from cache. - key = (dim, act, alpha, gain, clamp) - if key in _bias_act_cuda_cache: - return _bias_act_cuda_cache[key] - - # Forward op. - class BiasActCuda(torch.autograd.Function): - - @staticmethod - def forward(ctx, x, b): # pylint: disable=arguments-differ - ctx.memory_format = torch.channels_last if x.ndim > 2 and x.stride( - 1) == 1 else torch.contiguous_format - x = x.contiguous(memory_format=ctx.memory_format) - b = b.contiguous() if b is not None else _null_tensor.to(x.device) - y = x - if act != 'linear' or gain != 1 or clamp >= 0 or ( - b is not _null_tensor.to(x.device)): - y = ext_module.bias_act(x, b, _null_tensor.to(x.device), - _null_tensor.to(x.device), - _null_tensor.to(x.device), 0, dim, - spec.cuda_idx, alpha, gain, clamp) - ctx.save_for_backward( - x if 'x' in spec.ref or spec.has_2nd_grad else _null_tensor.to( - x.device), b if 'x' in spec.ref or spec.has_2nd_grad else - _null_tensor.to(x.device), - y if 'y' in spec.ref else _null_tensor.to(x.device)) - return y - - @staticmethod - def backward(ctx, dy): # pylint: disable=arguments-differ - dy = dy.contiguous(memory_format=ctx.memory_format) - x, b, y = ctx.saved_tensors - dx = None - db 
= None - - if ctx.needs_input_grad[0] or ctx.needs_input_grad[1]: - dx = dy - if act != 'linear' or gain != 1 or clamp >= 0: - dx = BiasActCudaGrad.apply(dy, x, b, y) - - if ctx.needs_input_grad[1]: - db = dx.sum([i for i in range(dx.ndim) if i != dim]) - - return dx, db - - # Backward op. - class BiasActCudaGrad(torch.autograd.Function): - - @staticmethod - def forward(ctx, dy, x, b, y): # pylint: disable=arguments-differ - ctx.memory_format = torch.channels_last if dy.ndim > 2 and ( - dy.stride(1) == 1) else torch.contiguous_format - dx = ext_module.bias_act(dy, b, x, y, _null_tensor.to(x.device), 1, - dim, spec.cuda_idx, alpha, gain, clamp) - ctx.save_for_backward( - dy if spec.has_2nd_grad else _null_tensor.to(x.device), x, b, - y) - return dx - - @staticmethod - def backward(ctx, d_dx): # pylint: disable=arguments-differ - d_dx = d_dx.contiguous(memory_format=ctx.memory_format) - dy, x, b, y = ctx.saved_tensors - d_dy = None - d_x = None - d_b = None - d_y = None - - if ctx.needs_input_grad[0]: - d_dy = BiasActCudaGrad.apply(d_dx, x, b, y) - - if spec.has_2nd_grad and (ctx.needs_input_grad[1] - or ctx.needs_input_grad[2]): - d_x = ext_module.bias_act(d_dx, b, x, y, dy, 2, dim, - spec.cuda_idx, alpha, gain, clamp) - - if spec.has_2nd_grad and ctx.needs_input_grad[2]: - d_b = d_x.sum([i for i in range(d_x.ndim) if i != dim]) - - return d_dy, d_x, d_b, d_y - - # Add to cache. 
- _bias_act_cuda_cache[key] = BiasActCuda - return BiasActCuda diff --git a/mmcv/ops/border_align.py b/mmcv/ops/border_align.py index c09501b..ff305be 100644 --- a/mmcv/ops/border_align.py +++ b/mmcv/ops/border_align.py @@ -2,8 +2,6 @@ # modified from # https://github.com/Megvii-BaseDetection/cvpods/blob/master/cvpods/layers/border_align.py -from typing import Tuple - import torch import torch.nn as nn from torch.autograd import Function @@ -23,8 +21,7 @@ class BorderAlignFunction(Function): 'mmcv::MMCVBorderAlign', input, boxes, pool_size_i=pool_size) @staticmethod - def forward(ctx, input: torch.Tensor, boxes: torch.Tensor, - pool_size: int) -> torch.Tensor: + def forward(ctx, input, boxes, pool_size): ctx.pool_size = pool_size ctx.input_shape = input.size() @@ -48,8 +45,7 @@ class BorderAlignFunction(Function): @staticmethod @once_differentiable - def backward(ctx, - grad_output: torch.Tensor) -> Tuple[torch.Tensor, None, None]: + def backward(ctx, grad_output): boxes, argmax_idx = ctx.saved_tensors grad_input = grad_output.new_zeros(ctx.input_shape) # complex head architecture may cause grad_output uncontiguous @@ -76,25 +72,24 @@ class BorderAlign(nn.Module): For each border line (e.g. top, left, bottom or right) of each box, border_align does the following: - - 1. uniformly samples ``pool_size`` +1 positions on this line, involving - the start and end points. - 2. the corresponding features on these points are computed by bilinear - interpolation. - 3. max pooling over all the ``pool_size`` +1 positions are used for - computing pooled feature. + 1. uniformly samples `pool_size`+1 positions on this line, involving \ + the start and end points. + 2. the corresponding features on these points are computed by \ + bilinear interpolation. + 3. max pooling over all the `pool_size`+1 positions are used for \ + computing pooled feature. Args: pool_size (int): number of positions sampled over the boxes' borders (e.g. top, bottom, left, right). 
+ """ - def __init__(self, pool_size: int): - super().__init__() + def __init__(self, pool_size): + super(BorderAlign, self).__init__() self.pool_size = pool_size - def forward(self, input: torch.Tensor, - boxes: torch.Tensor) -> torch.Tensor: + def forward(self, input, boxes): """ Args: input: Features with shape [N,4C,H,W]. Channels ranged in [0,C), @@ -103,8 +98,8 @@ class BorderAlign(nn.Module): boxes: Boxes with shape [N,H*W,4]. Coordinate format (x1,y1,x2,y2). Returns: - torch.Tensor: Pooled features with shape [N,C,H*W,4]. The order is - (top,left,bottom,right) for the last dimension. + Tensor: Pooled features with shape [N,C,H*W,4]. The order is + (top,left,bottom,right) for the last dimension. """ return border_align(input, boxes, self.pool_size) diff --git a/mmcv/ops/box_iou_quadri.py b/mmcv/ops/box_iou_quadri.py deleted file mode 100644 index 89747fd..0000000 --- a/mmcv/ops/box_iou_quadri.py +++ /dev/null @@ -1,49 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import torch - -from ..utils import ext_loader - -ext_module = ext_loader.load_ext('_ext', ['box_iou_quadri']) - - -def box_iou_quadri(bboxes1: torch.Tensor, - bboxes2: torch.Tensor, - mode: str = 'iou', - aligned: bool = False) -> torch.Tensor: - """Return intersection-over-union (Jaccard index) of boxes. - - Both sets of boxes are expected to be in - (x1, y1, ..., x4, y4) format. - - If ``aligned`` is ``False``, then calculate the ious between each bbox - of bboxes1 and bboxes2, otherwise the ious between each aligned pair of - bboxes1 and bboxes2. - - Args: - bboxes1 (torch.Tensor): quadrilateral bboxes 1. It has shape (N, 8), - indicating (x1, y1, ..., x4, y4) for each row. - bboxes2 (torch.Tensor): quadrilateral bboxes 2. It has shape (M, 8), - indicating (x1, y1, ..., x4, y4) for each row. - mode (str): "iou" (intersection over union) or iof (intersection over - foreground). - - Returns: - torch.Tensor: Return the ious betweens boxes. 
If ``aligned`` is - ``False``, the shape of ious is (N, M) else (N,). - """ - assert mode in ['iou', 'iof'] - mode_dict = {'iou': 0, 'iof': 1} - mode_flag = mode_dict[mode] - rows = bboxes1.size(0) - cols = bboxes2.size(0) - if aligned: - ious = bboxes1.new_zeros(rows) - else: - ious = bboxes1.new_zeros(rows * cols) - bboxes1 = bboxes1.contiguous() - bboxes2 = bboxes2.contiguous() - ext_module.box_iou_quadri( - bboxes1, bboxes2, ious, mode_flag=mode_flag, aligned=aligned) - if not aligned: - ious = ious.view(rows, cols) - return ious diff --git a/mmcv/ops/box_iou_rotated.py b/mmcv/ops/box_iou_rotated.py index 2443af2..2d78015 100644 --- a/mmcv/ops/box_iou_rotated.py +++ b/mmcv/ops/box_iou_rotated.py @@ -1,16 +1,10 @@ # Copyright (c) OpenMMLab. All rights reserved. -import torch - from ..utils import ext_loader ext_module = ext_loader.load_ext('_ext', ['box_iou_rotated']) -def box_iou_rotated(bboxes1: torch.Tensor, - bboxes2: torch.Tensor, - mode: str = 'iou', - aligned: bool = False, - clockwise: bool = True) -> torch.Tensor: +def box_iou_rotated(bboxes1, bboxes2, mode='iou', aligned=False): """Return intersection-over-union (Jaccard index) of boxes. Both sets of boxes are expected to be in @@ -20,110 +14,18 @@ def box_iou_rotated(bboxes1: torch.Tensor, of bboxes1 and bboxes2, otherwise the ious between each aligned pair of bboxes1 and bboxes2. - .. note:: - The operator assumes: - - 1) The positive direction along x axis is left -> right. - - 2) The positive direction along y axis is top -> down. - - 3) The w border is in parallel with x axis when angle = 0. - - However, there are 2 opposite definitions of the positive angular - direction, clockwise (CW) and counter-clockwise (CCW). MMCV supports - both definitions and uses CW by default. - - Please set ``clockwise=False`` if you are using the CCW definition. - - The coordinate system when ``clockwise`` is ``True`` (default) - - .. 
code-block:: none - - 0-------------------> x (0 rad) - | A-------------B - | | | - | | box h - | | angle=0 | - | D------w------C - v - y (pi/2 rad) - - In such coordination system the rotation matrix is - - .. math:: - \\begin{pmatrix} - \\cos\\alpha & -\\sin\\alpha \\\\ - \\sin\\alpha & \\cos\\alpha - \\end{pmatrix} - - The coordinates of the corner point A can be calculated as: - - .. math:: - P_A= - \\begin{pmatrix} x_A \\\\ y_A\\end{pmatrix} - = - \\begin{pmatrix} x_{center} \\\\ y_{center}\\end{pmatrix} + - \\begin{pmatrix}\\cos\\alpha & -\\sin\\alpha \\\\ - \\sin\\alpha & \\cos\\alpha\\end{pmatrix} - \\begin{pmatrix} -0.5w \\\\ -0.5h\\end{pmatrix} \\\\ - = - \\begin{pmatrix} x_{center}-0.5w\\cos\\alpha+0.5h\\sin\\alpha - \\\\ - y_{center}-0.5w\\sin\\alpha-0.5h\\cos\\alpha\\end{pmatrix} - - - The coordinate system when ``clockwise`` is ``False`` - - .. code-block:: none - - 0-------------------> x (0 rad) - | A-------------B - | | | - | | box h - | | angle=0 | - | D------w------C - v - y (-pi/2 rad) - - In such coordination system the rotation matrix is - - .. math:: - \\begin{pmatrix} - \\cos\\alpha & \\sin\\alpha \\\\ - -\\sin\\alpha & \\cos\\alpha - \\end{pmatrix} - - The coordinates of the corner point A can be calculated as: - - .. math:: - P_A= - \\begin{pmatrix} x_A \\\\ y_A\\end{pmatrix} - = - \\begin{pmatrix} x_{center} \\\\ y_{center}\\end{pmatrix} + - \\begin{pmatrix}\\cos\\alpha & \\sin\\alpha \\\\ - -\\sin\\alpha & \\cos\\alpha\\end{pmatrix} - \\begin{pmatrix} -0.5w \\\\ -0.5h\\end{pmatrix} \\\\ - = - \\begin{pmatrix} x_{center}-0.5w\\cos\\alpha-0.5h\\sin\\alpha - \\\\ - y_{center}+0.5w\\sin\\alpha-0.5h\\cos\\alpha\\end{pmatrix} - - Args: - boxes1 (torch.Tensor): rotated bboxes 1. It has shape (N, 5), - indicating (x, y, w, h, theta) for each row. Note that theta is in - radian. - boxes2 (torch.Tensor): rotated bboxes 2. It has shape (M, 5), - indicating (x, y, w, h, theta) for each row. Note that theta is in - radian. 
+ Arguments: + boxes1 (Tensor): rotated bboxes 1. \ + It has shape (N, 5), indicating (x, y, w, h, theta) for each row. + Note that theta is in radian. + boxes2 (Tensor): rotated bboxes 2. \ + It has shape (M, 5), indicating (x, y, w, h, theta) for each row. + Note that theta is in radian. mode (str): "iou" (intersection over union) or iof (intersection over foreground). - clockwise (bool): flag indicating whether the positive angular - orientation is clockwise. default True. - `New in version 1.4.3.` Returns: - torch.Tensor: Return the ious betweens boxes. If ``aligned`` is - ``False``, the shape of ious is (N, M) else (N,). + ious(Tensor): shape (N, M) if aligned == False else shape (N,) """ assert mode in ['iou', 'iof'] mode_dict = {'iou': 0, 'iof': 1} @@ -133,12 +35,7 @@ def box_iou_rotated(bboxes1: torch.Tensor, if aligned: ious = bboxes1.new_zeros(rows) else: - ious = bboxes1.new_zeros(rows * cols) - if not clockwise: - flip_mat = bboxes1.new_ones(bboxes1.shape[-1]) - flip_mat[-1] = -1 - bboxes1 = bboxes1 * flip_mat - bboxes2 = bboxes2 * flip_mat + ious = bboxes1.new_zeros((rows * cols)) bboxes1 = bboxes1.contiguous() bboxes2 = bboxes2.contiguous() ext_module.box_iou_rotated( diff --git a/mmcv/ops/carafe.py b/mmcv/ops/carafe.py index f7e79c2..5154cb3 100644 --- a/mmcv/ops/carafe.py +++ b/mmcv/ops/carafe.py @@ -1,15 +1,11 @@ # Copyright (c) OpenMMLab. All rights reserved. 
-from typing import Tuple - import torch import torch.nn as nn import torch.nn.functional as F -from mmengine.model import normal_init, xavier_init -from mmengine.registry import MODELS -from torch import Tensor from torch.autograd import Function from torch.nn.modules.module import Module +from ..cnn import UPSAMPLE_LAYERS, normal_init, xavier_init from ..utils import ext_loader ext_module = ext_loader.load_ext('_ext', [ @@ -21,8 +17,7 @@ ext_module = ext_loader.load_ext('_ext', [ class CARAFENaiveFunction(Function): @staticmethod - def symbolic(g, features: Tensor, masks: Tensor, kernel_size: int, - group_size: int, scale_factor: int) -> Tensor: + def symbolic(g, features, masks, kernel_size, group_size, scale_factor): return g.op( 'mmcv::MMCVCARAFENaive', features, @@ -32,8 +27,7 @@ class CARAFENaiveFunction(Function): scale_factor_f=scale_factor) @staticmethod - def forward(ctx, features: Tensor, masks: Tensor, kernel_size: int, - group_size: int, scale_factor: int) -> Tensor: + def forward(ctx, features, masks, kernel_size, group_size, scale_factor): assert scale_factor >= 1 assert masks.size(1) == kernel_size * kernel_size * group_size assert masks.size(-1) == features.size(-1) * scale_factor @@ -56,15 +50,12 @@ class CARAFENaiveFunction(Function): group_size=group_size, scale_factor=scale_factor) - if features.requires_grad or masks.requires_grad or \ - torch.__version__ == 'parrots': + if features.requires_grad or masks.requires_grad: ctx.save_for_backward(features, masks) return output @staticmethod - def backward( - ctx, - grad_output: Tensor) -> Tuple[Tensor, Tensor, None, None, None]: + def backward(ctx, grad_output): assert grad_output.is_cuda features, masks = ctx.saved_tensors @@ -92,8 +83,8 @@ carafe_naive = CARAFENaiveFunction.apply class CARAFENaive(Module): - def __init__(self, kernel_size: int, group_size: int, scale_factor: int): - super().__init__() + def __init__(self, kernel_size, group_size, scale_factor): + super(CARAFENaive, 
self).__init__() assert isinstance(kernel_size, int) and isinstance( group_size, int) and isinstance(scale_factor, int) @@ -101,7 +92,7 @@ class CARAFENaive(Module): self.group_size = group_size self.scale_factor = scale_factor - def forward(self, features: Tensor, masks: Tensor) -> Tensor: + def forward(self, features, masks): return carafe_naive(features, masks, self.kernel_size, self.group_size, self.scale_factor) @@ -109,8 +100,7 @@ class CARAFENaive(Module): class CARAFEFunction(Function): @staticmethod - def symbolic(g, features: Tensor, masks: Tensor, kernel_size: int, - group_size: int, scale_factor: int) -> Tensor: + def symbolic(g, features, masks, kernel_size, group_size, scale_factor): return g.op( 'mmcv::MMCVCARAFE', features, @@ -120,8 +110,7 @@ class CARAFEFunction(Function): scale_factor_f=scale_factor) @staticmethod - def forward(ctx, features: Tensor, masks: Tensor, kernel_size: int, - group_size: int, scale_factor: int) -> Tensor: + def forward(ctx, features, masks, kernel_size, group_size, scale_factor): assert scale_factor >= 1 assert masks.size(1) == kernel_size * kernel_size * group_size assert masks.size(-1) == features.size(-1) * scale_factor @@ -150,15 +139,14 @@ class CARAFEFunction(Function): group_size=group_size, scale_factor=scale_factor) - if features.requires_grad or masks.requires_grad or \ - torch.__version__ == 'parrots': + if features.requires_grad or masks.requires_grad: ctx.save_for_backward(features, masks, rfeatures) return output @staticmethod - def backward( - ctx, - grad_output: Tensor) -> Tuple[Tensor, Tensor, None, None, None]: + def backward(ctx, grad_output): + assert grad_output.is_cuda + features, masks, rfeatures = ctx.saved_tensors kernel_size = ctx.kernel_size group_size = ctx.group_size @@ -192,8 +180,7 @@ carafe = CARAFEFunction.apply class CARAFE(Module): """ CARAFE: Content-Aware ReAssembly of FEatures - Please refer to `CARAFE: Content-Aware ReAssembly of FEatures - `_ for more details. 
+ Please refer to https://arxiv.org/abs/1905.02188 for more details. Args: kernel_size (int): reassemble kernel size @@ -204,8 +191,8 @@ class CARAFE(Module): upsampled feature map """ - def __init__(self, kernel_size: int, group_size: int, scale_factor: int): - super().__init__() + def __init__(self, kernel_size, group_size, scale_factor): + super(CARAFE, self).__init__() assert isinstance(kernel_size, int) and isinstance( group_size, int) and isinstance(scale_factor, int) @@ -213,19 +200,19 @@ class CARAFE(Module): self.group_size = group_size self.scale_factor = scale_factor - def forward(self, features: Tensor, masks: Tensor) -> Tensor: + def forward(self, features, masks): return carafe(features, masks, self.kernel_size, self.group_size, self.scale_factor) -@MODELS.register_module(name='carafe') +@UPSAMPLE_LAYERS.register_module(name='carafe') class CARAFEPack(nn.Module): """A unified package of CARAFE upsampler that contains: 1) channel compressor 2) content encoder 3) CARAFE op. Official implementation of ICCV 2019 paper - `CARAFE: Content-Aware ReAssembly of FEatures - `_. + CARAFE: Content-Aware ReAssembly of FEatures + Please refer to https://arxiv.org/abs/1905.02188 for more details. 
Args: channels (int): input feature channels @@ -241,14 +228,14 @@ class CARAFEPack(nn.Module): """ def __init__(self, - channels: int, - scale_factor: int, - up_kernel: int = 5, - up_group: int = 1, - encoder_kernel: int = 3, - encoder_dilation: int = 1, - compressed_channels: int = 64): - super().__init__() + channels, + scale_factor, + up_kernel=5, + up_group=1, + encoder_kernel=3, + encoder_dilation=1, + compressed_channels=64): + super(CARAFEPack, self).__init__() self.channels = channels self.scale_factor = scale_factor self.up_kernel = up_kernel @@ -274,7 +261,7 @@ class CARAFEPack(nn.Module): xavier_init(m, distribution='uniform') normal_init(self.content_encoder, std=0.001) - def kernel_normalizer(self, mask: Tensor) -> Tensor: + def kernel_normalizer(self, mask): mask = F.pixel_shuffle(mask, self.scale_factor) n, mask_c, h, w = mask.size() # use float division explicitly, @@ -287,11 +274,11 @@ class CARAFEPack(nn.Module): return mask - def feature_reassemble(self, x: Tensor, mask: Tensor) -> Tensor: + def feature_reassemble(self, x, mask): x = carafe(x, mask, self.up_kernel, self.up_group, self.scale_factor) return x - def forward(self, x: Tensor) -> Tensor: + def forward(self, x): compressed_x = self.channel_compressor(x) mask = self.content_encoder(compressed_x) mask = self.kernel_normalizer(mask) diff --git a/mmcv/ops/cc_attention.py b/mmcv/ops/cc_attention.py index efde7b7..ff8dd4c 100644 --- a/mmcv/ops/cc_attention.py +++ b/mmcv/ops/cc_attention.py @@ -2,12 +2,11 @@ import torch import torch.nn as nn import torch.nn.functional as F -from mmengine.registry import MODELS -from mmcv.cnn import Scale +from mmcv.cnn import PLUGIN_LAYERS, Scale -def NEG_INF_DIAG(n: int, device: torch.device) -> torch.Tensor: +def NEG_INF_DIAG(n, device): """Returns a diagonal matrix of size [n, n]. The diagonal are all "-inf". 
This is for avoiding calculating the @@ -16,7 +15,7 @@ def NEG_INF_DIAG(n: int, device: torch.device) -> torch.Tensor: return torch.diag(torch.tensor(float('-inf')).to(device).repeat(n), 0) -@MODELS.register_module() +@PLUGIN_LAYERS.register_module() class CrissCrossAttention(nn.Module): """Criss-Cross Attention Module. @@ -42,7 +41,7 @@ class CrissCrossAttention(nn.Module): in_channels (int): Channels of the input feature map. """ - def __init__(self, in_channels: int) -> None: + def __init__(self, in_channels): super().__init__() self.query_conv = nn.Conv2d(in_channels, in_channels // 8, 1) self.key_conv = nn.Conv2d(in_channels, in_channels // 8, 1) @@ -50,15 +49,14 @@ class CrissCrossAttention(nn.Module): self.gamma = Scale(0.) self.in_channels = in_channels - def forward(self, x: torch.Tensor) -> torch.Tensor: + def forward(self, x): """forward function of Criss-Cross Attention. Args: - x (torch.Tensor): Input feature with the shape of - (batch_size, in_channels, height, width). - + x (Tensor): Input feature. \ + shape (batch_size, in_channels, height, width) Returns: - torch.Tensor: Output of the layer, with the shape of + Tensor: Output of the layer, with shape of \ (batch_size, in_channels, height, width) """ B, C, H, W = x.size() @@ -79,7 +77,7 @@ class CrissCrossAttention(nn.Module): return out - def __repr__(self) -> str: + def __repr__(self): s = self.__class__.__name__ s += f'(in_channels={self.in_channels})' return s diff --git a/mmcv/ops/chamfer_distance.py b/mmcv/ops/chamfer_distance.py deleted file mode 100644 index 1f908a5..0000000 --- a/mmcv/ops/chamfer_distance.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. 
-from typing import Sequence, Tuple - -import torch -from torch import Tensor -from torch.autograd import Function -from torch.autograd.function import once_differentiable - -from ..utils import ext_loader - -ext_module = ext_loader.load_ext( - '_ext', ['chamfer_distance_forward', 'chamfer_distance_backward']) - - -class ChamferDistanceFunction(Function): - """This is an implementation of the 2D Chamfer Distance. - - It has been used in the paper `Oriented RepPoints for Aerial Object - Detection (CVPR 2022) _`. - """ - - @staticmethod - def forward(ctx, xyz1: Tensor, xyz2: Tensor) -> Sequence[Tensor]: - """ - Args: - xyz1 (Tensor): Point set with shape (B, N, 2). - xyz2 (Tensor): Point set with shape (B, N, 2). - - Returns: - Sequence[Tensor]: - - - dist1 (Tensor): Chamfer distance (xyz1 to xyz2) with - shape (B, N). - - dist2 (Tensor): Chamfer distance (xyz2 to xyz1) with - shape (B, N). - - idx1 (Tensor): Index of chamfer distance (xyz1 to xyz2) - with shape (B, N), which be used in compute gradient. - - idx2 (Tensor): Index of chamfer distance (xyz2 to xyz2) - with shape (B, N), which be used in compute gradient. - """ - batch_size, n, _ = xyz1.size() - _, m, _ = xyz2.size() - device = xyz1.device - xyz1 = xyz1.contiguous() - xyz2 = xyz2.contiguous() - - dist1 = torch.zeros(batch_size, n).to(device) - dist2 = torch.zeros(batch_size, m).to(device) - idx1 = torch.zeros(batch_size, n).type(torch.IntTensor).to(device) - idx2 = torch.zeros(batch_size, m).type(torch.IntTensor).to(device) - - ext_module.chamfer_distance_forward(xyz1, xyz2, dist1, dist2, idx1, - idx2) - ctx.save_for_backward(xyz1, xyz2, idx1, idx2) - return dist1, dist2, idx1, idx2 - - @staticmethod - @once_differentiable - def backward(ctx, - grad_dist1: Tensor, - grad_dist2: Tensor, - grad_idx1=None, - grad_idx2=None) -> Tuple[Tensor, Tensor]: - """ - - Args: - grad_dist1 (Tensor): Gradient of chamfer distance - (xyz1 to xyz2) with shape (B, N). 
- grad_dist2 (Tensor): Gradient of chamfer distance - (xyz2 to xyz1) with shape (B, N). - - Returns: - Tuple[Tensor, Tensor]: - - - grad_xyz1 (Tensor): Gradient of the point set with shape \ - (B, N, 2). - - grad_xyz2 (Tensor):Gradient of the point set with shape \ - (B, N, 2). - """ - xyz1, xyz2, idx1, idx2 = ctx.saved_tensors - device = grad_dist1.device - grad_dist1 = grad_dist1.contiguous() - grad_dist2 = grad_dist2.contiguous() - grad_xyz1 = torch.zeros(xyz1.size()).to(device) - grad_xyz2 = torch.zeros(xyz2.size()).to(device) - - ext_module.chamfer_distance_backward(xyz1, xyz2, idx1, idx2, - grad_dist1, grad_dist2, grad_xyz1, - grad_xyz2) - return grad_xyz1, grad_xyz2 - - -chamfer_distance = ChamferDistanceFunction.apply diff --git a/mmcv/ops/contour_expand.py b/mmcv/ops/contour_expand.py index 7184609..ea1111e 100644 --- a/mmcv/ops/contour_expand.py +++ b/mmcv/ops/contour_expand.py @@ -1,6 +1,4 @@ # Copyright (c) OpenMMLab. All rights reserved. -from typing import Union - import numpy as np import torch @@ -9,22 +7,21 @@ from ..utils import ext_loader ext_module = ext_loader.load_ext('_ext', ['contour_expand']) -def contour_expand(kernel_mask: Union[np.array, torch.Tensor], - internal_kernel_label: Union[np.array, torch.Tensor], - min_kernel_area: int, kernel_num: int) -> list: +def contour_expand(kernel_mask, internal_kernel_label, min_kernel_area, + kernel_num): """Expand kernel contours so that foreground pixels are assigned into instances. - Args: - kernel_mask (np.array or torch.Tensor): The instance kernel mask with + Arguments: + kernel_mask (np.array or Tensor): The instance kernel mask with size hxw. - internal_kernel_label (np.array or torch.Tensor): The instance internal + internal_kernel_label (np.array or Tensor): The instance internal kernel label with size hxw. min_kernel_area (int): The minimum kernel area. kernel_num (int): The instance kernel number. Returns: - list: The instance index map with size hxw. 
+ label (list): The instance index map with size hxw. """ assert isinstance(kernel_mask, (torch.Tensor, np.ndarray)) assert isinstance(internal_kernel_label, (torch.Tensor, np.ndarray)) @@ -45,7 +42,7 @@ def contour_expand(kernel_mask: Union[np.array, torch.Tensor], internal_kernel_label, min_kernel_area=min_kernel_area, kernel_num=kernel_num) - label = label.tolist() # type: ignore + label = label.tolist() else: label = ext_module.contour_expand(kernel_mask, internal_kernel_label, min_kernel_area, kernel_num) diff --git a/mmcv/ops/conv2d_gradfix.py b/mmcv/ops/conv2d_gradfix.py deleted file mode 100644 index 9d4ef6e..0000000 --- a/mmcv/ops/conv2d_gradfix.py +++ /dev/null @@ -1,346 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# NVIDIA CORPORATION and its licensors retain all intellectual property -# and proprietary rights in and to this software, related documentation -# and any modifications thereto. Any use, reproduction, disclosure or -# distribution of this software and related documentation without an express -# license agreement from NVIDIA CORPORATION is strictly prohibited. 
- -# source: https://github.com/NVlabs/stylegan3/blob/main/torch_utils/ops/conv2d_gradfix.py # noqa -"""Custom replacement for `torch.nn.functional.conv2d` that supports -arbitrarily high order gradients with zero performance penalty.""" - -import contextlib -import warnings -from typing import Dict, Optional, Tuple, Union - -import torch -from mmengine.utils import digit_version - -enabled = True -weight_gradients_disabled = False - - -@contextlib.contextmanager -def no_weight_gradients(disable=True): - global weight_gradients_disabled - old = weight_gradients_disabled - if disable: - weight_gradients_disabled = True - yield - weight_gradients_disabled = old - - -def conv2d(input: torch.Tensor, - weight: torch.Tensor, - bias: Optional[torch.Tensor] = None, - stride: Union[int, Tuple[int, ...]] = 1, - padding: Union[int, Tuple[int, ...]] = 0, - dilation: Union[int, Tuple[int, ...]] = 1, - groups: int = 1): - flag = True - if digit_version(torch.__version__) >= digit_version('1.10.0'): - warnings.warn('Since ' - 'aten:cudnn_convolution_backward_weight is ' - f'not supported in torch=={torch.__version__},' - ' rolling back to `torch.nn.functional.conv2d`') - flag = False - if _should_use_custom_op(input) and flag: - return _conv2d_gradfix( - transpose=False, - weight_shape=weight.shape, - stride=stride, - padding=padding, - output_padding=0, - dilation=dilation, - groups=groups).apply(input, weight, bias) - return torch.nn.functional.conv2d( - input=input, - weight=weight, - bias=bias, - stride=stride, - padding=padding, - dilation=dilation, - groups=groups) - - -def conv_transpose2d(input: torch.Tensor, - weight: torch.Tensor, - bias: Optional[torch.Tensor] = None, - stride: Union[int, Tuple[int, ...]] = 1, - padding: Union[int, Tuple[int, ...]] = 0, - output_padding: Union[int, Tuple[int, ...]] = 0, - groups: int = 1, - dilation: Union[int, Tuple[int, ...]] = 1): - if _should_use_custom_op(input): - return _conv2d_gradfix( - transpose=True, - 
weight_shape=weight.shape, - stride=stride, - padding=padding, - output_padding=output_padding, - groups=groups, - dilation=dilation).apply(input, weight, bias) - return torch.nn.functional.conv_transpose2d( - input=input, - weight=weight, - bias=bias, - stride=stride, - padding=padding, - output_padding=output_padding, - groups=groups, - dilation=dilation) - - -def _should_use_custom_op(input): - assert isinstance(input, torch.Tensor) - if (not enabled) or (not torch.backends.cudnn.enabled): - return False - if input.device.type != 'cuda': - return False - return True - - -def _to_tuple(x, ndim): - xs = tuple(x) if isinstance(x, (tuple, list)) else (x, ) * ndim - assert len(xs) == ndim - assert all(isinstance(x, int) for x in xs) - return xs - - -_conv2d_gradfix_cache: Dict = dict() -_null_tensor = torch.empty([0]) - - -def _conv2d_gradfix( - transpose: bool, - weight_shape: Tuple[int, ...], - stride: Union[int, Tuple[int, ...]], - padding: Union[int, Tuple[int, ...]], - output_padding: Union[int, Tuple[int, ...]], - dilation: Union[int, Tuple[int, ...]], - groups: int, -): - # Parse arguments. - ndim = 2 - weight_shape = tuple(weight_shape) - stride = _to_tuple(stride, ndim) - padding = _to_tuple(padding, ndim) - output_padding = _to_tuple(output_padding, ndim) - dilation = _to_tuple(dilation, ndim) - - # Lookup from cache. - key = (transpose, weight_shape, stride, padding, output_padding, dilation, - groups) - if key in _conv2d_gradfix_cache: - return _conv2d_gradfix_cache[key] - - # Validate arguments. 
- - assert groups >= 1 - assert len(weight_shape) == ndim + 2 - assert all(stride[i] >= 1 for i in range(ndim)) # type: ignore - assert all(padding[i] >= 0 for i in range(ndim)) # type: ignore - assert all(dilation[i] >= 0 for i in range(ndim)) # type: ignore - if not transpose: - assert all(output_padding[i] == 0 for i in range(ndim)) # type: ignore - else: # transpose - for i in range(ndim): - assert 0 <= output_padding[i] < max( # type: ignore - stride[i], # type: ignore - dilation[i]) # type: ignore - - # Helpers. - common_kwargs = dict( - stride=stride, padding=padding, dilation=dilation, groups=groups) - - def calc_output_padding(input_shape, output_shape): - if transpose: - return [0, 0] - return [ - input_shape[i + 2] - (output_shape[i + 2] - 1) * stride[i] - - (1 - 2 * padding[i]) - dilation[i] * (weight_shape[i + 2] - 1) - for i in range(ndim) - ] - - # Forward & backward. - class Conv2d(torch.autograd.Function): - - @staticmethod - def forward(ctx, input, weight, bias): - assert weight.shape == weight_shape - ctx.save_for_backward( - input if weight.requires_grad else _null_tensor, - weight if input.requires_grad else _null_tensor, - ) - ctx.input_shape = input.shape - - # Simple 1x1 convolution => cuBLAS (only on Volta, not on Ampere). - if weight_shape[2:] == stride == dilation == ( - 1, 1) and padding == ( - 0, 0) and torch.cuda.get_device_capability( - input.device) < (8, 0): - a = weight.reshape(groups, weight_shape[0] // groups, - weight_shape[1]) - b = input.reshape(input.shape[0], groups, - input.shape[1] // groups, -1) - c = (a.transpose(1, 2) if transpose else a) @ b.permute( - 1, 2, 0, 3).flatten(2) - c = c.reshape(-1, input.shape[0], - *input.shape[2:]).transpose(0, 1) - c = c if bias is None else c + bias.unsqueeze(0).unsqueeze( - 2).unsqueeze(3) - return c.contiguous( - memory_format=(torch.channels_last if input.stride(1) == - 1 else torch.contiguous_format)) - - # General case => cuDNN. 
- if transpose: - return torch.nn.functional.conv_transpose2d( - input=input, - weight=weight, - bias=bias, - output_padding=output_padding, - **common_kwargs) - return torch.nn.functional.conv2d( - input=input, weight=weight, bias=bias, **common_kwargs) - - @staticmethod - def backward(ctx, grad_output): - input, weight = ctx.saved_tensors - input_shape = ctx.input_shape - grad_input = None - grad_weight = None - grad_bias = None - - if ctx.needs_input_grad[0]: - p = calc_output_padding( - input_shape=input_shape, output_shape=grad_output.shape) - op = _conv2d_gradfix( - transpose=(not transpose), - weight_shape=weight_shape, - output_padding=p, - **common_kwargs) - grad_input = op.apply(grad_output, weight, None) - assert grad_input.shape == input_shape - - if ctx.needs_input_grad[1] and not weight_gradients_disabled: - grad_weight = Conv2dGradWeight.apply(grad_output, input) - assert grad_weight.shape == weight_shape - - if ctx.needs_input_grad[2]: - grad_bias = grad_output.sum([0, 2, 3]) - - return grad_input, grad_weight, grad_bias - - # Gradient with respect to the weights. - class Conv2dGradWeight(torch.autograd.Function): - - @staticmethod - def forward(ctx, grad_output, input): - ctx.save_for_backward( - grad_output if input.requires_grad else _null_tensor, - input if grad_output.requires_grad else _null_tensor, - ) - ctx.grad_output_shape = grad_output.shape - ctx.input_shape = input.shape - - # Simple 1x1 convolution => cuBLAS (on both Volta and Ampere). 
- if weight_shape[2:] == stride == dilation == ( - 1, 1) and padding == (0, 0): - a = grad_output.reshape(grad_output.shape[0], groups, - grad_output.shape[1] // groups, - -1).permute(1, 2, 0, 3).flatten(2) - b = input.reshape(input.shape[0], groups, - input.shape[1] // groups, - -1).permute(1, 2, 0, 3).flatten(2) - c = (b @ a.transpose(1, 2) if transpose else - a @ b.transpose(1, 2)).reshape(weight_shape) - return c.contiguous( - memory_format=(torch.channels_last if input.stride(1) == - 1 else torch.contiguous_format)) - - # PyTorch consolidated convolution backward API in PR: - # https://github.com/pytorch/pytorch/commit/3dc3651e0ee3623f669c3a2c096408dbc476d122 # noqa: E501 - # Enhance the code referring to the discussion: - # https://github.com/pytorch/pytorch/issues/74437 - if digit_version(torch.__version__) >= digit_version('1.11.0'): - empty_weight = torch.tensor( - 0.0, dtype=input.dtype, - device=input.device).expand(weight_shape) - output_padding = calc_output_padding(input.shape, - grad_output.shape) - return torch.ops.aten.convolution_backward( - grad_output, - input, - empty_weight, - None, - stride=stride, - dilation=dilation, - transposed=transpose, - padding=padding, - groups=groups, - output_padding=output_padding, - output_mask=[0, 1, 0])[1] - else: - is_rocm_pytorch = False - try: - from torch.utils.cpp_extension import ROCM_HOME - is_rocm_pytorch = True if ((torch.version.hip is not None) and - (ROCM_HOME is not None)) else False - except ImportError: - pass - name='' - flags=[] - if is_rocm_pytorch: - name = ('aten::miopen_convolution_transpose_backward_weight' - if transpose else - 'aten::miopen_convolution_backward_weight') - flags = [ - torch.backends.cudnn.benchmark, - torch.backends.cudnn.deterministic - ] - else: - # General case => cuDNN. 
- name = ('aten::cudnn_convolution_transpose_backward_weight' - if transpose else - 'aten::cudnn_convolution_backward_weight') - flags = [ - torch.backends.cudnn.benchmark, - torch.backends.cudnn.deterministic, - torch.backends.cudnn.allow_tf32 - ] - return torch._C._jit_get_operation(name)(weight_shape, - grad_output, input, - padding, stride, - dilation, groups, - *flags) - - @staticmethod - def backward(ctx, grad2_grad_weight): - grad_output, input = ctx.saved_tensors - grad_output_shape = ctx.grad_output_shape - input_shape = ctx.input_shape - grad2_grad_output = None - grad2_input = None - - if ctx.needs_input_grad[0]: - grad2_grad_output = Conv2d.apply(input, grad2_grad_weight, - None) - assert grad2_grad_output.shape == grad_output_shape - - if ctx.needs_input_grad[1]: - p = calc_output_padding( - input_shape=input_shape, output_shape=grad_output_shape) - op = _conv2d_gradfix( - transpose=(not transpose), - weight_shape=weight_shape, - output_padding=p, - **common_kwargs) - grad2_input = op.apply(grad_output, grad2_grad_weight, None) - assert grad2_input.shape == input_shape - - return grad2_grad_output, grad2_input - - _conv2d_gradfix_cache[key] = Conv2d - return Conv2d diff --git a/mmcv/ops/convex_iou.py b/mmcv/ops/convex_iou.py deleted file mode 100644 index 5005036..0000000 --- a/mmcv/ops/convex_iou.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -from typing import Tuple - -import torch - -from ..utils import ext_loader - -ext_module = ext_loader.load_ext('_ext', ['convex_iou', 'convex_giou']) - - -def convex_giou(pointsets: torch.Tensor, - polygons: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: - """Return generalized intersection-over-union (Jaccard index) between point - sets and polygons. - - Args: - pointsets (torch.Tensor): It has shape (N, 18), - indicating (x1, y1, x2, y2, ..., x9, y9) for each row. - polygons (torch.Tensor): It has shape (N, 8), - indicating (x1, y1, x2, y2, x3, y3, x4, y4) for each row. 
- - Returns: - tuple[torch.Tensor, torch.Tensor]: The first element is the gious - between point sets and polygons with the shape (N,). The second - element is the gradient of point sets with the shape (N, 18). - """ - output = pointsets.new_zeros((pointsets.size(0), 19)) - ext_module.convex_giou(pointsets, polygons, output) - convex_giou = output[:, -1] - points_grad = output[:, 0:-1] - return convex_giou, points_grad - - -def convex_iou(pointsets: torch.Tensor, - polygons: torch.Tensor) -> torch.Tensor: - """Return intersection-over-union (Jaccard index) between point sets and - polygons. - - Args: - pointsets (torch.Tensor): It has shape (N, 18), - indicating (x1, y1, x2, y2, ..., x9, y9) for each row. - polygons (torch.Tensor): It has shape (K, 8), - indicating (x1, y1, x2, y2, x3, y3, x4, y4) for each row. - - Returns: - torch.Tensor: Return the ious between point sets and polygons with the - shape (N, K). - """ - N, K = pointsets.size(0), polygons.size(0) - ious = pointsets.new_zeros((N, K)) - ext_module.convex_iou(pointsets, polygons, ious) - return ious diff --git a/mmcv/ops/corner_pool.py b/mmcv/ops/corner_pool.py index f18e92d..a33d798 100644 --- a/mmcv/ops/corner_pool.py +++ b/mmcv/ops/corner_pool.py @@ -1,37 +1,101 @@ # Copyright (c) OpenMMLab. All rights reserved. 
import torch -from torch import Tensor, nn -from mmengine.utils import digit_version +from torch import nn +from torch.autograd import Function + +from ..utils import ext_loader + +ext_module = ext_loader.load_ext('_ext', [ + 'top_pool_forward', 'top_pool_backward', 'bottom_pool_forward', + 'bottom_pool_backward', 'left_pool_forward', 'left_pool_backward', + 'right_pool_forward', 'right_pool_backward' +]) + _mode_dict = {'top': 0, 'bottom': 1, 'left': 2, 'right': 3} -def _corner_pool(x: Tensor, dim: int, flip: bool) -> Tensor: - size = x.size(dim) - output = x.clone() +class TopPoolFunction(Function): - ind = 1 - while ind < size: - if flip: - cur_start = 0 - cur_len = size - ind - next_start = ind - next_len = size - ind - else: - cur_start = ind - cur_len = size - ind - next_start = 0 - next_len = size - ind + @staticmethod + def symbolic(g, input): + output = g.op( + 'mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['top'])) + return output + + @staticmethod + def forward(ctx, input): + output = ext_module.top_pool_forward(input) + ctx.save_for_backward(input) + return output + + @staticmethod + def backward(ctx, grad_output): + input, = ctx.saved_tensors + output = ext_module.top_pool_backward(input, grad_output) + return output + + +class BottomPoolFunction(Function): + + @staticmethod + def symbolic(g, input): + output = g.op( + 'mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['bottom'])) + return output + + @staticmethod + def forward(ctx, input): + output = ext_module.bottom_pool_forward(input) + ctx.save_for_backward(input) + return output + + @staticmethod + def backward(ctx, grad_output): + input, = ctx.saved_tensors + output = ext_module.bottom_pool_backward(input, grad_output) + return output - # max_temp should be cloned for backward computation - max_temp = output.narrow(dim, cur_start, cur_len).clone() - cur_temp = output.narrow(dim, cur_start, cur_len) - next_temp = output.narrow(dim, next_start, next_len) - cur_temp[...] 
= torch.where(max_temp > next_temp, max_temp, next_temp) +class LeftPoolFunction(Function): - ind = ind << 1 + @staticmethod + def symbolic(g, input): + output = g.op( + 'mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['left'])) + return output - return output + @staticmethod + def forward(ctx, input): + output = ext_module.left_pool_forward(input) + ctx.save_for_backward(input) + return output + + @staticmethod + def backward(ctx, grad_output): + input, = ctx.saved_tensors + output = ext_module.left_pool_backward(input, grad_output) + return output + + +class RightPoolFunction(Function): + + @staticmethod + def symbolic(g, input): + output = g.op( + 'mmcv::MMCVCornerPool', input, mode_i=int(_mode_dict['right'])) + return output + + @staticmethod + def forward(ctx, input): + output = ext_module.right_pool_forward(input) + ctx.save_for_backward(input) + return output + + @staticmethod + def backward(ctx, grad_output): + input, = ctx.saved_tensors + output = ext_module.right_pool_backward(input, grad_output) + return output class CornerPool(nn.Module): @@ -40,13 +104,11 @@ class CornerPool(nn.Module): Corner Pooling is a new type of pooling layer that helps a convolutional network better localize corners of bounding boxes. - Please refer to `CornerNet: Detecting Objects as Paired Keypoints - `_ for more details. - + Please refer to https://arxiv.org/abs/1808.01244 for more details. Code is modified from https://github.com/princeton-vl/CornerNet-Lite. Args: - mode (str): Pooling orientation for the pooling layer + mode(str): Pooling orientation for the pooling layer - 'bottom': Bottom Pooling - 'left': Left Pooling @@ -57,6 +119,13 @@ class CornerPool(nn.Module): Feature map after pooling. 
""" + pool_functions = { + 'bottom': BottomPoolFunction, + 'left': LeftPoolFunction, + 'right': RightPoolFunction, + 'top': TopPoolFunction, + } + cummax_dim_flip = { 'bottom': (2, False), 'left': (3, True), @@ -64,13 +133,23 @@ class CornerPool(nn.Module): 'top': (2, True), } - def __init__(self, mode: str): - super().__init__() - assert mode in self.cummax_dim_flip + def __init__(self, mode): + super(CornerPool, self).__init__() + assert mode in self.pool_functions self.mode = mode + self.corner_pool = self.pool_functions[mode] + + def forward(self, x): + if torch.__version__ != 'parrots' and torch.__version__ >= '1.5.0': + if torch.onnx.is_in_onnx_export(): + assert torch.__version__ >= '1.7.0', \ + 'When `cummax` serves as an intermediate component whose '\ + 'outputs is used as inputs for another modules, it\'s '\ + 'expected that pytorch version must be >= 1.7.0, '\ + 'otherwise Error appears like: `RuntimeError: tuple '\ + 'appears in op that does not forward tuples, unsupported '\ + 'kind: prim::PythonOp`.' - def forward(self, x: Tensor) -> Tensor: - if torch.__version__ != 'parrots' and digit_version(torch.__version__) >= digit_version('1.5.0'): dim, flip = self.cummax_dim_flip[self.mode] if flip: x = x.flip(dim) @@ -79,5 +158,4 @@ class CornerPool(nn.Module): pool_tensor = pool_tensor.flip(dim) return pool_tensor else: - dim, flip = self.cummax_dim_flip[self.mode] - return _corner_pool(x, dim, flip) + return self.corner_pool.apply(x) diff --git a/mmcv/ops/correlation.py b/mmcv/ops/correlation.py index 319b764..3d0b79c 100644 --- a/mmcv/ops/correlation.py +++ b/mmcv/ops/correlation.py @@ -1,6 +1,4 @@ # Copyright (c) OpenMMLab. All rights reserved. 
-from typing import Tuple - import torch from torch import Tensor, nn from torch.autograd import Function @@ -17,14 +15,14 @@ class CorrelationFunction(Function): @staticmethod def forward(ctx, - input1: Tensor, - input2: Tensor, - kernel_size: int = 1, - max_displacement: int = 1, - stride: int = 1, - padding: int = 1, - dilation: int = 1, - dilation_patch: int = 1) -> Tensor: + input1, + input2, + kernel_size=1, + max_displacement=1, + stride=1, + padding=1, + dilation=1, + dilation_patch=1): ctx.save_for_backward(input1, input2) @@ -62,9 +60,7 @@ class CorrelationFunction(Function): @staticmethod @once_differentiable - def backward( - ctx, grad_output: Tensor - ) -> Tuple[Tensor, Tensor, None, None, None, None, None, None]: + def backward(ctx, grad_output): input1, input2 = ctx.saved_tensors kH, kW = ctx.kernel_size diff --git a/mmcv/ops/csrc/README.md b/mmcv/ops/csrc/README.md index 8fcc6eb..3bc0200 100644 --- a/mmcv/ops/csrc/README.md +++ b/mmcv/ops/csrc/README.md @@ -13,150 +13,158 @@ This folder contains all non-python code for MMCV custom ops. Please follow the │ ├── pytorch_cpp_helper.hpp │ ├── pytorch_cuda_helper.hpp │ ├── pytorch_device_registry.hpp -│   ├── cuda -│   │ ├── common_cuda_helper.hpp -│   │ ├── parrots_cudawarpfunction.cuh -│   │ ├── ... -│   │ └── ops_cuda_kernel.cuh -|   ├── mps -│   │ ├── MPSLibrary.h -│   │ ├── ... -│   │ └── MPSUtils.h -|   ├── mlu -│   │ └── ... -|   └── utils -│   │ └── ... +│   └── cuda +│   ├── common_cuda_helper.hpp +│   ├── parrots_cudawarpfunction.cuh +│   ├── ... +│   └── ops_cuda_kernel.cuh +├── onnxruntime +│   ├── onnxruntime_register.h +│   ├── onnxruntime_session_options_config_keys.h +│   ├── ort_mmcv_utils.h +│   ├── ... +│   ├── onnx_ops.h +│   └── cpu +│ ├── onnxruntime_register.cpp +│      ├── ... +│      └── onnx_ops_impl.cpp ├── parrots │   ├── ... │   ├── ops.cpp │   ├── ops_parrots.cpp │   └── ops_pytorch.h -└── pytorch -    ├── info.cpp -    ├── pybind.cpp -    ├── ... 
-    ├── ops.cpp -    ├── cuda -    │   ├── ... -    │   └── ops_cuda.cu -    ├── cpu -    │   ├── ... -    │   └── ops.cpp -    ├── mps -    │   ├── ... -    |   └── op_mps.mm -    └── mlu -       ├── ... -       └── op_mlu.cpp +├── pytorch +│   ├── info.cpp +│   ├── pybind.cpp +│   ├── ... +│   ├── ops.cpp +│   ├── cuda +│   │   ├── ... +│   │   └── ops_cuda.cu +│   └── cpu +│      ├── ... +│      └── ops.cpp +└── tensorrt + ├── trt_cuda_helper.cuh + ├── trt_plugin_helper.hpp + ├── trt_plugin.hpp + ├── trt_serialize.hpp + ├── ... + ├── trt_ops.hpp + └── plugins +    ├── trt_cuda_helper.cu +    ├── trt_plugin.cpp +    ├── ... +    ├── trt_ops.cpp +    └── trt_ops_kernel.cu ``` ## Components - `common`: This directory contains all tools and shared codes. - `cuda`: The cuda kernels which can be shared by all backends. **HIP** kernel is also here since they have similar syntax. - - `mps`: The tools used to support MPS ops. **NOTE** that MPS support is **experimental**. - - `mlu`: The MLU kernels used to support [Cambricon](https://www.cambricon.com/) device. - - `utils`: The kernels and utils of spconv. +- `onnxruntime`: **ONNX Runtime** support for custom ops. + - `cpu`: CPU implementation of supported ops. - `parrots`: **Parrots** is a deep learning frame for model training and inference. Parrots custom ops are placed in this directory. - `pytorch`: **PyTorch** custom ops are supported by binding C++ to Python with **pybind11**. The ops implementation and binding codes are placed in this directory. - `cuda`: This directory contains cuda kernel launchers, which feed memory pointers of tensor to the cuda kernel in `common/cuda`. The launchers provide c++ interface of cuda implementation of corresponding custom ops. - `cpu`: This directory contain cpu implementations of corresponding custom ops. - - `mlu`: This directory contain launchers of each MLU kernels. - - `mps`: MPS ops implementation and launchers. +- `tensorrt`: **TensorRT** support for custom ops. 
+ - `plugins`: This directory contains the implementation of the supported custom ops. Some ops might also use shared cuda kernel in `common/cuda`. ## How to add new PyTorch ops? 1. (Optional) Add shared kernel in `common` to support special hardware platform. - ```c++ - // src/common/cuda/new_ops_cuda_kernel.cuh - - template - __global__ void new_ops_forward_cuda_kernel(const T* input, T* output, ...) { - // forward here - } - - ``` - - Add cuda kernel launcher in `pytorch/cuda`. - - ```c++ - // src/pytorch/cuda - #include - - void NewOpsForwardCUDAKernelLauncher(Tensor input, Tensor output, ...){ - // initialize - at::cuda::CUDAGuard device_guard(input.device()); - cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - ... - AT_DISPATCH_FLOATING_TYPES_AND_HALF( - input.scalar_type(), "new_ops_forward_cuda_kernel", ([&] { - new_ops_forward_cuda_kernel - <<>>( - input.data_ptr(), output.data_ptr(),...); - })); - AT_CUDA_CHECK(cudaGetLastError()); - } - ``` + ```c++ + // src/common/cuda/new_ops_cuda_kernel.cuh + + template + __global__ void new_ops_forward_cuda_kernel(const T* input, T* output, ...) { + // forward here + } + + ``` + + Add cuda kernel launcher in `pytorch/cuda`. + + ```c++ + // src/pytorch/cuda + #include + + void NewOpsForwardCUDAKernelLauncher(Tensor input, Tensor output, ...){ + // initialize + at::cuda::CUDAGuard device_guard(input.device()); + cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + ... + AT_DISPATCH_FLOATING_TYPES_AND_HALF( + input.scalar_type(), "new_ops_forward_cuda_kernel", ([&] { + new_ops_forward_cuda_kernel + <<>>( + input.data_ptr(), output.data_ptr(),...); + })); + AT_CUDA_CHECK(cudaGetLastError()); + } + ``` 2. Register implementation for different devices. - ```c++ - // src/pytorch/cuda/cudabind.cpp - ... + ```c++ + // src/pytorch/cuda/cudabind.cpp + ... 
- Tensor new_ops_forward_cuda(Tensor input, Tensor output, ...){ - // implement cuda forward here - // use `NewOpsForwardCUDAKernelLauncher` here - } - // declare interface here. - Tensor new_ops_forward_impl(Tensor input, Tensor output, ...); - // register the implementation for given device (CUDA here). - REGISTER_DEVICE_IMPL(new_ops_forward_impl, CUDA, new_ops_forward_cuda); - ``` + Tensor new_ops_forward_cuda(Tensor input, Tensor output, ...){ + // implement cuda forward here + // use `NewOpsForwardCUDAKernelLauncher` here + } + // declare interface here. + Tensor new_ops_forward_impl(Tensor input, Tensor output, ...); + // register the implementation for given device (CUDA here). + REGISTER_DEVICE_IMPL(new_ops_forward_impl, CUDA, new_ops_forward_cuda); + ``` 3. Add ops implementation in `pytorch` directory. Select different implementations according to device type. - ```c++ - // src/pytorch/new_ops.cpp - Tensor new_ops_forward_impl(Tensor input, Tensor output, ...){ - // dispatch the implementation according to the device type of input. - DISPATCH_DEVICE_IMPL(new_ops_forward_impl, input, output, ...); - } - ... + ```c++ + // src/pytorch/new_ops.cpp + Tensor new_ops_forward_impl(Tensor input, Tensor output, ...){ + // dispatch the implementation according to the device type of input. + DISPATCH_DEVICE_IMPL(new_ops_forward_impl, input, output, ...); + } + ... - Tensor new_ops_forward(Tensor input, Tensor output, ...){ - return new_ops_forward_impl(input, output, ...); - } - ``` + Tensor new_ops_forward(Tensor input, Tensor output, ...){ + return new_ops_forward_impl(input, output, ...); + } + ``` 4. Binding the implementation in `pytorch/pybind.cpp` - ```c++ - // src/pytorch/pybind.cpp + ```c++ + // src/pytorch/pybind.cpp - ... + ... - Tensor new_ops_forward(Tensor input, Tensor output, ...); + Tensor new_ops_forward(Tensor input, Tensor output, ...); - ... + ... 
- // bind with pybind11 - m.def("new_ops_forward", &new_ops_forward, "new_ops_forward", - py::arg("input"), py::arg("output"), ...); + // bind with pybind11 + m.def("new_ops_forward", &new_ops_forward, "new_ops_forward", + py::arg("input"), py::arg("output"), ...); - ... + ... - ``` + ``` 5. Build MMCV again. Enjoy new ops in python - ```python - from ..utils import ext_loader - ext_module = ext_loader.load_ext('_ext', ['new_ops_forward']) + ```python + from ..utils import ext_loader + ext_module = ext_loader.load_ext('_ext', ['new_ops_forward']) - ... + ... - ext_module.new_ops_forward(input, output, ...) + ext_module.new_ops_forward(input, output, ...) - ``` + ``` diff --git a/mmcv/ops/csrc/common/box_iou_rotated_utils.hpp b/mmcv/ops/csrc/common/box_iou_rotated_utils.hpp index a8453ea..67190dc 100644 --- a/mmcv/ops/csrc/common/box_iou_rotated_utils.hpp +++ b/mmcv/ops/csrc/common/box_iou_rotated_utils.hpp @@ -220,10 +220,6 @@ HOST_DEVICE_INLINE int convex_hull_graham(const Point (&p)[24], return temp > 0; } }); - // compute distance to origin after sort, since the points are now different. 
- for (int i = 0; i < num_in; i++) { - dist[i] = dot_2d(q[i], q[i]); - } #endif // Step 4: @@ -270,17 +266,6 @@ HOST_DEVICE_INLINE int convex_hull_graham(const Point (&p)[24], return m; } -template -HOST_DEVICE_INLINE T quadri_box_area(const Point (&q)[4]) { - T area = 0; -#pragma unroll - for (int i = 1; i < 3; i++) { - area += fabs(cross_2d(q[i] - q[0], q[i + 1] - q[0])); - } - - return area / 2.0; -} - template HOST_DEVICE_INLINE T polygon_area(const Point (&q)[24], const int& m) { if (m <= 2) { @@ -319,25 +304,6 @@ HOST_DEVICE_INLINE T rotated_boxes_intersection(const RotatedBox& box1, return polygon_area(orderedPts, num_convex); } -template -HOST_DEVICE_INLINE T quadri_boxes_intersection(const Point (&pts1)[4], - const Point (&pts2)[4]) { - // There are up to 4 x 4 + 4 + 4 = 24 intersections (including dups) returned - // from rotated_rect_intersection_pts - Point intersectPts[24], orderedPts[24]; - - int num = get_intersection_points(pts1, pts2, intersectPts); - - if (num <= 2) { - return 0.0; - } - - // Convex Hull to order the intersection points in clockwise order and find - // the contour area. 
- int num_convex = convex_hull_graham(intersectPts, num, orderedPts, true); - return polygon_area(orderedPts, num_convex); -} - } // namespace template @@ -375,52 +341,3 @@ HOST_DEVICE_INLINE T single_box_iou_rotated(T const* const box1_raw, const T iou = intersection / baseS; return iou; } - -template -HOST_DEVICE_INLINE T single_box_iou_quadri(T const* const pts1_raw, - T const* const pts2_raw, - const int mode_flag) { - // shift center to the middle point to achieve higher precision in result - Point pts1[4], pts2[4]; - - auto center_shift_x = - (pts1_raw[0] + pts2_raw[0] + pts1_raw[2] + pts2_raw[2] + pts1_raw[4] + - pts2_raw[4] + pts1_raw[6] + pts2_raw[6]) / - 8.0; - auto center_shift_y = - (pts1_raw[1] + pts2_raw[1] + pts1_raw[3] + pts2_raw[3] + pts1_raw[5] + - pts2_raw[5] + pts1_raw[7] + pts2_raw[7]) / - 8.0; - pts1[0].x = pts1_raw[0] - center_shift_x; - pts1[0].y = pts1_raw[1] - center_shift_y; - pts1[1].x = pts1_raw[2] - center_shift_x; - pts1[1].y = pts1_raw[3] - center_shift_y; - pts1[2].x = pts1_raw[4] - center_shift_x; - pts1[2].y = pts1_raw[5] - center_shift_y; - pts1[3].x = pts1_raw[6] - center_shift_x; - pts1[3].y = pts1_raw[7] - center_shift_y; - pts2[0].x = pts2_raw[0] - center_shift_x; - pts2[0].y = pts2_raw[1] - center_shift_y; - pts2[1].x = pts2_raw[2] - center_shift_x; - pts2[1].y = pts2_raw[3] - center_shift_y; - pts2[2].x = pts2_raw[4] - center_shift_x; - pts2[2].y = pts2_raw[5] - center_shift_y; - pts2[3].x = pts2_raw[6] - center_shift_x; - pts2[3].y = pts2_raw[7] - center_shift_y; - - const T area1 = quadri_box_area(pts1); - const T area2 = quadri_box_area(pts2); - if (area1 < 1e-14 || area2 < 1e-14) { - return 0.f; - } - - const T intersection = quadri_boxes_intersection(pts1, pts2); - T baseS = 1.0; - if (mode_flag == 0) { - baseS = (area1 + area2 - intersection); - } else if (mode_flag == 1) { - baseS = area1; - } - const T iou = intersection / baseS; - return iou; -} diff --git 
a/mmcv/ops/csrc/common/cuda/active_rotated_filter_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/active_rotated_filter_cuda_kernel.cuh deleted file mode 100644 index 36e4110..0000000 --- a/mmcv/ops/csrc/common/cuda/active_rotated_filter_cuda_kernel.cuh +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved. -// Modified from -// https://github.com/csuhan/s2anet/blob/master/mmdet/ops/orn/src/cuda/ActiveRotatingFilter_cuda.cu -#ifndef ACTIVE_ROTATED_FILTER_CUDA_KERNEL_CUH -#define ACTIVE_ROTATED_FILTER_CUDA_KERNEL_CUH - -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else -#include "pytorch_cuda_helper.hpp" -#endif - -template -__global__ void active_rotated_filter_forward_cuda_kernel( - const int nthreads, const scalar_t* weight_data, const int* indices_data, - const int num_input_planes, const int num_output_planes, - const int num_orientations, const int num_rotations, const int nEntry, - scalar_t* output_data) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { - int l = index % nEntry; - int j = (index / nEntry) % num_input_planes; - int i = index / nEntry / num_input_planes; - int k; - scalar_t val = *(weight_data + index); - for (k = 0; k < num_rotations; k++) { - int idx = (int)(*(indices_data + l * num_rotations + k)) - 1; - scalar_t* target = output_data + - i * (num_rotations * num_input_planes * nEntry) + - k * (num_input_planes * nEntry) + j * (nEntry) + idx; - *target = val; - } - } -} - -template -__global__ void active_rotated_filter_backward_cuda_kernel( - const int nthreads, const scalar_t* gradWeight_data, - const int* indices_data, const int num_input_planes, - const int num_output_planes, const int num_orientations, - const int num_rotations, const int nEntry, scalar_t* weight_data) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { - int l = index % nEntry; - int j = (index / nEntry) % num_input_planes; - int i = index / nEntry / num_input_planes; - int k; - scalar_t* val = weight_data + index; - *val = 0; - scalar_t tmp = 0; 
- for (k = 0; k < num_rotations; k++) { - int idx = (int)(*(indices_data + l * num_rotations + k)) - 1; - scalar_t target = - *(gradWeight_data + i * (num_rotations * num_input_planes * nEntry) + - k * (num_input_planes * nEntry) + j * (nEntry) + idx); - tmp = tmp + target; - } - *val = tmp; - } -} -#endif // ACTIVE_ROTATED_FILTER_CUDA_KERNEL_CUH diff --git a/mmcv/ops/csrc/common/cuda/assign_score_withk_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/assign_score_withk_cuda_kernel.cuh index 9f92508..056d123 100644 --- a/mmcv/ops/csrc/common/cuda/assign_score_withk_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/assign_score_withk_cuda_kernel.cuh @@ -22,34 +22,34 @@ __global__ void assign_score_withk_forward_cuda_kernel( const int O, const int aggregate, const T* points, const T* centers, const T* scores, const int64_t* knn_idx, T* output) { // ----- parallel loop for B, N1, K and O --------- - CUDA_1D_KERNEL_LOOP(i, B * O * N1 * K) { - // ------- loop for M ---------- - const int b = (int)(i / (O * N1 * K)); - const int o = (int)(i % (O * N1 * K) / (N1 * K)); - const int n = (int)(i % (N1 * K) / K); - const int k = (int)(i % K); - const int cn = (int)knn_idx[b * K * N1 + n * K + - 0]; // The first neighbor is the center point - const int kn = (int)knn_idx[b * K * N1 + n * K + k]; - if (kn >= N0 || - kn < 0) { // if index overflows, it is out of the neighborhood range - return; - } - assert(b < B); - assert(kn < N0); - assert(cn < N0); - assert(o < O); - assert(n < N1); - const int out_idx = b * N1 * O * K + o * N1 * K + n * K + k; - T val = output[out_idx]; - for (int m = 0; m < M; m++) { - val += points[b * N0 * M * O + kn * M * O + m * O + o] * - scores[b * N1 * K * M + n * K * M + k * M + m] - - centers[b * N0 * M * O + cn * M * O + m * O + o] * - scores[b * N1 * K * M + n * K * M + k * M + m]; - } - output[out_idx] = val; + long i = blockIdx.x * blockDim.x + threadIdx.x; + if (i >= B * N1 * K * O) return; + // ------- loop for M ---------- + const int b = (int)(i / 
(O * N1 * K)); + const int o = (int)(i % (O * N1 * K) / (N1 * K)); + const int n = (int)(i % (N1 * K) / K); + const int k = (int)(i % K); + const int cn = (int)knn_idx[b * K * N1 + n * K + + 0]; // The first neighbor is the center point + const int kn = (int)knn_idx[b * K * N1 + n * K + k]; + if (kn >= N0 || + kn < 0) { // if index overflows, it is out of the neighborhood range + return; + } + assert(b < B); + assert(kn < N0); + assert(cn < N0); + assert(o < O); + assert(n < N1); + const int out_idx = b * N1 * O * K + o * N1 * K + n * K + k; + T val = output[out_idx]; + for (int m = 0; m < M; m++) { + val += points[b * N0 * M * O + kn * M * O + m * O + o] * + scores[b * N1 * K * M + n * K * M + k * M + m] - + centers[b * N0 * M * O + cn * M * O + m * O + o] * + scores[b * N1 * K * M + n * K * M + k * M + m]; } + output[out_idx] = val; } template @@ -58,27 +58,27 @@ __global__ void assign_score_withk_points_backward_cuda_kernel( const int O, const int aggregate, const T* grad_out, const T* scores, const int64_t* knn_idx, T* grad_points, T* grad_centers) { // ----- parallel loop for B, M, O --------- - CUDA_1D_KERNEL_LOOP(i, B * M * O) { - int b = (int)(i / (M * O)); - int m = (int)(i % (M * O) / O); - int o = (int)(i % O); + long i = blockIdx.x * blockDim.x + threadIdx.x; + if (i >= B * M * O) return; + int b = (int)(i / (M * O)); + int m = (int)(i % (M * O) / O); + int o = (int)(i % O); - // ----- loop for N,K --------- - for (int n = 0; n < N; n++) { - for (int k = 0; k < K; k++) { - int kn = knn_idx[b * N * K + n * K + k]; - int cn = knn_idx[b * N * K + n * K + 0]; - if (kn >= N0 || kn < 0) { // if index overflows, it is out of the - // neighborhood range - continue; - } - atomicAdd(grad_points + b * N0 * M * O + kn * M * O + m * O + o, - scores[b * N * K * M + n * K * M + k * M + m] * - grad_out[b * O * N * K + o * N * K + n * K + k]); - atomicAdd(grad_centers + b * N0 * M * O + cn * M * O + m * O + o, - -scores[b * N * K * M + n * K * M + k * M + m] * - 
grad_out[b * O * N * K + o * N * K + n * K + k]); + // ----- loop for N,K --------- + for (int n = 0; n < N; n++) { + for (int k = 0; k < K; k++) { + int kn = knn_idx[b * N * K + n * K + k]; + int cn = knn_idx[b * N * K + n * K + 0]; + if (kn >= N0 || + kn < 0) { // if index overflows, it is out of the neighborhood range + continue; } + atomicAdd(grad_points + b * N0 * M * O + kn * M * O + m * O + o, + scores[b * N * K * M + n * K * M + k * M + m] * + grad_out[b * O * N * K + o * N * K + n * K + k]); + atomicAdd(grad_centers + b * N0 * M * O + cn * M * O + m * O + o, + -scores[b * N * K * M + n * K * M + k * M + m] * + grad_out[b * O * N * K + o * N * K + n * K + k]); } } } @@ -89,28 +89,28 @@ __global__ void assign_score_withk_scores_backward_cuda_kernel( const int O, const int aggregate, const T* grad_out, const T* points, const T* centers, const int64_t* knn_idx, T* grad_scores) { // ----- parallel loop for B, N, K, M --------- - CUDA_1D_KERNEL_LOOP(i, B * N * K * M) { - const int b = (int)(i / (N * M * K)); - const int n = (int)(i % (N * M * K) / M / K); - const int k = (int)(i % (M * K) / M); - const int m = (int)(i % M); - const int cn = knn_idx[b * N * K + n * K + 0]; - const int kn = knn_idx[b * N * K + n * K + k]; - if (kn >= N0 || - kn < 0) { // if index overflows, it is out of the neighborhood range - return; - } + long i = blockIdx.x * blockDim.x + threadIdx.x; + if (i >= B * N * K * M) return; + const int b = (int)(i / (N * M * K)); + const int n = (int)(i % (N * M * K) / M / K); + const int k = (int)(i % (M * K) / M); + const int m = (int)(i % M); + const int cn = knn_idx[b * N * K + n * K + 0]; + const int kn = knn_idx[b * N * K + n * K + k]; + if (kn >= N0 || + kn < 0) { // if index overflows, it is out of the neighborhood range + return; + } - // -------------- loop for O ------------------------ - const int out_idx = b * N * K * M + n * K * M + k * M + m; - T val = grad_scores[out_idx]; - for (int o = 0; o < O; o++) { - val += (points[b * N0 * M * 
O + kn * M * O + m * O + o] - - centers[b * N0 * M * O + cn * M * O + m * O + o]) * - grad_out[b * O * N * K + o * N * K + n * K + k]; - } - grad_scores[out_idx] = val; + // -------------- loop for O ------------------------ + const int out_idx = b * N * K * M + n * K * M + k * M + m; + T val = grad_scores[out_idx]; + for (int o = 0; o < O; o++) { + val += (points[b * N0 * M * O + kn * M * O + m * O + o] - + centers[b * N0 * M * O + cn * M * O + m * O + o]) * + grad_out[b * O * N * K + o * N * K + n * K + k]; } + grad_scores[out_idx] = val; } #endif // ASSIGN_SCORE_WITHK_CUDA_KERNEL_CUH diff --git a/mmcv/ops/csrc/common/cuda/ball_query_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/ball_query_cuda_kernel.cuh index 632b5c4..ba2af01 100644 --- a/mmcv/ops/csrc/common/cuda/ball_query_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/ball_query_cuda_kernel.cuh @@ -21,36 +21,35 @@ __global__ void ball_query_forward_cuda_kernel(int b, int n, int m, // output: // idx: (B, M, nsample) int bs_idx = blockIdx.y; - CUDA_1D_KERNEL_LOOP(pt_idx, m) { - if (bs_idx >= b) return; + int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (bs_idx >= b || pt_idx >= m) return; - new_xyz += bs_idx * m * 3 + pt_idx * 3; - xyz += bs_idx * n * 3; - idx += bs_idx * m * nsample + pt_idx * nsample; + new_xyz += bs_idx * m * 3 + pt_idx * 3; + xyz += bs_idx * n * 3; + idx += bs_idx * m * nsample + pt_idx * nsample; - float max_radius2 = max_radius * max_radius; - float min_radius2 = min_radius * min_radius; - T new_x = new_xyz[0]; - T new_y = new_xyz[1]; - T new_z = new_xyz[2]; + float max_radius2 = max_radius * max_radius; + float min_radius2 = min_radius * min_radius; + T new_x = new_xyz[0]; + T new_y = new_xyz[1]; + T new_z = new_xyz[2]; - int cnt = 0; - for (int k = 0; k < n; ++k) { - T x = xyz[k * 3 + 0]; - T y = xyz[k * 3 + 1]; - T z = xyz[k * 3 + 2]; - T d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + - (new_z - z) * (new_z - z); - if (d2 == 0 || (d2 >= min_radius2 && d2 < 
max_radius2)) { - if (cnt == 0) { - for (int l = 0; l < nsample; ++l) { - idx[l] = k; - } + int cnt = 0; + for (int k = 0; k < n; ++k) { + T x = xyz[k * 3 + 0]; + T y = xyz[k * 3 + 1]; + T z = xyz[k * 3 + 2]; + T d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + + (new_z - z) * (new_z - z); + if (d2 == 0 || (d2 >= min_radius2 && d2 < max_radius2)) { + if (cnt == 0) { + for (int l = 0; l < nsample; ++l) { + idx[l] = k; } - idx[cnt] = k; - ++cnt; - if (cnt >= nsample) break; } + idx[cnt] = k; + ++cnt; + if (cnt >= nsample) break; } } } diff --git a/mmcv/ops/csrc/common/cuda/bbox_overlaps_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/bbox_overlaps_cuda_kernel.cuh index 15bd91e..249c9e8 100644 --- a/mmcv/ops/csrc/common/cuda/bbox_overlaps_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/bbox_overlaps_cuda_kernel.cuh @@ -8,27 +8,6 @@ #include "pytorch_cuda_helper.hpp" #endif -template -__device__ __forceinline__ void load_bbox(const T* bbox, const int base, T& x1, - T& y1, T& x2, T& y2) { - x1 = bbox[base]; - y1 = bbox[base + 1]; - x2 = bbox[base + 2]; - y2 = bbox[base + 3]; -} - -template <> -__device__ __forceinline__ void load_bbox(const float* bbox, - const int base, float& x1, - float& y1, float& x2, - float& y2) { - const float4 bbox_offset = reinterpret_cast(bbox + base)[0]; - x1 = bbox_offset.x; - y1 = bbox_offset.y; - x2 = bbox_offset.z; - y2 = bbox_offset.w; -} - template __global__ void bbox_overlaps_cuda_kernel(const T* bbox1, const T* bbox2, T* ious, const int num_bbox1, @@ -37,111 +16,69 @@ __global__ void bbox_overlaps_cuda_kernel(const T* bbox1, const T* bbox2, const int offset) { if (aligned) { CUDA_1D_KERNEL_LOOP(index, num_bbox1) { - const int b1 = index; - const int b2 = index; - - const int base1 = b1 << 2; // b1 * 4 - T b1_x1, b1_y1, b1_x2, b1_y2; - load_bbox(bbox1, base1, b1_x1, b1_y1, b1_x2, b1_y2); - const T b1_area = (b1_x2 - b1_x1 + offset) * (b1_y2 - b1_y1 + offset); - - const int base2 = b2 << 2; // b2 * 4 - T b2_x1, b2_y1, b2_x2, 
b2_y2; - load_bbox(bbox2, base2, b2_x1, b2_y1, b2_x2, b2_y2); - const T b2_area = (b2_x2 - b2_x1 + offset) * (b2_y2 - b2_y1 + offset); - - const T left = fmaxf(b1_x1, b2_x1), right = fminf(b1_x2, b2_x2); - const T top = fmaxf(b1_y1, b2_y1), bottom = fminf(b1_y2, b2_y2); - const T width = fmaxf(right - left + offset, 0.f); - const T height = fmaxf(bottom - top + offset, 0.f); - const T interS = width * height; - - const T baseS = - fmaxf(mode == 0 ? b1_area + b2_area - interS : b1_area, T(offset)); + int b1 = index; + int b2 = index; + + int base1 = b1 * 4; + T b1_x1 = bbox1[base1]; + T b1_y1 = bbox1[base1 + 1]; + T b1_x2 = bbox1[base1 + 2]; + T b1_y2 = bbox1[base1 + 3]; + T b1_area = (b1_x2 - b1_x1 + offset) * (b1_y2 - b1_y1 + offset); + + int base2 = b2 * 4; + T b2_x1 = bbox2[base2]; + T b2_y1 = bbox2[base2 + 1]; + T b2_x2 = bbox2[base2 + 2]; + T b2_y2 = bbox2[base2 + 3]; + T b2_area = (b2_x2 - b2_x1 + offset) * (b2_y2 - b2_y1 + offset); + + T left = fmaxf(b1_x1, b2_x1), right = fminf(b1_x2, b2_x2); + T top = fmaxf(b1_y1, b2_y1), bottom = fminf(b1_y2, b2_y2); + T width = fmaxf(right - left + offset, 0.f); + T height = fmaxf(bottom - top + offset, 0.f); + T interS = width * height; + T baseS = 1.0; + if (mode == 0) { + baseS = fmaxf(b1_area + b2_area - interS, T(offset)); + } else if (mode == 1) { + baseS = fmaxf(b1_area, T(offset)); + } ious[index] = interS / baseS; } } else { CUDA_1D_KERNEL_LOOP(index, num_bbox1 * num_bbox2) { - const int b1 = index / num_bbox2; - const int b2 = index % num_bbox2; - - const int base1 = b1 << 2; // b1 * 4 - T b1_x1, b1_y1, b1_x2, b1_y2; - load_bbox(bbox1, base1, b1_x1, b1_y1, b1_x2, b1_y2); - const T b1_area = (b1_x2 - b1_x1 + offset) * (b1_y2 - b1_y1 + offset); - - const int base2 = b2 << 2; // b2 * 4 - T b2_x1, b2_y1, b2_x2, b2_y2; - load_bbox(bbox2, base2, b2_x1, b2_y1, b2_x2, b2_y2); - const T b2_area = (b2_x2 - b2_x1 + offset) * (b2_y2 - b2_y1 + offset); - - const T left = fmaxf(b1_x1, b2_x1), right = fminf(b1_x2, b2_x2); - 
const T top = fmaxf(b1_y1, b2_y1), bottom = fminf(b1_y2, b2_y2); - const T width = fmaxf(right - left + offset, 0.f); - const T height = fmaxf(bottom - top + offset, 0.f); - const T interS = width * height; - - const T baseS = - fmaxf(mode == 0 ? b1_area + b2_area - interS : b1_area, T(offset)); + int b1 = index / num_bbox2; + int b2 = index % num_bbox2; + + int base1 = b1 * 4; + T b1_x1 = bbox1[base1]; + T b1_y1 = bbox1[base1 + 1]; + T b1_x2 = bbox1[base1 + 2]; + T b1_y2 = bbox1[base1 + 3]; + T b1_area = (b1_x2 - b1_x1 + offset) * (b1_y2 - b1_y1 + offset); + + int base2 = b2 * 4; + T b2_x1 = bbox2[base2]; + T b2_y1 = bbox2[base2 + 1]; + T b2_x2 = bbox2[base2 + 2]; + T b2_y2 = bbox2[base2 + 3]; + T b2_area = (b2_x2 - b2_x1 + offset) * (b2_y2 - b2_y1 + offset); + + T left = fmaxf(b1_x1, b2_x1), right = fminf(b1_x2, b2_x2); + T top = fmaxf(b1_y1, b2_y1), bottom = fminf(b1_y2, b2_y2); + T width = fmaxf(right - left + offset, 0.f); + T height = fmaxf(bottom - top + offset, 0.f); + T interS = width * height; + T baseS = 1.0; + if (mode == 0) { + baseS = fmaxf(b1_area + b2_area - interS, T(offset)); + } else if (mode == 1) { + baseS = fmaxf(b1_area, T(offset)); + } ious[index] = interS / baseS; } } } -#if __CUDA_ARCH__ >= 530 -__device__ __forceinline__ __half __half_area(const __half x1, const __half y1, - const __half x2, const __half y2, - const __half offset) { - const __half half_w = __hadd(__hsub(x2, x1), offset); - const __half half_h = __hadd(__hsub(y2, y1), offset); - return __hmul(half_w, half_h); -} - -__device__ __forceinline__ __half __half_max(const __half a, const __half b) { - return __hge(a, b) ? a : b; -} - -__device__ __forceinline__ __half __half_min(const __half a, const __half b) { - return __hle(a, b) ? a : b; -} - -// fp16 won't provide much increase when aligned==true. It is useful when -// aligned==false, which would give you ~40% bonus. 
-__device__ void bbox_overlaps_cuda_kernel_half( - const __half* bbox1, const __half* bbox2, __half* ious, const int num_bbox1, - const int num_bbox2, const int mode, const bool aligned, const int offset) { - const int num_output = aligned ? num_bbox1 : num_bbox1 * num_bbox2; - const __half h_offset = __int2half_rn(offset); - CUDA_1D_KERNEL_LOOP(index, num_output) { - const int b1 = aligned ? index : index / num_bbox2; - const int b2 = aligned ? index : index % num_bbox2; - - const int base1 = b1 << 2; - __half b1_x1, b1_y1, b1_x2, b1_y2; - load_bbox<__half>(bbox1, base1, b1_x1, b1_y1, b1_x2, b1_y2); - const __half b1_area = __half_area(b1_x1, b1_y1, b1_x2, b1_y2, h_offset); - - const int base2 = b2 << 2; - __half b2_x1, b2_y1, b2_x2, b2_y2; - load_bbox<__half>(bbox2, base2, b2_x1, b2_y1, b2_x2, b2_y2); - const __half b2_area = __half_area(b2_x1, b2_y1, b2_x2, b2_y2, h_offset); - - const __half left = __half_max(b1_x1, b2_x1), - right = __half_min(b1_x2, b2_x2); - const __half top = __half_max(b1_y1, b2_y1), - bottom = __half_min(b1_y2, b2_y2); - const __half width = - __half_max(__hadd(__hsub(right, left), h_offset), __float2half(0.f)); - const __half height = - __half_max(__hadd(__hsub(bottom, top), h_offset), __float2half(0.f)); - const __half interS = __hmul(width, height); - - const __half baseS = __half_max( - mode == 0 ? __hsub(__hadd(b1_area, b2_area), interS) : b1_area, - h_offset); - ious[index] = __hdiv(interS, baseS); - } -} -#endif // __CUDA_ARCH__ >= 530 - #endif // BBOX_OVERLAPS_CUDA_KERNEL_CUH diff --git a/mmcv/ops/csrc/common/cuda/bezier_align_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/bezier_align_cuda_kernel.cuh deleted file mode 100644 index 5376104..0000000 --- a/mmcv/ops/csrc/common/cuda/bezier_align_cuda_kernel.cuh +++ /dev/null @@ -1,230 +0,0 @@ -// Copyright (c) OpenMMLab. 
All rights reserved -// Modified from -// https://github.com/aim-uofa/AdelaiDet/blob/master/adet/layers/csrc/BezierAlign/BezierAlign_cuda.cu -#ifndef BEZIER_ALIGN_CUDA_KERNEL_CUH -#define BEZIER_ALIGN_CUDA_KERNEL_CUH - -#include -#ifdef MMCV_WITH_TRT -#include "common_cuda_helper.hpp" -#else // MMCV_WITH_TRT -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else // MMCV_USE_PARROTS -#include "pytorch_cuda_helper.hpp" -#endif // MMCV_USE_PARROTS -#endif // MMCV_WITH_TRT - -template -__device__ T bezier_curve(const T p0, const T p1, const T p2, const T p3, - const T u) { - return ((1. - u) * (1. - u) * (1. - u) * p0 + - 3. * u * (1. - u) * (1. - u) * p1 + 3. * u * u * (1. - u) * p2 + - u * u * u * p3); -} - -template -__global__ void bezier_align_forward_cuda_kernel( - const int nthreads, - const T *bottom_data, // inputs - const T *bottom_rois, // bottom rois contains the bezier curve - T *top_data, // outputs - const int pooled_height, const int pooled_width, const T spatial_scale, - const int sampling_ratio, bool aligned, const int channels, - const int height, const int width) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { - // (n, c, ph, pw) is an element in the pooled output - int pw = index % pooled_width; - int ph = (index / pooled_width) % pooled_height; - int c = (index / pooled_width / pooled_height) % channels; - int n = index / pooled_width / pooled_height / channels; - - // beziers have size Nx(1+8*2) = Nx17 - const T *offset_bottom_rois = bottom_rois + n * 17; - int roi_batch_ind = offset_bottom_rois[0]; - - // Do not use rounding; this implementation detail is critical - T offset = aligned ? 
(T)0.5 : (T)0.0; - - // TODO: avoid this by using parallel annotation, for good - T p0_x = offset_bottom_rois[1] * spatial_scale; - T p0_y = offset_bottom_rois[2] * spatial_scale; - T p1_x = offset_bottom_rois[3] * spatial_scale; - T p1_y = offset_bottom_rois[4] * spatial_scale; - T p2_x = offset_bottom_rois[5] * spatial_scale; - T p2_y = offset_bottom_rois[6] * spatial_scale; - T p3_x = offset_bottom_rois[7] * spatial_scale; - T p3_y = offset_bottom_rois[8] * spatial_scale; - T p4_x = offset_bottom_rois[15] * spatial_scale; - T p4_y = offset_bottom_rois[16] * spatial_scale; - T p5_x = offset_bottom_rois[13] * spatial_scale; - T p5_y = offset_bottom_rois[14] * spatial_scale; - T p6_x = offset_bottom_rois[11] * spatial_scale; - T p6_y = offset_bottom_rois[12] * spatial_scale; - T p7_x = offset_bottom_rois[9] * spatial_scale; - T p7_y = offset_bottom_rois[10] * spatial_scale; - - // compute the coords - const T u = pw / static_cast(pooled_width); - const T v = ph / static_cast(pooled_height); - const T x0 = bezier_curve(p0_x, p1_x, p2_x, p3_x, u); - const T y0 = bezier_curve(p0_y, p1_y, p2_y, p3_y, u); - const T x1 = bezier_curve(p4_x, p5_x, p6_x, p7_x, u); - const T y1 = bezier_curve(p4_y, p5_y, p6_y, p7_y, u); - const T x_center = x1 * v + x0 * (1. - v) - offset; - const T y_center = y1 * v + y0 * (1. - v) - offset; - - T roi_width = max(abs(p0_x - p3_x), abs(p4_x - p7_x)); - T roi_height = max(abs(p0_y - p3_y), abs(p4_y - p7_y)); - if (!aligned) { // for backward-compatibility only - roi_width = max(roi_width, (T)1.); - roi_height = max(roi_height, (T)1.); - } - T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); - T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); - - const T *offset_bottom_data = - bottom_data + (roi_batch_ind * channels + c) * height * width; - - // We use roi_bin_grid to sample the grid and mimic integral - int roi_bin_grid_h = (sampling_ratio > 0) - ? 
sampling_ratio - : ceil(roi_height / pooled_height); // e.g., = 2 - int roi_bin_grid_w = - (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); - - // We do average (integral) pooling inside a bin - // When the grid is empty, output zeros == 0/1, instead of NaN. - const T count = max(roi_bin_grid_h * roi_bin_grid_w, 1); // e.g. = 4 - - T output_val = 0.; - for (int iy = 0; iy < roi_bin_grid_h; iy++) // e.g., iy = 0, 1 - { - const T y = y_center - (T)0.5 * bin_size_h + - static_cast(iy + .5f) * bin_size_h / - static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 - for (int ix = 0; ix < roi_bin_grid_w; ix++) { - const T x = x_center - (T)0.5 * bin_size_w + - static_cast(ix + .5f) * bin_size_w / - static_cast(roi_bin_grid_w); - - T val = bilinear_interpolate(offset_bottom_data, height, width, y, x, - index); - output_val += val; - } - } - output_val /= count; - - top_data[index] = output_val; - } -} - -template -__global__ void bezier_align_backward_cuda_kernel( - const int nthreads, const T *top_diff, const T *bottom_rois, T *bottom_diff, - const int pooled_height, const int pooled_width, const T spatial_scale, - const int sampling_ratio, bool aligned, const int channels, - const int height, const int width) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { - // (n, c, ph, pw) is an element in the pooled output - int pw = index % pooled_width; - int ph = (index / pooled_width) % pooled_height; - int c = (index / pooled_width / pooled_height) % channels; - int n = index / pooled_width / pooled_height / channels; - - // beziers have size Nx(1+8*2) = Nx17 - const T *offset_bottom_rois = bottom_rois + n * 17; - int roi_batch_ind = offset_bottom_rois[0]; - - // Do not use rounding; this implementation detail is critical - T offset = aligned ? 
(T)0.5 : (T)0.0; - T p0_x = offset_bottom_rois[1] * spatial_scale; - T p0_y = offset_bottom_rois[2] * spatial_scale; - T p1_x = offset_bottom_rois[3] * spatial_scale; - T p1_y = offset_bottom_rois[4] * spatial_scale; - T p2_x = offset_bottom_rois[5] * spatial_scale; - T p2_y = offset_bottom_rois[6] * spatial_scale; - T p3_x = offset_bottom_rois[7] * spatial_scale; - T p3_y = offset_bottom_rois[8] * spatial_scale; - T p4_x = offset_bottom_rois[15] * spatial_scale; - T p4_y = offset_bottom_rois[16] * spatial_scale; - T p5_x = offset_bottom_rois[13] * spatial_scale; - T p5_y = offset_bottom_rois[14] * spatial_scale; - T p6_x = offset_bottom_rois[11] * spatial_scale; - T p6_y = offset_bottom_rois[12] * spatial_scale; - T p7_x = offset_bottom_rois[9] * spatial_scale; - T p7_y = offset_bottom_rois[10] * spatial_scale; - - // compute the coords - const T u = pw / static_cast(pooled_width); - const T v = ph / static_cast(pooled_height); - const T x0 = bezier_curve(p0_x, p1_x, p2_x, p3_x, u); - const T y0 = bezier_curve(p0_y, p1_y, p2_y, p3_y, u); - const T x1 = bezier_curve(p4_x, p5_x, p6_x, p7_x, u); - const T y1 = bezier_curve(p4_y, p5_y, p6_y, p7_y, u); - const T x_center = x1 * v + x0 * (1. - v) - offset; - const T y_center = y1 * v + y0 * (1. 
- v) - offset; - - T roi_width = max(abs(p0_x - p3_x), abs(p4_x - p7_x)); - T roi_height = max(abs(p0_y - p3_y), abs(p4_y - p7_y)); - if (!aligned) { // for backward-compatibility only - roi_width = max(roi_width, (T)1.); - roi_height = max(roi_height, (T)1.); - } - T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); - T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); - - T *offset_bottom_diff = - bottom_diff + (roi_batch_ind * channels + c) * height * width; - - int top_offset = (n * channels + c) * pooled_height * pooled_width; - const T *offset_top_diff = top_diff + top_offset; - const T top_diff_this_bin = offset_top_diff[ph * pooled_width + pw]; - - // We use roi_bin_grid to sample the grid and mimic integral - int roi_bin_grid_h = (sampling_ratio > 0) - ? sampling_ratio - : ceil(roi_height / pooled_height); // e.g., = 2 - int roi_bin_grid_w = - (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); - - // We do average (integral) pooling inside a bin - const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. 
= 4 - - for (int iy = 0; iy < roi_bin_grid_h; iy++) // e.g., iy = 0, 1 - { - const T y = y_center - (T)0.5 * bin_size_h + - static_cast(iy + .5f) * bin_size_h / - static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 - for (int ix = 0; ix < roi_bin_grid_w; ix++) { - const T x = x_center - (T)0.5 * bin_size_w + - static_cast(ix + .5f) * bin_size_w / - static_cast(roi_bin_grid_w); - - T w1, w2, w3, w4; - int x_low, x_high, y_low, y_high; - - bilinear_interpolate_gradient(height, width, y, x, w1, w2, w3, w4, - x_low, x_high, y_low, y_high, index); - - T g1 = top_diff_this_bin * w1 / count; - T g2 = top_diff_this_bin * w2 / count; - T g3 = top_diff_this_bin * w3 / count; - T g4 = top_diff_this_bin * w4 / count; - - if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) { - atomicAdd(offset_bottom_diff + y_low * width + x_low, - static_cast(g1)); - atomicAdd(offset_bottom_diff + y_low * width + x_high, - static_cast(g2)); - atomicAdd(offset_bottom_diff + y_high * width + x_low, - static_cast(g3)); - atomicAdd(offset_bottom_diff + y_high * width + x_high, - static_cast(g4)); - } // if - } // ix - } // iy - } // CUDA_1D_KERNEL_LOOP -} // BezierAlignBackward - -#endif // BEZIER_ALIGN_CUDA_KERNEL_CUH diff --git a/mmcv/ops/csrc/common/cuda/box_iou_quadri_cuda.cuh b/mmcv/ops/csrc/common/cuda/box_iou_quadri_cuda.cuh deleted file mode 100644 index cf8ad5e..0000000 --- a/mmcv/ops/csrc/common/cuda/box_iou_quadri_cuda.cuh +++ /dev/null @@ -1,91 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -#ifndef BOX_IOU_QUADRI_CUDA_CUH -#define BOX_IOU_QUADRI_CUDA_CUH - -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else -#include "pytorch_cuda_helper.hpp" -#endif -#include "box_iou_rotated_utils.hpp" - -// 2D block with 32 * 16 = 512 threads per block -const int BLOCK_DIM_X = 32; -const int BLOCK_DIM_Y = 16; - -inline int divideUP(const int x, const int y) { return (((x) + (y)-1) / (y)); } - -template -__global__ void box_iou_quadri_cuda_kernel( - const int n_boxes1, const int n_boxes2, const T* dev_boxes1, - const T* dev_boxes2, T* dev_ious, const int mode_flag, const bool aligned) { - if (aligned) { - CUDA_1D_KERNEL_LOOP(index, n_boxes1) { - int b1 = index; - int b2 = index; - - int base1 = b1 * 8; - - float block_boxes1[8]; - float block_boxes2[8]; - - block_boxes1[0] = dev_boxes1[base1 + 0]; - block_boxes1[1] = dev_boxes1[base1 + 1]; - block_boxes1[2] = dev_boxes1[base1 + 2]; - block_boxes1[3] = dev_boxes1[base1 + 3]; - block_boxes1[4] = dev_boxes1[base1 + 4]; - block_boxes1[5] = dev_boxes1[base1 + 5]; - block_boxes1[6] = dev_boxes1[base1 + 6]; - block_boxes1[7] = dev_boxes1[base1 + 7]; - - int base2 = b2 * 8; - - block_boxes2[0] = dev_boxes2[base2 + 0]; - block_boxes2[1] = dev_boxes2[base2 + 1]; - block_boxes2[2] = dev_boxes2[base2 + 2]; - block_boxes2[3] = dev_boxes2[base2 + 3]; - block_boxes2[4] = dev_boxes2[base2 + 4]; - block_boxes2[5] = dev_boxes2[base2 + 5]; - block_boxes2[6] = dev_boxes2[base2 + 6]; - block_boxes2[7] = dev_boxes2[base2 + 7]; - - dev_ious[index] = - single_box_iou_quadri(block_boxes1, block_boxes2, mode_flag); - } - } else { - CUDA_1D_KERNEL_LOOP(index, n_boxes1 * n_boxes2) { - int b1 = index / n_boxes2; - int b2 = index % n_boxes2; - - int base1 = b1 * 8; - - float block_boxes1[8]; - float block_boxes2[8]; - - block_boxes1[0] = dev_boxes1[base1 + 0]; - block_boxes1[1] = dev_boxes1[base1 + 1]; - block_boxes1[2] = dev_boxes1[base1 + 2]; - block_boxes1[3] = dev_boxes1[base1 + 3]; - block_boxes1[4] = 
dev_boxes1[base1 + 4]; - block_boxes1[5] = dev_boxes1[base1 + 5]; - block_boxes1[6] = dev_boxes1[base1 + 6]; - block_boxes1[7] = dev_boxes1[base1 + 7]; - - int base2 = b2 * 8; - - block_boxes2[0] = dev_boxes2[base2 + 0]; - block_boxes2[1] = dev_boxes2[base2 + 1]; - block_boxes2[2] = dev_boxes2[base2 + 2]; - block_boxes2[3] = dev_boxes2[base2 + 3]; - block_boxes2[4] = dev_boxes2[base2 + 4]; - block_boxes2[5] = dev_boxes2[base2 + 5]; - block_boxes2[6] = dev_boxes2[base2 + 6]; - block_boxes2[7] = dev_boxes2[base2 + 7]; - - dev_ious[index] = - single_box_iou_quadri(block_boxes1, block_boxes2, mode_flag); - } - } -} - -#endif diff --git a/mmcv/ops/csrc/common/cuda/carafe_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/carafe_cuda_kernel.cuh index 20fd617..07beeda 100644 --- a/mmcv/ops/csrc/common/cuda/carafe_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/carafe_cuda_kernel.cuh @@ -8,7 +8,7 @@ #include "pytorch_cuda_helper.hpp" #endif -#ifdef MMCV_WITH_HIP +#ifdef HIP_DIFF #define WARP_SIZE 64 #else #define WARP_SIZE 32 @@ -29,22 +29,22 @@ __device__ inline int Loc2Index(const int n, const int c, const int h, int index = w + (h + (c + n * channel_num) * height) * width; return index; } -#ifndef MMCV_WITH_HIP +#ifndef HIP_DIFF /* TODO: move this to a common place */ template -__device__ inline scalar_t min(scalar_t a, scalar_t b) { +__device__ inline scalar_t mmcv_min(scalar_t a, scalar_t b) { return a < b ? a : b; } template -__device__ inline scalar_t max(scalar_t a, scalar_t b) { +__device__ inline scalar_t mmcv_max(scalar_t a, scalar_t b) { return a > b ? 
a : b; } #endif template __device__ __forceinline__ scalar_t warpReduceSum(scalar_t val) { for (int offset = WARP_SIZE / 2; offset > 0; offset /= 2) -#ifdef MMCV_WITH_HIP +#ifdef HIP_DIFF val += __shfl_down(val, offset); #else val += __shfl_down_sync(FULL_MASK, val, offset); @@ -55,11 +55,11 @@ __device__ __forceinline__ scalar_t warpReduceSum(scalar_t val) { template <> __device__ __forceinline__ phalf warpReduceSum(phalf val) { for (int offset = WARP_SIZE / 2; offset > 0; offset /= 2) -#ifdef MMCV_WITH_HIP - __PHALF(val) += __shfl_down(val, offset); +#ifdef HIP_DIFF + __PHALF(val) += __shfl_down(FULL_MASK, val, offset); #else __PHALF(val) += - __shfl_down_sync(FULL_MASK, __PHALF(val).operator __half(), offset); + __shfl_down_sync(FULL_MASK, static_cast<__half>(__PHALF(val)), offset); #endif return val; } @@ -316,7 +316,7 @@ __global__ void CARAFEBackward_Mask(const int num_kernels, output_val += top_diff[top_id] * bottom_data[bottom_id]; } } -#ifdef MMCV_WITH_HIP +#ifdef HIP_DIFF __syncthreads(); #else __syncwarp(); diff --git a/mmcv/ops/csrc/common/cuda/chamfer_distance_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/chamfer_distance_cuda_kernel.cuh deleted file mode 100644 index 89feea4..0000000 --- a/mmcv/ops/csrc/common/cuda/chamfer_distance_cuda_kernel.cuh +++ /dev/null @@ -1,101 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved. 
-// Modified from -// https://github.com/chrdiller/pyTorchChamferDistance/blob/master/chamfer_distance/chamfer_distance.cu -#ifndef CHAMFER_DISTANCE_CUDA_KERNEL_CUH -#define CHAMFER_DISTANCE_CUDA_KERNEL_CUH - -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else -#include "pytorch_cuda_helper.hpp" -#endif - -#define MAX_SHARED_SCALAR_T 6144 // 49152 / 8 = 6144 - -template -__global__ void chamfer_distance_forward_cuda_kernel(int b, int n, - const scalar_t* xyz, int m, - const scalar_t* xyz2, - scalar_t* result, - int* result_i) { - __shared__ scalar_t buf[MAX_SHARED_SCALAR_T]; - for (int i = blockIdx.x; i < b; i += gridDim.x) { - for (int k2 = 0; k2 < m; k2 += THREADS_PER_BLOCK) { - int end_k = min(m, k2 + THREADS_PER_BLOCK) - k2; - for (int j = threadIdx.x; j < end_k * 2; j += blockDim.x) { - buf[j] = xyz2[(i * m + k2) * 2 + j]; - } - __syncthreads(); - for (int j = threadIdx.x; j < n; j += blockDim.x * gridDim.y) { - scalar_t x1 = xyz[(i * n + j) * 2 + 0]; - scalar_t y1 = xyz[(i * n + j) * 2 + 1]; - int best_i = 0; - scalar_t best = 1e10; - int end_ka = end_k & (~2); - if (end_ka == THREADS_PER_BLOCK) { - for (int k = 0; k < THREADS_PER_BLOCK; k += 4) { -#pragma unroll - for (int j = 0; j < 4; ++j) { - scalar_t x2 = buf[(k + j) * 2] - x1; - scalar_t y2 = buf[(k + j) * 2 + 1] - y1; - scalar_t d = x2 * x2 + y2 * y2; - if (d < best) { - best = d; - best_i = k + k2 + j; - } - } - } - } else { - for (int k = 0; k < end_ka; k += 4) { -#pragma unroll - for (int j = 0; j < 4; ++j) { - scalar_t x2 = buf[(k + j) * 2] - x1; - scalar_t y2 = buf[(k + j) * 2 + 1] - y1; - scalar_t d = x2 * x2 + y2 * y2; - if (d < best) { - best = d; - best_i = k + k2 + j; - } - } - } - } - for (int k = end_ka; k < end_k; k++) { - scalar_t x2 = buf[k * 2 + 0] - x1; - scalar_t y2 = buf[k * 2 + 1] - y1; - scalar_t d = x2 * x2 + y2 * y2; - if (k == 0 || d < best) { - best = d; - best_i = k + k2; - } - } - if (k2 == 0 || result[(i * n + j)] > best) { - result[(i * n + j)] = best; - 
result_i[(i * n + j)] = best_i; - } - } - __syncthreads(); - } - } -} - -template -__global__ void chamfer_distance_backward_cuda_kernel( - int b, int n, const scalar_t* xyz1, int m, const scalar_t* xyz2, - const scalar_t* grad_dist1, const int* idx1, scalar_t* grad_xyz1, - scalar_t* grad_xyz2) { - for (int i = blockIdx.x; i < b; i += gridDim.x) { - for (int j = threadIdx.x; j < n; j += blockDim.x * gridDim.y) { - scalar_t x1 = xyz1[(i * n + j) * 2 + 0]; - scalar_t y1 = xyz1[(i * n + j) * 2 + 1]; - int j2 = idx1[i * n + j]; - scalar_t x2 = xyz2[(i * m + j2) * 2 + 0]; - scalar_t y2 = xyz2[(i * m + j2) * 2 + 1]; - scalar_t g = grad_dist1[i * n + j] * 2; - atomicAdd(&(grad_xyz1[(i * n + j) * 2 + 0]), g * (x1 - x2)); - atomicAdd(&(grad_xyz1[(i * n + j) * 2 + 1]), g * (y1 - y2)); - atomicAdd(&(grad_xyz2[(i * m + j2) * 2 + 0]), -(g * (x1 - x2))); - atomicAdd(&(grad_xyz2[(i * m + j2) * 2 + 1]), -(g * (y1 - y2))); - } - } -} -#endif // CHAMFER_DISTANCE_CUDA_KERNEL_CUH diff --git a/mmcv/ops/csrc/common/cuda/common_cuda_helper.hpp b/mmcv/ops/csrc/common/cuda/common_cuda_helper.hpp index b12aa9a..dc5df17 100644 --- a/mmcv/ops/csrc/common/cuda/common_cuda_helper.hpp +++ b/mmcv/ops/csrc/common/cuda/common_cuda_helper.hpp @@ -7,20 +7,12 @@ for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \ i += blockDim.x * gridDim.x) -#define CUDA_2D_KERNEL_LOOP(i, n, j, m) \ - for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \ - i += blockDim.x * gridDim.x) \ - for (size_t j = blockIdx.y * blockDim.y + threadIdx.y; j < (m); \ - j += blockDim.y * gridDim.y) - -#define CUDA_2D_KERNEL_BLOCK_LOOP(i, n, j, m) \ - for (size_t i = blockIdx.x; i < (n); i += gridDim.x) \ - for (size_t j = blockIdx.y; j < (m); j += gridDim.y) - #define THREADS_PER_BLOCK 512 -inline int GET_BLOCKS(const int N, const int num_threads = THREADS_PER_BLOCK) { - int optimal_block_num = (N + num_threads - 1) / num_threads; +#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0)) + +inline int 
GET_BLOCKS(const int N) { + int optimal_block_num = (N + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; int max_block_num = 4096; return min(optimal_block_num, max_block_num); } diff --git a/mmcv/ops/csrc/common/cuda/convex_iou_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/convex_iou_cuda_kernel.cuh deleted file mode 100644 index 2af96f7..0000000 --- a/mmcv/ops/csrc/common/cuda/convex_iou_cuda_kernel.cuh +++ /dev/null @@ -1,831 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -#ifndef CONVEX_IOU_CUDA_KERNEL_CUH -#define CONVEX_IOU_CUDA_KERNEL_CUH - -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else -#include "pytorch_cuda_helper.hpp" -#endif - -#define MAXN 100 -#define NMAX 512 -__device__ const double EPS = 1E-8; - -__device__ inline int sig(double d) { return (d > EPS) - (d < -EPS); } - -struct Point { - double x, y; - __device__ Point() {} - __device__ Point(double x, double y) : x(x), y(y) {} -}; - -__device__ inline bool point_same(Point& a, Point& b) { - return sig(a.x - b.x) == 0 && sig(a.y - b.y) == 0; -} - -__device__ inline void swap1(Point* a, Point* b) { - Point temp; - temp.x = a->x; - temp.y = a->y; - - a->x = b->x; - a->y = b->y; - - b->x = temp.x; - b->y = temp.y; -} - -__device__ inline void reverse1(Point* a, const int n) { - for (int i = 0; i < (n - 1) / 2.0; i++) { - Point* j = &(a[i]); - Point* k = &(a[n - 1 - i]); - swap1(j, k); - } -} - -__device__ inline double cross(Point o, Point a, Point b) { - return (a.x - o.x) * (b.y - o.y) - (b.x - o.x) * (a.y - o.y); -} - -__device__ inline double dis(Point a, Point b) { - return (a.x - b.x) * (a.x - b.x) + (a.y - b.y) * (a.y - b.y); -} -__device__ inline double area(Point* ps, int n) { - ps[n] = ps[0]; - double res = 0; - for (int i = 0; i < n; i++) { - res += ps[i].x * ps[i + 1].y - ps[i].y * ps[i + 1].x; - } - return res / 2.0; -} -__device__ inline double polygon_area_grad(Point* ps, int n, - int* polygon_to_pred_index, - int n_pred, double* grad_C) { - ps[n] = ps[0]; - 
double partion_grad[4 * 30 + 2]; - double res = 0; - for (int i = 0; i < n; i++) { - res += ps[i].x * ps[i + 1].y - ps[i].y * ps[i + 1].x; - partion_grad[i * 4 + 2] = ps[i + 1].y; - partion_grad[i * 4 + 3] = -ps[i + 1].x; - if (i != n - 1) { - partion_grad[i * 4 + 4] = -ps[i].y; - partion_grad[i * 4 + 5] = ps[i].x; - } else { - partion_grad[0] = -ps[i].y; - partion_grad[1] = ps[i].x; - } - } - for (int i = 0; i < n; i++) { - for (int j = 0; j < n_pred; j++) { - if (i == polygon_to_pred_index[j]) { - grad_C[2 * polygon_to_pred_index[j + n_pred]] = - (partion_grad[i * 4] + partion_grad[i * 4 + 2]) / 2; - break; - } - } - for (int j = 0; j < n_pred; j++) { - if (i == polygon_to_pred_index[j]) { - grad_C[2 * polygon_to_pred_index[j + n_pred] + 1] = - (partion_grad[i * 4 + 1] + partion_grad[i * 4 + 1 + 2]) / 2; - break; - } - } - } - - return res / 2.0; -} - -__device__ inline int lineCross(Point a, Point b, Point c, Point d, Point& p, - double* cut_grad, int m, int n, int i) { - double s1, s2; - double s2_s1_2; - double ds1_dxc, ds1_dyc, ds2_dxd, ds2_dyd; - double dxp_dxc, dxp_dyc, dxp_dxd, dxp_dyd, dyp_dxc, dyp_dyc, dyp_dxd, dyp_dyd; - s1 = cross(a, b, c); - s2 = cross(a, b, d); - - ds1_dxc = -(b.y - a.y); - ds1_dyc = b.x - a.x; - ds2_dxd = ds1_dxc; - ds2_dyd = ds1_dyc; - s2_s1_2 = (s2 - s1) * (s2 - s1); - - if (sig(s1) == 0 && sig(s2) == 0) return 2; - if (sig(s2 - s1) == 0) return 0; - - dxp_dxc = - ((s2 - d.x * ds1_dxc) * (s2 - s1) - (c.x * s2 - d.x * s1) * (-ds1_dxc)) / - (s2_s1_2); - dxp_dyc = - ((0 - d.x * ds1_dyc) * (s2 - s1) - (c.x * s2 - d.x * s1) * (-ds1_dyc)) / - (s2_s1_2); - dxp_dxd = - ((c.x * ds2_dxd - s1) * (s2 - s1) - (c.x * s2 - d.x * s1) * (ds2_dxd)) / - (s2_s1_2); - dxp_dyd = - ((c.x * ds2_dyd - 0) * (s2 - s1) - (c.x * s2 - d.x * s1) * (ds2_dyd)) / - (s2_s1_2); - - dyp_dxc = - ((0 - d.y * ds1_dxc) * (s2 - s1) - (c.y * s2 - d.y * s1) * (-ds1_dxc)) / - (s2_s1_2); - dyp_dyc = - ((s2 - d.y * ds1_dyc) * (s2 - s1) - (c.y * s2 - d.y * s1) * (-ds1_dyc)) / - 
(s2_s1_2); - dyp_dxd = - ((c.y * ds2_dxd - 0) * (s2 - s1) - (c.y * s2 - d.y * s1) * (ds2_dxd)) / - (s2_s1_2); - dyp_dyd = - ((c.y * ds2_dyd - s1) * (s2 - s1) - (c.y * s2 - d.y * s1) * (ds2_dyd)) / - (s2_s1_2); - - p.x = (c.x * s2 - d.x * s1) / (s2 - s1); - p.y = (c.y * s2 - d.y * s1) / (s2 - s1); - if (i == n - 1) { - cut_grad[4 * n * m + 4 * i] = dxp_dxc; // + dyp_dxc; - cut_grad[4 * n * m + 4 * i + 1] = dyp_dxc; - cut_grad[4 * n * m + 4 * i + 2] = dxp_dyc; // + dyp_dyc; - cut_grad[4 * n * m + 4 * i + 3] = dyp_dyc; - cut_grad[4 * n * m + 0] = dxp_dxd; // + dyp_dxd; - cut_grad[4 * n * m + 1] = dyp_dxd; - cut_grad[4 * n * m + 2] = dxp_dyd; // + dyp_dyd; - cut_grad[4 * n * m + 3] = dyp_dyd; - } else { - cut_grad[4 * n * m + 4 * i] = dxp_dxc; // + dyp_dxc; - cut_grad[4 * n * m + 4 * i + 1] = dyp_dxc; - cut_grad[4 * n * m + 4 * i + 2] = dxp_dyc; // + dyp_dyc; - cut_grad[4 * n * m + 4 * i + 3] = dyp_dyc; - cut_grad[4 * n * m + 4 * (i + 1)] = dxp_dxd; // + dyp_dxd; - cut_grad[4 * n * m + 4 * (i + 1) + 1] = dyp_dxd; - cut_grad[4 * n * m + 4 * (i + 1) + 2] = dxp_dyd; // + dyp_dyd; - cut_grad[4 * n * m + 4 * (i + 1) + 3] = dyp_dyd; - } - - return 1; -} -__device__ inline void polygon_cut(Point* p, int& n, Point a, Point b, - double* cut_grad) { - Point pp[MAXN]; - double ccur_grad[MAXN] = {}; - int m = 0; - p[n] = p[0]; - int k = n; - for (int i = 0; i < n; i++) { - if (sig(cross(a, b, p[i])) > 0) { - pp[m] = p[i]; - ccur_grad[4 * n * m + 4 * i] = 1.0; - ccur_grad[4 * n * m + 4 * i + 3] = 1.0; - m++; - } - if (sig(cross(a, b, p[i])) != sig(cross(a, b, p[i + 1]))) { - lineCross(a, b, p[i], p[i + 1], pp[m], ccur_grad, m, n, i); - m++; - } - } - - n = 0; - for (int i = 0; i < m; i++) { - if (!i || !(point_same(pp[i], pp[i - 1]))) { - p[n] = pp[i]; - for (int j = 0; j < 4 * k; j++) { - cut_grad[4 * k * n + j] = ccur_grad[4 * k * i + j]; - } - n++; - } - } - - while (n > 1 && point_same(p[n - 1], p[0])) n--; -} - -__device__ inline double intersectArea(Point a, Point b, Point c, 
Point d, - double* grad_AB, int order, - int convex_n) { - Point o(0, 0); - int res_flag = 0; - int s1 = sig(cross(o, a, b)); - int s2 = sig(cross(o, c, d)); - if (s1 == 0 || s2 == 0) return 0.0; - if (s1 == -1) { - Point* i = &a; - Point* j = &b; - swap1(i, j); - res_flag = 1; - } - if (s2 == -1) { - Point* i = &c; - Point* j = &d; - swap1(i, j); - } - Point p[10] = {o, a, b}; - int n = 3, n0 = 3, n1, n2, n3; - double cut_grad1[MAXN] = {}; - double cut_grad2[MAXN] = {}; - double cut_grad3[MAXN] = {}; - double p1_p_grad[10][10] = {}; - double p2_p1_grad[10][10] = {}; - double p3_p2_grad[10][10] = {}; - - double p3_p1_grad[10][10] = {}; - double p3_p_grad[10][10] = {}; - - // 1 - polygon_cut(p, n, o, c, cut_grad1); - n1 = n; - for (int i = 0; i < n; i++) { - for (int j = 0; j < 4 * n0; j++) { - if (!(j % 2)) { - p1_p_grad[2 * i][j / 2] = cut_grad1[4 * n0 * i + j]; - } else { - p1_p_grad[2 * i + 1][j / 2] = cut_grad1[4 * n0 * i + j]; - } - } - } - - // 2 - polygon_cut(p, n, c, d, cut_grad2); - n2 = n; - for (int i = 0; i < n; i++) { - for (int j = 0; j < 4 * n1; j++) { - if (!(j % 2)) { - p2_p1_grad[2 * i][j / 2] = cut_grad2[4 * n1 * i + j]; - } else { - p2_p1_grad[2 * i + 1][j / 2] = cut_grad2[4 * n1 * i + j]; - } - } - } - // 3 - polygon_cut(p, n, d, o, cut_grad3); - n3 = n; - for (int i = 0; i < n; i++) { - for (int j = 0; j < 4 * n2; j++) { - if (!(j % 2)) { - p3_p2_grad[2 * i][j / 2] = cut_grad3[4 * n2 * i + j]; - } else { - p3_p2_grad[2 * i + 1][j / 2] = cut_grad3[4 * n2 * i + j]; - } - } - } - - // mul - // p3_p2(n3 * n2) * p2_p1(n2 * n1) = p3_p1 (n3 * n1) - for (int i = 0; i < 2 * n3; i++) { - for (int j = 0; j < 2 * n1; j++) { - double sum = 0.0; - for (int m = 0; m < 2 * n2; m++) { - sum = sum + p3_p2_grad[i][m] * p2_p1_grad[m][j]; - } - p3_p1_grad[i][j] = sum; - } - } - - // p3_p1 (n3 * n1) * p1_p (n1 * n0) = p3_p (n3 * n0) - for (int i = 0; i < 2 * n3; i++) { - for (int j = 0; j < 2 * n0; j++) { - double sum = 0.0; - for (int m = 0; m < 2 * n1; m++) { - 
sum = sum + p3_p1_grad[i][m] * p1_p_grad[m][j]; - } - p3_p_grad[i][j] = sum; - } - } - - // calculate S_grad - int polygon_index_box_index[20]; - double grad_polygon[20]; - double S_grad[6]; - - for (int i = 0; i < n3; i++) { - polygon_index_box_index[i] = i; - polygon_index_box_index[i + n3] = i; - } - - double res = - polygon_area_grad(p, n3, polygon_index_box_index, n3, grad_polygon); - - if (s1 * s2 == -1) { - for (int j = 0; j < 2 * 3; j++) { - double sum = 0.0; - for (int m = 0; m < 2 * n3; m++) { - sum = sum - grad_polygon[m] * p3_p_grad[m][j]; - } - S_grad[j] = sum; - } - - if (order != convex_n - 1) { - if (res_flag) { - grad_AB[2 * order] += S_grad[4]; - grad_AB[2 * order + 1] += S_grad[5]; - grad_AB[2 * order + 2] += S_grad[2]; - grad_AB[2 * order + 3] += S_grad[3]; - - } else { - grad_AB[2 * order] += S_grad[2]; - grad_AB[2 * order + 1] += S_grad[3]; - grad_AB[2 * order + 2] += S_grad[4]; - grad_AB[2 * order + 3] += S_grad[5]; - } - } else { - if (res_flag) { - grad_AB[2 * order] += S_grad[4]; - grad_AB[2 * order + 1] += S_grad[5]; - grad_AB[0] += S_grad[2]; - grad_AB[1] += S_grad[3]; - - } else { - grad_AB[2 * order] += S_grad[2]; - grad_AB[2 * order + 1] += S_grad[3]; - grad_AB[0] += S_grad[4]; - grad_AB[1] += S_grad[5]; - } - } - res = -res; - } else { - for (int j = 0; j < 2 * 3; j++) { - double sum = 0.0; - for (int m = 0; m < 2 * n3; m++) { - sum = sum + grad_polygon[m] * p3_p_grad[m][j]; - } - S_grad[j] = sum; - } - - if (order != convex_n - 1) { - if (res_flag) { - grad_AB[2 * order] += S_grad[4]; - grad_AB[2 * order + 1] += S_grad[5]; - grad_AB[2 * order + 2] += S_grad[2]; - grad_AB[2 * order + 3] += S_grad[3]; - } else { - grad_AB[2 * order] += S_grad[2]; - grad_AB[2 * order + 1] += S_grad[3]; - grad_AB[2 * order + 2] += S_grad[4]; - grad_AB[2 * order + 3] += S_grad[5]; - } - } else { - if (res_flag) { - grad_AB[2 * order] += S_grad[4]; - grad_AB[2 * order + 1] += S_grad[5]; - grad_AB[0] += S_grad[2]; - grad_AB[1] += S_grad[3]; - } else { - 
grad_AB[2 * order] += S_grad[2]; - grad_AB[2 * order + 1] += S_grad[3]; - grad_AB[0] += S_grad[4]; - grad_AB[1] += S_grad[5]; - } - } - } - return res; -} - -__device__ inline double intersectAreaO(Point* ps1, int n1, Point* ps2, int n2, - double* grad_AB) { - if (area(ps1, n1) < 0) reverse1(ps1, n1); - if (area(ps2, n2) < 0) reverse1(ps2, n2); - ps1[n1] = ps1[0]; - ps2[n2] = ps2[0]; - double res = 0; - for (int i = 0; i < n1; i++) { - for (int j = 0; j < n2; j++) { - res += - intersectArea(ps1[i], ps1[i + 1], ps2[j], ps2[j + 1], grad_AB, i, n1); - } - } - return res; -} - -__device__ inline void Jarvis(Point* in_poly, int& n_poly) { - Point p_max, p_k; - int max_index, k_index; - int Stack[NMAX] = {}, top1, top2; - double sign; - Point right_point[10], left_point[10]; - - for (int i = 0; i < n_poly; i++) { - if (in_poly[i].y < in_poly[0].y || - in_poly[i].y == in_poly[0].y && in_poly[i].x < in_poly[0].x) { - Point* j = &(in_poly[0]); - Point* k = &(in_poly[i]); - swap1(j, k); - } - if (i == 0) { - p_max = in_poly[0]; - max_index = 0; - } - if (in_poly[i].y > p_max.y || - in_poly[i].y == p_max.y && in_poly[i].x > p_max.x) { - p_max = in_poly[i]; - max_index = i; - } - } - - if (max_index == 0) { - max_index = 1; - p_max = in_poly[max_index]; - } - - k_index = 0, Stack[0] = 0, top1 = 0; - while (k_index != max_index) { - p_k = p_max; - k_index = max_index; - for (int i = 1; i < n_poly; i++) { - sign = cross(in_poly[Stack[top1]], in_poly[i], p_k); - if ((sign > 0) || ((sign == 0) && (dis(in_poly[Stack[top1]], in_poly[i]) > - dis(in_poly[Stack[top1]], p_k)))) { - p_k = in_poly[i]; - k_index = i; - } - } - top1++; - Stack[top1] = k_index; - } - for (int i = 0; i <= top1; i++) right_point[i] = in_poly[Stack[i]]; - - k_index = 0, Stack[0] = 0, top2 = 0; - - while (k_index != max_index) { - p_k = p_max; - k_index = max_index; - for (int i = 1; i < n_poly; i++) { - sign = cross(in_poly[Stack[top2]], in_poly[i], p_k); - if ((sign < 0) || (sign == 0) && 
(dis(in_poly[Stack[top2]], in_poly[i]) > - dis(in_poly[Stack[top2]], p_k))) { - p_k = in_poly[i]; - k_index = i; - } - } - top2++; - Stack[top2] = k_index; - } - for (int i = top2 - 1; i >= 0; i--) left_point[i] = in_poly[Stack[i]]; - - for (int i = 0; i < top1 + top2; i++) { - if (i <= top1) { - in_poly[i] = right_point[i]; - } else { - in_poly[i] = left_point[top2 - (i - top1)]; - } - } - n_poly = top1 + top2; -} - -__device__ inline double intersectAreaPoly(Point* ps1, int n1, Point* ps2, - int n2, double* grad_C) { - Point polygon[MAXN]; - int n = n1 + n2, n_poly = 0; - for (int i = 0; i < n1; i++) { - for (int j = 0; j < n - n1; j++) { - if (point_same(ps1[i], ps2[j])) { - for (int k = j; k < n - n1 - 1; k++) { - ps2[k] = ps2[k + 1]; - } - n2--; - break; - } - } - } - n_poly = n1 + n2; - for (int i = 0; i < n_poly; i++) { - if (i < n1) { - polygon[i] = ps1[i]; - } else { - polygon[i] = ps2[i - n1]; - } - } - - Jarvis(polygon, n_poly); - - int polygon_to_pred_index[18] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1}; - int n_pred = 0; - for (int i = 0; i < n_poly; i++) { - for (int j = 0; j < n1; j++) { - if (polygon[i].x == ps1[j].x && polygon[i].y == ps1[j].y) { - polygon_to_pred_index[n_pred] = i; - polygon_to_pred_index[n_pred + n1] = j; - n_pred += 1; - break; - } - } - } - if (n_pred == 0) { - double polygon_area = fabs(area(polygon, n_poly)); - for (int i = 0; i < 18; i++) { - grad_C[i] = 0.0; - } - return polygon_area; - } else { - double polygon_area = - polygon_area_grad(polygon, n_poly, polygon_to_pred_index, n1, grad_C); - if (polygon_area < 0) { - for (int i = 0; i < 18; i++) { - grad_C[i] = -grad_C[i]; - } - } - return fabs(polygon_area); - } -} - -// convex_find and get the polygon_index_box_index -__device__ inline void Jarvis_and_index(Point* in_poly, int& n_poly, - int* points_to_convex_ind) { - int n_input = n_poly; - Point input_poly[20]; - for (int i = 0; i < n_input; i++) { - input_poly[i].x = in_poly[i].x; - 
input_poly[i].y = in_poly[i].y; - } - Point p_max, p_k; - int max_index, k_index; - int Stack[20], top1, top2; - double sign; - Point right_point[10], left_point[10]; - - for (int i = 0; i < n_poly; i++) { - if (in_poly[i].y < in_poly[0].y || - in_poly[i].y == in_poly[0].y && in_poly[i].x < in_poly[0].x) { - Point* j = &(in_poly[0]); - Point* k = &(in_poly[i]); - swap1(j, k); - } - if (i == 0) { - p_max = in_poly[0]; - max_index = 0; - } - if (in_poly[i].y > p_max.y || - in_poly[i].y == p_max.y && in_poly[i].x > p_max.x) { - p_max = in_poly[i]; - max_index = i; - } - } - if (max_index == 0) { - max_index = 1; - p_max = in_poly[max_index]; - } - - k_index = 0, Stack[0] = 0, top1 = 0; - while (k_index != max_index) { - p_k = p_max; - k_index = max_index; - for (int i = 1; i < n_poly; i++) { - sign = cross(in_poly[Stack[top1]], in_poly[i], p_k); - if ((sign > 0) || ((sign == 0) && (dis(in_poly[Stack[top1]], in_poly[i]) > - dis(in_poly[Stack[top1]], p_k)))) { - p_k = in_poly[i]; - k_index = i; - } - } - top1++; - Stack[top1] = k_index; - } - for (int i = 0; i <= top1; i++) { - right_point[i] = in_poly[Stack[i]]; - } - - k_index = 0, Stack[0] = 0, top2 = 0; - - while (k_index != max_index) { - p_k = p_max; - k_index = max_index; - for (int i = 1; i < n_poly; i++) { - sign = cross(in_poly[Stack[top2]], in_poly[i], p_k); - if ((sign < 0) || (sign == 0) && (dis(in_poly[Stack[top2]], in_poly[i]) > - dis(in_poly[Stack[top2]], p_k))) { - p_k = in_poly[i]; - k_index = i; - } - } - top2++; - Stack[top2] = k_index; - } - - for (int i = top2 - 1; i >= 0; i--) { - left_point[i] = in_poly[Stack[i]]; - } - - for (int i = 0; i < top1 + top2; i++) { - if (i <= top1) { - in_poly[i] = right_point[i]; - } else { - in_poly[i] = left_point[top2 - (i - top1)]; - } - } - n_poly = top1 + top2; - for (int i = 0; i < n_poly; i++) { - for (int j = 0; j < n_input; j++) { - if (point_same(in_poly[i], input_poly[j])) { - points_to_convex_ind[i] = j; - break; - } - } - } -} - -template -__device__ 
inline float devrIoU(T const* const p, T const* const q, - T* point_grad, const int idx) { - Point ps1[MAXN], ps2[MAXN]; - - Point convex[MAXN]; - for (int i = 0; i < 9; i++) { - convex[i].x = (double)p[i * 2]; - convex[i].y = (double)p[i * 2 + 1]; - } - int n_convex = 9; - int points_to_convex_ind[9] = {-1, -1, -1, -1, -1, -1, -1, -1, -1}; - Jarvis_and_index(convex, n_convex, points_to_convex_ind); - - int n1 = n_convex; - int n2 = 4; - - for (int i = 0; i < n1; i++) { - ps1[i].x = (double)convex[i].x; - ps1[i].y = (double)convex[i].y; - } - - for (int i = 0; i < n2; i++) { - ps2[i].x = (double)q[i * 2]; - ps2[i].y = (double)q[i * 2 + 1]; - } - - int polygon_index_box_index[18]; - for (int i = 0; i < n1; i++) { - polygon_index_box_index[i] = i; - polygon_index_box_index[i + n1] = i; - } - - double grad_A[18] = {}; - double grad_AB[18] = {}; - double grad_C[18] = {}; - - double inter_area = intersectAreaO(ps1, n1, ps2, n2, grad_AB); - double S_pred = - polygon_area_grad(ps1, n1, polygon_index_box_index, n1, grad_A); - if (S_pred < 0) { - for (int i = 0; i < n_convex * 2; i++) { - grad_A[i] = -grad_A[i]; - } - } - double union_area = fabs(S_pred) + fabs(area(ps2, n2)) - inter_area; - - double iou = inter_area / union_area; - double polygon_area = intersectAreaPoly(ps1, n1, ps2, n2, grad_C); - - // printf("%d:live\n", idx); - double rot_giou = iou - (polygon_area - union_area) / polygon_area; - - float grad_point_temp[18] = {}; - - for (int i = 0; i < n_convex; i++) { - int grad_point = points_to_convex_ind[i]; - grad_point_temp[2 * grad_point] = - (float)((union_area + inter_area) / (union_area * union_area) * - grad_AB[2 * i] - - iou / union_area * grad_A[2 * i] - - 1 / polygon_area * (grad_AB[2 * i] - grad_A[2 * i]) - - (union_area) / polygon_area / polygon_area * grad_C[2 * i]); - grad_point_temp[2 * grad_point + 1] = - (float)((union_area + inter_area) / (union_area * union_area) * - grad_AB[2 * i + 1] - - iou / union_area * grad_A[2 * i + 1] - - 1 / 
polygon_area * (grad_AB[2 * i + 1] - grad_A[2 * i + 1]) - - (union_area) / polygon_area / polygon_area * grad_C[2 * i + 1]); - } - - for (int i = 0; i < 9; i++) { - point_grad[2 * i] = grad_point_temp[2 * i]; - point_grad[2 * i + 1] = grad_point_temp[2 * i + 1]; - } - return (float)rot_giou; -} - -template -__global__ void convex_giou_cuda_kernel(const int ex_n_boxes, - const int gt_n_boxes, const T* ex_boxes, - const T* gt_boxes, T* point_grad) { - CUDA_1D_KERNEL_LOOP(index, ex_n_boxes) { - const T* cur_box = ex_boxes + index * 18; - const T* cur_gt_box = gt_boxes + index * 8; - T* cur_grad = point_grad + index * 19; - T giou = devrIoU(cur_box, cur_gt_box, cur_grad, threadIdx.x); - cur_grad[18] = giou; - } -} - -__device__ inline int lineCross(Point a, Point b, Point c, Point d, Point& p) { - double s1, s2; - s1 = cross(a, b, c); - s2 = cross(a, b, d); - if (sig(s1) == 0 && sig(s2) == 0) return 2; - if (sig(s2 - s1) == 0) return 0; - p.x = (c.x * s2 - d.x * s1) / (s2 - s1); - p.y = (c.y * s2 - d.y * s1) / (s2 - s1); - return 1; -} - -__device__ inline void polygon_cut(Point* p, int& n, Point a, Point b) { - Point pp[MAXN]; - int m = 0; - p[n] = p[0]; - for (int i = 0; i < n; i++) { - if (sig(cross(a, b, p[i])) > 0) { - pp[m] = p[i]; - m++; - } - if (sig(cross(a, b, p[i])) != sig(cross(a, b, p[i + 1]))) { - lineCross(a, b, p[i], p[i + 1], pp[m]); - m++; - } - } - n = 0; - for (int i = 0; i < m; i++) { - if (!i || !(point_same(pp[i], pp[i - 1]))) { - p[n] = pp[i]; - n++; - } - } - - while (n > 1 && point_same(p[n - 1], p[0])) n--; -} - -__device__ inline double intersectArea(Point a, Point b, Point c, Point d) { - Point o(0, 0); - int s1 = sig(cross(o, a, b)); - int s2 = sig(cross(o, c, d)); - if (s1 == 0 || s2 == 0) return 0.0; - if (s1 == -1) { - Point* i = &a; - Point* j = &b; - swap1(i, j); - } - if (s2 == -1) { - Point* i = &c; - Point* j = &d; - swap1(i, j); - } - Point p[10] = {o, a, b}; - int n = 3; - - polygon_cut(p, n, o, c); - polygon_cut(p, n, c, d); - 
polygon_cut(p, n, d, o); - double res = area(p, n); - if (s1 * s2 == -1) res = -res; - return res; -} -__device__ inline double intersectAreaO(Point* ps1, int n1, Point* ps2, - int n2) { - if (area(ps1, n1) < 0) reverse1(ps1, n1); - if (area(ps2, n2) < 0) reverse1(ps2, n2); - ps1[n1] = ps1[0]; - ps2[n2] = ps2[0]; - double res = 0; - for (int i = 0; i < n1; i++) { - for (int j = 0; j < n2; j++) { - res += intersectArea(ps1[i], ps1[i + 1], ps2[j], ps2[j + 1]); - } - } - return res; -} - -template -__device__ inline float devrIoU(T const* const p, T const* const q) { - Point ps1[MAXN], ps2[MAXN]; - Point convex[MAXN]; - for (int i = 0; i < 9; i++) { - convex[i].x = (double)p[i * 2]; - convex[i].y = (double)p[i * 2 + 1]; - } - int n_convex = 9; - int points_to_convex_ind[9] = {-1, -1, -1, -1, -1, -1, -1, -1, -1}; - Jarvis_and_index(convex, n_convex, points_to_convex_ind); - int n1 = n_convex; - for (int i = 0; i < n1; i++) { - ps1[i].x = (double)convex[i].x; - ps1[i].y = (double)convex[i].y; - } - int n2 = 4; - for (int i = 0; i < n2; i++) { - ps2[i].x = (double)q[i * 2]; - ps2[i].y = (double)q[i * 2 + 1]; - } - double inter_area = intersectAreaO(ps1, n1, ps2, n2); - double S_pred = area(ps1, n1); - double union_area = fabs(S_pred) + fabs(area(ps2, n2)) - inter_area; - double iou = inter_area / union_area; - return (float)iou; -} - -template -__global__ void convex_iou_cuda_kernel(const int ex_n_boxes, - const int gt_n_boxes, const T* ex_boxes, - const T* gt_boxes, T* iou) { - CUDA_1D_KERNEL_LOOP(index, ex_n_boxes) { - const T* cur_box = ex_boxes + index * 18; - for (int i = 0; i < gt_n_boxes; i++) { - iou[index * gt_n_boxes + i] = devrIoU(cur_box, gt_boxes + i * 8); - } - } -} -#endif // CONVEX_IOU_CUDA_KERNEL_CUH diff --git a/mmcv/ops/csrc/common/cuda/correlation_cuda.cuh b/mmcv/ops/csrc/common/cuda/correlation_cuda.cuh index f910561..75ea4ad 100644 --- a/mmcv/ops/csrc/common/cuda/correlation_cuda.cuh +++ b/mmcv/ops/csrc/common/cuda/correlation_cuda.cuh @@ -29,25 
+29,21 @@ using namespace torch; #define TensorAcc5R PackedTensorAccessor32 #define WITHIN_BOUNDS(x, y, H, W) (x >= 0 && x < H && y >= 0 && y < W) -#define WARP_SIZE 32 -#define FULL_MASK 0xffffffff +#define THREADS_FORWARD 32 +#define THREADS_BACKWARD 16 template __global__ void correlation_forward_cuda_kernel( const TensorAcc4R rInput1, const TensorAcc4R rInput2, TensorAcc5R output, int kH, int kW, int patchH, int patchW, int padH, int padW, int dilationH, - int dilationW, int dilation_patchH, int dilation_patchW, int dH, int dW, - int oH, int oW) { + int dilationW, int dilation_patchH, int dilation_patchW, int dH, int dW) { const int iH = rInput1.size(1); const int iW = rInput1.size(2); const int C = rInput1.size(3); const int n = blockIdx.x; - const int h = blockIdx.y * blockDim.y + threadIdx.y; - const int w = blockIdx.z * blockDim.z + threadIdx.z; - - if (h >= oH || w >= oW) return; - + const int h = blockIdx.y; + const int w = blockIdx.z; const int thread = threadIdx.x; const int start_i = -padH + h * dH; @@ -56,37 +52,40 @@ __global__ void correlation_forward_cuda_kernel( const int patchRadH = dilation_patchH * (patchH - 1) / 2; const int patchRadW = dilation_patchW * (patchW - 1) / 2; + __shared__ scalar_t prod_sum[THREADS_FORWARD]; + for (int ph = 0; ph < patchH; ++ph) { int ph_dilated = ph * dilation_patchH - patchRadH; for (int pw = 0; pw < patchW; ++pw) { int pw_dilated = pw * dilation_patchW - patchRadW; - scalar_t prod_sum = 0.0f; + prod_sum[thread] = 0; for (int i = 0; i < kH; ++i) { int i1 = start_i + i * dilationH; int i2 = i1 + ph_dilated; - if (WITHIN_BOUNDS(i1, i2, iH, iH)) { - for (int j = 0; j < kW; ++j) { - int j1 = start_j + j * dilationW; - int j2 = j1 + pw_dilated; - if (WITHIN_BOUNDS(j1, j2, iW, iW)) { - for (int c = thread; c < C; c += WARP_SIZE) { - scalar_t v1 = rInput1[n][i1][j1][c]; - scalar_t v2 = rInput2[n][i2][j2][c]; - prod_sum += v1 * v2; - } + if + WITHIN_BOUNDS(i1, i2, iH, iH) { + for (int j = 0; j < kW; ++j) { + int j1 = 
start_j + j * dilationW; + int j2 = j1 + pw_dilated; + if + WITHIN_BOUNDS(j1, j2, iW, iW) { + for (int c = thread; c < C; c += THREADS_FORWARD) { + scalar_t v1 = rInput1[n][i1][j1][c]; + scalar_t v2 = rInput2[n][i2][j2][c]; + prod_sum[thread] += v1 * v2; + } + } } } - } } // accumulate - for (int offset = 16; offset > 0; offset /= 2) -#ifdef MMCV_WITH_HIP - prod_sum += __shfl_down(float(prod_sum), offset); -#else - prod_sum += __shfl_down_sync(FULL_MASK, float(prod_sum), offset); -#endif + __syncthreads(); if (thread == 0) { - output[n][ph][pw][h][w] = prod_sum; + scalar_t reduce_sum = 0; + for (int index = 0; index < THREADS_FORWARD; ++index) { + reduce_sum += prod_sum[index]; + } + output[n][ph][pw][h][w] = reduce_sum; } } } @@ -98,10 +97,9 @@ __global__ void correlation_backward_cuda_kernel_input1( TensorAcc4R grad_input1, const int kH, const int kW, const int patchH, const int patchW, const int padH, const int padW, const int dilationH, const int dilationW, const int dilation_patchH, const int dilation_patchW, - const int dH, const int dW) { - const int iH = input2.size(1); - const int iW = input2.size(2); - const int C = input2.size(3); + const int dH, const int dW, const int batch) { + const int iH = input2.size(2); + const int iW = input2.size(3); const int H = grad_output.size(3); const int W = grad_output.size(4); @@ -109,53 +107,54 @@ __global__ void correlation_backward_cuda_kernel_input1( const int patchRadH = (patchH - 1) / 2; const int patchRadW = (patchW - 1) / 2; - const int n = blockIdx.x; + const int n = batch; + const int c = blockIdx.x; const int h = blockIdx.y; const int w = blockIdx.z; + const int ph_off = threadIdx.x; + const int pw_off = threadIdx.y; const int h_2 = h + padH; const int w_2 = w + padW; const int min_h = h_2 - kH * dilationH; const int min_w = w_2 - kW * dilationW; - extern __shared__ __align__(sizeof(4)) unsigned char grad_cache_char[]; - scalar_t *grad_cache = reinterpret_cast(grad_cache_char); - for (int i = threadIdx.x; i 
< patchH * patchW; i += blockDim.x) { - const int ph = i / patchW; - const int pw = i % patchW; + __shared__ scalar_t prod_sum[THREADS_BACKWARD][THREADS_BACKWARD]; + prod_sum[ph_off][pw_off] = 0; + + for (int ph = ph_off; ph < patchH; ph += THREADS_BACKWARD) { int i1 = h + dilation_patchH * (ph - patchRadH); - int j1 = w + dilation_patchW * (pw - patchRadW); - - if (WITHIN_BOUNDS(i1, j1, iH, iW)) { - scalar_t grad_val = 0.0f; - for (int h_3 = h_2; h_3 > min_h; h_3 -= dilationH) { - int i2 = (h_3) / dH; - if (i2 * dH != h_3) continue; - for (int w_3 = w_2; w_3 > min_w; w_3 -= dilationW) { - int j2 = (w_3) / dW; - if (j2 * dW != w_3) continue; - if (WITHIN_BOUNDS(i2, j2, H, W)) { - grad_val += grad_output[n][ph][pw][i2][j2]; + for (int pw = pw_off; pw < patchW; pw += THREADS_BACKWARD) { + int j1 = w + dilation_patchW * (pw - patchRadW); + if (WITHIN_BOUNDS(i1, j1, iH, iW)) { + scalar_t val = input2[n][c][i1][j1]; + for (int h_3 = h_2; h_3 > min_h; h_3 -= dilationH) { + int i2 = (h_3) / dH; + if (i2 * dH != h_3) continue; + for (int w_3 = w_2; w_3 > min_w; w_3 -= dilationW) { + int j2 = (w_3) / dW; + if (j2 * dW != w_3) continue; + if + WITHIN_BOUNDS(i2, j2, H, W) { + prod_sum[ph_off][pw_off] += + grad_output[n][ph][pw][i2][j2] * val; + } } } } - grad_cache[i] = grad_val; } } + __syncthreads(); - for (int c = threadIdx.x; c < C; c += blockDim.x) { - scalar_t grad_input_val = 0.0f; - for (int ph = 0; ph < patchH; ++ph) { - int i1 = h + dilation_patchH * (ph - patchRadH); - for (int pw = 0; pw < patchW; ++pw) { - int j1 = w + dilation_patchW * (pw - patchRadW); - if (WITHIN_BOUNDS(i1, j1, iH, iW)) { - grad_input_val += input2[n][i1][j1][c] * grad_cache[ph * patchW + pw]; - } + if (ph_off == 0 && pw_off == 0) { + scalar_t reduce_sum = 0; + for (int ph = 0; ph < THREADS_BACKWARD; ++ph) { + for (int pw = 0; pw < THREADS_BACKWARD; ++pw) { + reduce_sum += prod_sum[ph][pw]; } } - grad_input1[n][c][h][w] = grad_input_val; + grad_input1[n][c][h][w] = reduce_sum; } } @@ -164,10 
+163,9 @@ __global__ void correlation_backward_cuda_kernel_input2( const TensorAcc5R grad_output, const TensorAcc4R input1, TensorAcc4R grad_input2, int kH, int kW, int patchH, int patchW, int padH, int padW, int dilationH, int dilationW, int dilation_patchH, - int dilation_patchW, int dH, int dW) { - const int iH = input1.size(1); - const int iW = input1.size(2); - const int C = input1.size(3); + int dilation_patchW, int dH, int dW, int batch) { + const int iH = input1.size(2); + const int iW = input1.size(3); const int patchRadH = (patchH - 1) / 2; const int patchRadW = (patchW - 1) / 2; @@ -178,54 +176,56 @@ __global__ void correlation_backward_cuda_kernel_input2( const int dilatedKH = kH * dilationH; const int dilatedKW = kW * dilationW; - const int n = blockIdx.x; + const int n = batch; + const int c = blockIdx.x; const int h = blockIdx.y; const int w = blockIdx.z; + const int ph_off = threadIdx.x; + const int pw_off = threadIdx.y; - extern __shared__ __align__(sizeof(4)) unsigned char grad_cache_char[]; - scalar_t *grad_cache = reinterpret_cast(grad_cache_char); - for (int i = threadIdx.x; i < patchH * patchW; i += blockDim.x) { - const int ph = i / patchW; - const int pw = i % patchW; + __shared__ scalar_t prod_sum[THREADS_BACKWARD][THREADS_BACKWARD]; + prod_sum[ph_off][pw_off] = 0; + + for (int ph = ph_off; ph < patchH; ph += THREADS_BACKWARD) { int i1 = h - dilation_patchH * (ph - patchRadH); - int j1 = w - dilation_patchW * (pw - patchRadW); - - if (WITHIN_BOUNDS(i1, j1, iH, iW)) { - scalar_t grad_val = 0.0f; - - const int h_2 = i1 + padH; - const int w_2 = j1 + padW; - const int min_h = h_2 - dilatedKH; - const int min_w = w_2 - dilatedKW; - - for (int h_3 = h_2; h_3 > min_h; h_3 -= dilationH) { - int i2 = (h_3) / dH; - if (i2 * dH != h_3) continue; - for (int w_3 = w_2; w_3 > min_w; w_3 -= dilationW) { - int j2 = (w_3) / dW; - if (j2 * dW != w_3) continue; - if (WITHIN_BOUNDS(i2, j2, H, W)) { - grad_val += grad_output[n][ph][pw][i2][j2]; + for (int pw = 
pw_off; pw < patchW; pw += THREADS_BACKWARD) { + int j1 = w - dilation_patchW * (pw - patchRadW); + if + WITHIN_BOUNDS(i1, j1, iH, iW) { + scalar_t val = input1[n][c][i1][j1]; + + const int h_2 = i1 + padH; + const int w_2 = j1 + padW; + const int min_h = h_2 - dilatedKH; + const int min_w = w_2 - dilatedKW; + + for (int h_3 = h_2; h_3 > min_h; h_3 -= dilationH) { + int i2 = (h_3) / dH; + if (i2 * dH != h_3) continue; + for (int w_3 = w_2; w_3 > min_w; w_3 -= dilationW) { + int j2 = (w_3) / dW; + if (j2 * dW != w_3) continue; + if + WITHIN_BOUNDS(i2, j2, H, W) { + prod_sum[ph_off][pw_off] += + grad_output[n][ph][pw][i2][j2] * val; + } + } } } - } - grad_cache[i] = grad_val; } } + __syncthreads(); - for (int c = threadIdx.x; c < C; c += blockDim.x) { - scalar_t grad_input_val = 0.0f; - for (int ph = 0; ph < patchH; ++ph) { - int i1 = h - dilation_patchH * (ph - patchRadH); - for (int pw = 0; pw < patchW; ++pw) { - int j1 = w - dilation_patchW * (pw - patchRadW); - if (WITHIN_BOUNDS(i1, j1, iH, iW)) { - grad_input_val += input1[n][i1][j1][c] * grad_cache[ph * patchW + pw]; - } + if (ph_off == 0 && pw_off == 0) { + scalar_t reduce_sum = 0; + for (int ph = 0; ph < THREADS_BACKWARD; ++ph) { + for (int pw = 0; pw < THREADS_BACKWARD; ++pw) { + reduce_sum += prod_sum[ph][pw]; } } - grad_input2[n][c][h][w] = grad_input_val; + grad_input2[n][c][h][w] = reduce_sum; } } #endif diff --git a/mmcv/ops/csrc/common/cuda/diff_iou_rotated_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/diff_iou_rotated_cuda_kernel.cuh deleted file mode 100644 index 053977a..0000000 --- a/mmcv/ops/csrc/common/cuda/diff_iou_rotated_cuda_kernel.cuh +++ /dev/null @@ -1,137 +0,0 @@ -// Copyright (c) OpenMMLab. 
All rights reserved -// Adapted from -// https://github.com/lilanxiao/Rotated_IoU/cuda_op/sort_vert_kernel.cu # noqa -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else -#include "pytorch_cuda_helper.hpp" -#endif - -#define MAX_NUM_VERT_IDX 9 -#define INTERSECTION_OFFSET 8 -#define EPSILON 1e-8 - -inline int opt_n_thread(int work_size) { - const int pow_2 = std::log(static_cast(work_size)) / std::log(2.0); - return max(min(1 << pow_2, THREADS_PER_BLOCK), 1); -} - -/* -compare normalized vertices (vertices around (0,0)) -if vertex1 < vertex2 return true. -order: minimum at x-aixs, become larger in anti-clockwise direction -*/ -__device__ bool compare_vertices(float x1, float y1, float x2, float y2) { - if (fabs(x1 - x2) < EPSILON && fabs(y2 - y1) < EPSILON) - return false; // if equal, return false - - if (y1 > 0 && y2 < 0) return true; - if (y1 < 0 && y2 > 0) return false; - - float n1 = x1 * x1 + y1 * y1 + EPSILON; - float n2 = x2 * x2 + y2 * y2 + EPSILON; - float diff = fabs(x1) * x1 / n1 - fabs(x2) * x2 / n2; - - if (y1 > 0 && y2 > 0) { - if (diff > EPSILON) - return true; - else - return false; - } - if (y1 < 0 && y2 < 0) { - if (diff < EPSILON) - return true; - else - return false; - } - return false; -} - -__global__ void diff_iou_rotated_sort_vertices_forward_cuda_kernel( - int b, int n, int m, const float *__restrict__ vertices, - const bool *__restrict__ mask, const int *__restrict__ num_valid, - int *__restrict__ idx) { - int batch_idx = blockIdx.x; - vertices += batch_idx * n * m * 2; - mask += batch_idx * n * m; - num_valid += batch_idx * n; - idx += batch_idx * n * MAX_NUM_VERT_IDX; - - int index = threadIdx.x; // index of polygon - int stride = blockDim.x; - for (int i = index; i < n; i += stride) { - int pad; // index of arbitrary invalid intersection point (not box corner!) 
- for (int j = INTERSECTION_OFFSET; j < m; ++j) { - if (!mask[i * m + j]) { - pad = j; - break; - } - } - if (num_valid[i] < 3) { - // not enough vertices, take an invalid intersection point - // (zero padding) - for (int j = 0; j < MAX_NUM_VERT_IDX; ++j) { - idx[i * MAX_NUM_VERT_IDX + j] = pad; - } - } else { - // sort the valid vertices - // note the number of valid vertices is known - // note: check that num_valid[i] < MAX_NUM_VERT_IDX - for (int j = 0; j < num_valid[i]; ++j) { - // initialize with a "big" value - float x_min = 1; - float y_min = -EPSILON; - int i_take = 0; - int i2; - float x2, y2; - if (j != 0) { - i2 = idx[i * MAX_NUM_VERT_IDX + j - 1]; - x2 = vertices[i * m * 2 + i2 * 2 + 0]; - y2 = vertices[i * m * 2 + i2 * 2 + 1]; - } - for (int k = 0; k < m; ++k) { - float x = vertices[i * m * 2 + k * 2 + 0]; - float y = vertices[i * m * 2 + k * 2 + 1]; - if (mask[i * m + k] && compare_vertices(x, y, x_min, y_min)) { - if ((j == 0) || (j != 0 && compare_vertices(x2, y2, x, y))) { - x_min = x; - y_min = y; - i_take = k; - } - } - } - idx[i * MAX_NUM_VERT_IDX + j] = i_take; - } - // duplicate the first idx - idx[i * MAX_NUM_VERT_IDX + num_valid[i]] = idx[i * MAX_NUM_VERT_IDX + 0]; - - // pad zeros - for (int j = num_valid[i] + 1; j < MAX_NUM_VERT_IDX; ++j) { - idx[i * MAX_NUM_VERT_IDX + j] = pad; - } - - // for corner case: the two boxes are exactly the same. 
- // in this case, idx would have duplicate elements, which makes the - // shoelace formula broken because of the definition, the duplicate - // elements only appear in the first 8 positions (they are "corners in - // box", not "intersection of edges") - if (num_valid[i] == 8) { - int counter = 0; - for (int j = 0; j < 4; ++j) { - int check = idx[i * MAX_NUM_VERT_IDX + j]; - for (int k = 4; k < INTERSECTION_OFFSET; ++k) { - if (idx[i * MAX_NUM_VERT_IDX + k] == check) counter++; - } - } - if (counter == 4) { - idx[i * MAX_NUM_VERT_IDX + 4] = idx[i * MAX_NUM_VERT_IDX + 0]; - for (int j = 5; j < MAX_NUM_VERT_IDX; ++j) { - idx[i * MAX_NUM_VERT_IDX + j] = pad; - } - } - } - - // TODO: still might need to cover some other corner cases :( - } - } -} diff --git a/mmcv/ops/csrc/common/cuda/gather_points_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/gather_points_cuda_kernel.cuh index 6d93243..c8fc615 100644 --- a/mmcv/ops/csrc/common/cuda/gather_points_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/gather_points_cuda_kernel.cuh @@ -22,14 +22,13 @@ __global__ void gather_points_forward_cuda_kernel(int b, int c, int n, int m, int bs_idx = blockIdx.z; int c_idx = blockIdx.y; - CUDA_1D_KERNEL_LOOP(pt_idx, m) { - if (bs_idx >= b || c_idx >= c) return; - - out += bs_idx * c * m + c_idx * m + pt_idx; - idx += bs_idx * m + pt_idx; - points += bs_idx * c * n + c_idx * n; - out[0] = points[idx[0]]; - } + int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (bs_idx >= b || c_idx >= c || pt_idx >= m) return; + + out += bs_idx * c * m + c_idx * m + pt_idx; + idx += bs_idx * m + pt_idx; + points += bs_idx * c * n + c_idx * n; + out[0] = points[idx[0]]; } template @@ -44,15 +43,14 @@ __global__ void gather_points_backward_cuda_kernel(int b, int c, int n, int m, int bs_idx = blockIdx.z; int c_idx = blockIdx.y; - CUDA_1D_KERNEL_LOOP(pt_idx, m) { - if (bs_idx >= b || c_idx >= c) return; + int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (bs_idx >= b || c_idx >= c || pt_idx >= m) 
return; - grad_out += bs_idx * c * m + c_idx * m + pt_idx; - idx += bs_idx * m + pt_idx; - grad_points += bs_idx * c * n + c_idx * n; + grad_out += bs_idx * c * m + c_idx * m + pt_idx; + idx += bs_idx * m + pt_idx; + grad_points += bs_idx * c * n + c_idx * n; - atomicAdd(grad_points + idx[0], grad_out[0]); - } + atomicAdd(grad_points + idx[0], grad_out[0]); } #endif // GATHER_POINTS_CUDA_KERNEL_CUH diff --git a/mmcv/ops/csrc/common/cuda/group_points_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/group_points_cuda_kernel.cuh index dfad66f..9cfc2dc 100644 --- a/mmcv/ops/csrc/common/cuda/group_points_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/group_points_cuda_kernel.cuh @@ -22,19 +22,18 @@ __global__ void group_points_forward_cuda_kernel(int b, int c, int n, // out: (B, C, npoints, nsample) int bs_idx = blockIdx.z; int c_idx = blockIdx.y; - CUDA_1D_KERNEL_LOOP(index, npoints * nsample) { - if (bs_idx >= b || c_idx >= c) return; + int index = blockIdx.x * blockDim.x + threadIdx.x; + int pt_idx = index / nsample; + if (bs_idx >= b || c_idx >= c || pt_idx >= npoints) return; - int pt_idx = index / nsample; - int sample_idx = index % nsample; + int sample_idx = index % nsample; - idx += bs_idx * npoints * nsample + pt_idx * nsample + sample_idx; - int in_idx = bs_idx * c * n + c_idx * n + idx[0]; - int out_idx = bs_idx * c * npoints * nsample + c_idx * npoints * nsample + - pt_idx * nsample + sample_idx; + idx += bs_idx * npoints * nsample + pt_idx * nsample + sample_idx; + int in_idx = bs_idx * c * n + c_idx * n + idx[0]; + int out_idx = bs_idx * c * npoints * nsample + c_idx * npoints * nsample + + pt_idx * nsample + sample_idx; - out[out_idx] = points[in_idx]; - } + out[out_idx] = points[in_idx]; } template @@ -49,17 +48,16 @@ __global__ void group_points_backward_cuda_kernel(int b, int c, int n, // grad_points: (B, C, N) int bs_idx = blockIdx.z; int c_idx = blockIdx.y; - CUDA_1D_KERNEL_LOOP(index, npoints * nsample) { - int pt_idx = index / nsample; - if (bs_idx >= 
b || c_idx >= c) return; + int index = blockIdx.x * blockDim.x + threadIdx.x; + int pt_idx = index / nsample; + if (bs_idx >= b || c_idx >= c || pt_idx >= npoints) return; - int sample_idx = index % nsample; - grad_out += bs_idx * c * npoints * nsample + c_idx * npoints * nsample + - pt_idx * nsample + sample_idx; - idx += bs_idx * npoints * nsample + pt_idx * nsample + sample_idx; + int sample_idx = index % nsample; + grad_out += bs_idx * c * npoints * nsample + c_idx * npoints * nsample + + pt_idx * nsample + sample_idx; + idx += bs_idx * npoints * nsample + pt_idx * nsample + sample_idx; - atomicAdd(grad_points + bs_idx * c * n + c_idx * n + idx[0], grad_out[0]); - } + atomicAdd(grad_points + bs_idx * c * n + c_idx * n + idx[0], grad_out[0]); } #endif // GROUP_POINTS_CUDA_KERNEL_CUH diff --git a/mmcv/ops/csrc/common/cuda/iou3d_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/iou3d_cuda_kernel.cuh index 9ebdcad..4e261cb 100644 --- a/mmcv/ops/csrc/common/cuda/iou3d_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/iou3d_cuda_kernel.cuh @@ -50,17 +50,21 @@ __device__ int check_rect_cross(const Point &p1, const Point &p2, } __device__ inline int check_in_box2d(const float *box, const Point &p) { - // params: box (7) [x, y, z, dx, dy, dz, heading] - const float MARGIN = 1e-2; - - float center_x = box[0], center_y = box[1]; - // rotate the point in the opposite direction of box - float angle_cos = cos(-box[6]), angle_sin = sin(-box[6]); - float rot_x = (p.x - center_x) * angle_cos + (p.y - center_y) * (-angle_sin); - float rot_y = (p.x - center_x) * angle_sin + (p.y - center_y) * angle_cos; - - return (fabs(rot_x) < box[3] / 2 + MARGIN && - fabs(rot_y) < box[4] / 2 + MARGIN); + // params: box (5) [x1, y1, x2, y2, angle] + const float MARGIN = 1e-5; + + float center_x = (box[0] + box[2]) / 2; + float center_y = (box[1] + box[3]) / 2; + float angle_cos = cos(-box[4]), + angle_sin = + sin(-box[4]); // rotate the point in the opposite direction of box + float rot_x = + (p.x - 
center_x) * angle_cos - (p.y - center_y) * angle_sin + center_x; + float rot_y = + (p.x - center_x) * angle_sin + (p.y - center_y) * angle_cos + center_y; + + return (rot_x > box[0] - MARGIN && rot_x < box[2] + MARGIN && + rot_y > box[1] - MARGIN && rot_y < box[3] + MARGIN); } __device__ inline int intersection(const Point &p1, const Point &p0, @@ -112,19 +116,16 @@ __device__ inline int point_cmp(const Point &a, const Point &b, } __device__ inline float box_overlap(const float *box_a, const float *box_b) { - // params box_a: [x, y, z, dx, dy, dz, heading] - // params box_b: [x, y, z, dx, dy, dz, heading] + // params: box_a (5) [x1, y1, x2, y2, angle] + // params: box_b (5) [x1, y1, x2, y2, angle] - float a_angle = box_a[6], b_angle = box_b[6]; - float a_dx_half = box_a[3] / 2, b_dx_half = box_b[3] / 2, - a_dy_half = box_a[4] / 2, b_dy_half = box_b[4] / 2; - float a_x1 = box_a[0] - a_dx_half, a_y1 = box_a[1] - a_dy_half; - float a_x2 = box_a[0] + a_dx_half, a_y2 = box_a[1] + a_dy_half; - float b_x1 = box_b[0] - b_dx_half, b_y1 = box_b[1] - b_dy_half; - float b_x2 = box_b[0] + b_dx_half, b_y2 = box_b[1] + b_dy_half; + float a_x1 = box_a[0], a_y1 = box_a[1], a_x2 = box_a[2], a_y2 = box_a[3], + a_angle = box_a[4]; + float b_x1 = box_b[0], b_y1 = box_b[1], b_x2 = box_b[2], b_y2 = box_b[3], + b_angle = box_b[4]; - Point center_a(box_a[0], box_a[1]); - Point center_b(box_b[0], box_b[1]); + Point center_a((a_x1 + a_x2) / 2, (a_y1 + a_y2) / 2); + Point center_b((b_x1 + b_x2) / 2, (b_y1 + b_y2) / 2); Point box_a_corners[5]; box_a_corners[0].set(a_x1, a_y1); @@ -208,10 +209,10 @@ __device__ inline float box_overlap(const float *box_a, const float *box_b) { } __device__ inline float iou_bev(const float *box_a, const float *box_b) { - // params box_a: [x, y, z, dx, dy, dz, heading] - // params box_b: [x, y, z, dx, dy, dz, heading] - float sa = box_a[3] * box_a[4]; - float sb = box_b[3] * box_b[4]; + // params: box_a (5) [x1, y1, x2, y2, angle] + // params: box_b (5) [x1, y1, 
x2, y2, angle] + float sa = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1]); + float sb = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1]); float s_overlap = box_overlap(box_a, box_b); return s_overlap / fmaxf(sa + sb - s_overlap, EPS); } @@ -219,148 +220,149 @@ __device__ inline float iou_bev(const float *box_a, const float *box_b) { __global__ void iou3d_boxes_overlap_bev_forward_cuda_kernel( const int num_a, const float *boxes_a, const int num_b, const float *boxes_b, float *ans_overlap) { - // params boxes_a: (N, 7) [x, y, z, dx, dy, dz, heading] - // params boxes_b: (M, 7) [x, y, z, dx, dy, dz, heading] - CUDA_2D_KERNEL_LOOP(b_idx, num_b, a_idx, num_a) { - if (a_idx >= num_a || b_idx >= num_b) { - return; - } + const int a_idx = blockIdx.y * THREADS_PER_BLOCK + threadIdx.y; + const int b_idx = blockIdx.x * THREADS_PER_BLOCK + threadIdx.x; - const float *cur_box_a = boxes_a + a_idx * 7; - const float *cur_box_b = boxes_b + b_idx * 7; - float cur_overlap = box_overlap(cur_box_a, cur_box_b); - ans_overlap[a_idx * num_b + b_idx] = cur_overlap; + if (a_idx >= num_a || b_idx >= num_b) { + return; } + const float *cur_box_a = boxes_a + a_idx * 5; + const float *cur_box_b = boxes_b + b_idx * 5; + float s_overlap = box_overlap(cur_box_a, cur_box_b); + ans_overlap[a_idx * num_b + b_idx] = s_overlap; } -__global__ void iou3d_nms3d_forward_cuda_kernel(const int boxes_num, - const float nms_overlap_thresh, - const float *boxes, - unsigned long long *mask) { - // params: boxes (N, 7) [x, y, z, dx, dy, dz, heading] +__global__ void iou3d_boxes_iou_bev_forward_cuda_kernel(const int num_a, + const float *boxes_a, + const int num_b, + const float *boxes_b, + float *ans_iou) { + const int a_idx = blockIdx.y * THREADS_PER_BLOCK + threadIdx.y; + const int b_idx = blockIdx.x * THREADS_PER_BLOCK + threadIdx.x; + + if (a_idx >= num_a || b_idx >= num_b) { + return; + } + + const float *cur_box_a = boxes_a + a_idx * 5; + const float *cur_box_b = boxes_b + b_idx * 5; + float cur_iou_bev = 
iou_bev(cur_box_a, cur_box_b); + ans_iou[a_idx * num_b + b_idx] = cur_iou_bev; +} + +__global__ void nms_forward_cuda_kernel(const int boxes_num, + const float nms_overlap_thresh, + const float *boxes, + unsigned long long *mask) { + // params: boxes (N, 5) [x1, y1, x2, y2, ry] // params: mask (N, N/THREADS_PER_BLOCK_NMS) - const int blocks = - (boxes_num + THREADS_PER_BLOCK_NMS - 1) / THREADS_PER_BLOCK_NMS; - CUDA_2D_KERNEL_BLOCK_LOOP(col_start, blocks, row_start, blocks) { - // if (row_start > col_start) return; - - const int row_size = fminf(boxes_num - row_start * THREADS_PER_BLOCK_NMS, - THREADS_PER_BLOCK_NMS); - const int col_size = fminf(boxes_num - col_start * THREADS_PER_BLOCK_NMS, - THREADS_PER_BLOCK_NMS); - - __shared__ float block_boxes[THREADS_PER_BLOCK_NMS * 7]; - - if (threadIdx.x < col_size) { - block_boxes[threadIdx.x * 7 + 0] = - boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 0]; - block_boxes[threadIdx.x * 7 + 1] = - boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 1]; - block_boxes[threadIdx.x * 7 + 2] = - boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 2]; - block_boxes[threadIdx.x * 7 + 3] = - boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 3]; - block_boxes[threadIdx.x * 7 + 4] = - boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 4]; - block_boxes[threadIdx.x * 7 + 5] = - boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 5]; - block_boxes[threadIdx.x * 7 + 6] = - boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 6]; - } - __syncthreads(); - if (threadIdx.x < row_size) { - const int cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x; - const float *cur_box = boxes + cur_box_idx * 7; + const int row_start = blockIdx.y; + const int col_start = blockIdx.x; + + // if (row_start > col_start) return; + + const int row_size = fminf(boxes_num - row_start * THREADS_PER_BLOCK_NMS, + THREADS_PER_BLOCK_NMS); + const int col_size = fminf(boxes_num - 
col_start * THREADS_PER_BLOCK_NMS, + THREADS_PER_BLOCK_NMS); + + __shared__ float block_boxes[THREADS_PER_BLOCK_NMS * 5]; + + if (threadIdx.x < col_size) { + block_boxes[threadIdx.x * 5 + 0] = + boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 0]; + block_boxes[threadIdx.x * 5 + 1] = + boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 1]; + block_boxes[threadIdx.x * 5 + 2] = + boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 2]; + block_boxes[threadIdx.x * 5 + 3] = + boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 3]; + block_boxes[threadIdx.x * 5 + 4] = + boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 4]; + } + __syncthreads(); - int i = 0; - unsigned long long t = 0; - int start = 0; - if (row_start == col_start) { - start = threadIdx.x + 1; - } - for (i = start; i < col_size; i++) { - if (iou_bev(cur_box, block_boxes + i * 7) > nms_overlap_thresh) { - t |= 1ULL << i; - } + if (threadIdx.x < row_size) { + const int cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x; + const float *cur_box = boxes + cur_box_idx * 5; + + int i = 0; + unsigned long long t = 0; + int start = 0; + if (row_start == col_start) { + start = threadIdx.x + 1; + } + for (i = start; i < col_size; i++) { + if (iou_bev(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { + t |= 1ULL << i; } - const int col_blocks = - (boxes_num + THREADS_PER_BLOCK_NMS - 1) / THREADS_PER_BLOCK_NMS; - mask[cur_box_idx * col_blocks + col_start] = t; } + const int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS); + mask[cur_box_idx * col_blocks + col_start] = t; } } __device__ inline float iou_normal(float const *const a, float const *const b) { - // params: a: [x, y, z, dx, dy, dz, heading] - // params: b: [x, y, z, dx, dy, dz, heading] - - float left = fmaxf(a[0] - a[3] / 2, b[0] - b[3] / 2), - right = fminf(a[0] + a[3] / 2, b[0] + b[3] / 2); - float top = fmaxf(a[1] - a[4] / 2, b[1] - b[4] / 2), - bottom = fminf(a[1] + a[4] / 2, 
b[1] + b[4] / 2); + float left = fmaxf(a[0], b[0]), right = fminf(a[2], b[2]); + float top = fmaxf(a[1], b[1]), bottom = fminf(a[3], b[3]); float width = fmaxf(right - left, 0.f), height = fmaxf(bottom - top, 0.f); float interS = width * height; - float Sa = a[3] * a[4]; - float Sb = b[3] * b[4]; + float Sa = (a[2] - a[0]) * (a[3] - a[1]); + float Sb = (b[2] - b[0]) * (b[3] - b[1]); return interS / fmaxf(Sa + Sb - interS, EPS); } -__global__ void iou3d_nms3d_normal_forward_cuda_kernel( - const int boxes_num, const float nms_overlap_thresh, const float *boxes, - unsigned long long *mask) { - // params: boxes (N, 7) [x, y, z, dx, dy, dz, heading] +__global__ void nms_normal_forward_cuda_kernel(const int boxes_num, + const float nms_overlap_thresh, + const float *boxes, + unsigned long long *mask) { + // params: boxes (N, 5) [x1, y1, x2, y2, ry] // params: mask (N, N/THREADS_PER_BLOCK_NMS) - const int blocks = - (boxes_num + THREADS_PER_BLOCK_NMS - 1) / THREADS_PER_BLOCK_NMS; - CUDA_2D_KERNEL_BLOCK_LOOP(col_start, blocks, row_start, blocks) { - // if (row_start > col_start) return; - - const int row_size = fminf(boxes_num - row_start * THREADS_PER_BLOCK_NMS, - THREADS_PER_BLOCK_NMS); - const int col_size = fminf(boxes_num - col_start * THREADS_PER_BLOCK_NMS, - THREADS_PER_BLOCK_NMS); - - __shared__ float block_boxes[THREADS_PER_BLOCK_NMS * 7]; - - if (threadIdx.x < col_size) { - block_boxes[threadIdx.x * 7 + 0] = - boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 0]; - block_boxes[threadIdx.x * 7 + 1] = - boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 1]; - block_boxes[threadIdx.x * 7 + 2] = - boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 2]; - block_boxes[threadIdx.x * 7 + 3] = - boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 3]; - block_boxes[threadIdx.x * 7 + 4] = - boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 4]; - block_boxes[threadIdx.x * 7 + 5] = - boxes[(THREADS_PER_BLOCK_NMS * 
col_start + threadIdx.x) * 7 + 5]; - block_boxes[threadIdx.x * 7 + 6] = - boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 7 + 6]; - } - __syncthreads(); + const int row_start = blockIdx.y; + const int col_start = blockIdx.x; + + // if (row_start > col_start) return; + + const int row_size = fminf(boxes_num - row_start * THREADS_PER_BLOCK_NMS, + THREADS_PER_BLOCK_NMS); + const int col_size = fminf(boxes_num - col_start * THREADS_PER_BLOCK_NMS, + THREADS_PER_BLOCK_NMS); + + __shared__ float block_boxes[THREADS_PER_BLOCK_NMS * 5]; + + if (threadIdx.x < col_size) { + block_boxes[threadIdx.x * 5 + 0] = + boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 0]; + block_boxes[threadIdx.x * 5 + 1] = + boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 1]; + block_boxes[threadIdx.x * 5 + 2] = + boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 2]; + block_boxes[threadIdx.x * 5 + 3] = + boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 3]; + block_boxes[threadIdx.x * 5 + 4] = + boxes[(THREADS_PER_BLOCK_NMS * col_start + threadIdx.x) * 5 + 4]; + } + __syncthreads(); - if (threadIdx.x < row_size) { - const int cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x; - const float *cur_box = boxes + cur_box_idx * 7; + if (threadIdx.x < row_size) { + const int cur_box_idx = THREADS_PER_BLOCK_NMS * row_start + threadIdx.x; + const float *cur_box = boxes + cur_box_idx * 5; - int i = 0; - unsigned long long t = 0; - int start = 0; - if (row_start == col_start) { - start = threadIdx.x + 1; - } - for (i = start; i < col_size; i++) { - if (iou_normal(cur_box, block_boxes + i * 7) > nms_overlap_thresh) { - t |= 1ULL << i; - } + int i = 0; + unsigned long long t = 0; + int start = 0; + if (row_start == col_start) { + start = threadIdx.x + 1; + } + for (i = start; i < col_size; i++) { + if (iou_normal(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { + t |= 1ULL << i; } - const int col_blocks = - (boxes_num + 
THREADS_PER_BLOCK_NMS - 1) / THREADS_PER_BLOCK_NMS; - mask[cur_box_idx * col_blocks + col_start] = t; } + const int col_blocks = DIVUP(boxes_num, THREADS_PER_BLOCK_NMS); + mask[cur_box_idx * col_blocks + col_start] = t; } } diff --git a/mmcv/ops/csrc/common/cuda/knn_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/knn_cuda_kernel.cuh index 3cf52bb..3181aa6 100644 --- a/mmcv/ops/csrc/common/cuda/knn_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/knn_cuda_kernel.cuh @@ -51,42 +51,41 @@ __global__ void knn_forward_cuda_kernel(int b, int n, int m, int nsample, const T *xyz, const T *new_xyz, int *__restrict__ idx, T *dist2) { int bs_idx = blockIdx.y; - CUDA_1D_KERNEL_LOOP(pt_idx, m) { - if (bs_idx >= b) return; + int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (bs_idx >= b || pt_idx >= m) return; - new_xyz += bs_idx * m * 3 + pt_idx * 3; - xyz += bs_idx * n * 3; - idx += bs_idx * m * nsample + pt_idx * nsample; - dist2 += bs_idx * m * nsample + pt_idx * nsample; + new_xyz += bs_idx * m * 3 + pt_idx * 3; + xyz += bs_idx * n * 3; + idx += bs_idx * m * nsample + pt_idx * nsample; + dist2 += bs_idx * m * nsample + pt_idx * nsample; - T new_x = new_xyz[0]; - T new_y = new_xyz[1]; - T new_z = new_xyz[2]; + T new_x = new_xyz[0]; + T new_y = new_xyz[1]; + T new_z = new_xyz[2]; - float best_dist[100]; - int best_idx[100]; - for (int i = 0; i < nsample; i++) { - best_dist[i] = 1e10; - best_idx[i] = 0; - } - for (int i = 0; i < n; i++) { - T x = xyz[i * 3 + 0]; - T y = xyz[i * 3 + 1]; - T z = xyz[i * 3 + 2]; - T d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + - (new_z - z) * (new_z - z); - if (d2 < best_dist[0]) { - best_dist[0] = d2; - best_idx[0] = i; - reheap(best_dist, best_idx, nsample); - } - } - heap_sort(best_dist, best_idx, nsample); - for (int i = 0; i < nsample; i++) { - idx[i] = best_idx[i]; - dist2[i] = best_dist[i]; + float best_dist[100]; + int best_idx[100]; + for (int i = 0; i < nsample; i++) { + best_dist[i] = 1e10; + best_idx[i] = 0; + } + for 
(int i = 0; i < n; i++) { + T x = xyz[i * 3 + 0]; + T y = xyz[i * 3 + 1]; + T z = xyz[i * 3 + 2]; + T d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + + (new_z - z) * (new_z - z); + if (d2 < best_dist[0]) { + best_dist[0] = d2; + best_idx[0] = i; + reheap(best_dist, best_idx, nsample); } } + heap_sort(best_dist, best_idx, nsample); + for (int i = 0; i < nsample; i++) { + idx[i] = best_idx[i]; + dist2[i] = best_dist[i]; + } } #endif // KNN_CUDA_KERNEL_CUH diff --git a/mmcv/ops/csrc/common/cuda/min_area_polygons_cuda.cuh b/mmcv/ops/csrc/common/cuda/min_area_polygons_cuda.cuh deleted file mode 100644 index df56e74..0000000 --- a/mmcv/ops/csrc/common/cuda/min_area_polygons_cuda.cuh +++ /dev/null @@ -1,300 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -#ifndef MIN_AREA_POLYGONS_CUDA_KERNEL_CUH -#define MIN_AREA_POLYGONS_CUDA_KERNEL_CUH - -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else -#include "pytorch_cuda_helper.hpp" -#endif - -#define MAXN 20 -__device__ const float PI = 3.1415926; - -struct Point { - float x, y; - __device__ Point() {} - __device__ Point(float x, float y) : x(x), y(y) {} -}; - -__device__ inline void swap1(Point *a, Point *b) { - Point temp; - temp.x = a->x; - temp.y = a->y; - - a->x = b->x; - a->y = b->y; - - b->x = temp.x; - b->y = temp.y; -} -__device__ inline float cross(Point o, Point a, Point b) { - return (a.x - o.x) * (b.y - o.y) - (b.x - o.x) * (a.y - o.y); -} - -__device__ inline float dis(Point a, Point b) { - return (a.x - b.x) * (a.x - b.x) + (a.y - b.y) * (a.y - b.y); -} -__device__ inline void minBoundingRect(Point *ps, int n_points, float *minbox) { - float convex_points[2][MAXN]; - for (int j = 0; j < n_points; j++) { - convex_points[0][j] = ps[j].x; - } - for (int j = 0; j < n_points; j++) { - convex_points[1][j] = ps[j].y; - } - - Point edges[MAXN]; - float edges_angles[MAXN]; - float unique_angles[MAXN]; - int n_edges = n_points - 1; - int n_unique = 0; - int unique_flag = 0; - - for 
(int i = 0; i < n_edges; i++) { - edges[i].x = ps[i + 1].x - ps[i].x; - edges[i].y = ps[i + 1].y - ps[i].y; - } - for (int i = 0; i < n_edges; i++) { - edges_angles[i] = atan2((double)edges[i].y, (double)edges[i].x); - if (edges_angles[i] >= 0) { - edges_angles[i] = fmod((double)edges_angles[i], (double)PI / 2); - } else { - edges_angles[i] = - edges_angles[i] - (int)(edges_angles[i] / (PI / 2) - 1) * (PI / 2); - } - } - unique_angles[0] = edges_angles[0]; - n_unique += 1; - for (int i = 1; i < n_edges; i++) { - for (int j = 0; j < n_unique; j++) { - if (edges_angles[i] == unique_angles[j]) { - unique_flag += 1; - } - } - if (unique_flag == 0) { - unique_angles[n_unique] = edges_angles[i]; - n_unique += 1; - unique_flag = 0; - } else { - unique_flag = 0; - } - } - - float minarea = 1e12; - for (int i = 0; i < n_unique; i++) { - float R[2][2]; - float rot_points[2][MAXN]; - R[0][0] = cos(unique_angles[i]); - R[0][1] = sin(unique_angles[i]); - R[1][0] = -sin(unique_angles[i]); - R[1][1] = cos(unique_angles[i]); - // R x Points - for (int m = 0; m < 2; m++) { - for (int n = 0; n < n_points; n++) { - float sum = 0.0; - for (int k = 0; k < 2; k++) { - sum = sum + R[m][k] * convex_points[k][n]; - } - rot_points[m][n] = sum; - } - } - - // xmin; - float xmin, ymin, xmax, ymax; - xmin = 1e12; - for (int j = 0; j < n_points; j++) { - if (isinf(rot_points[0][j]) || isnan(rot_points[0][j])) { - continue; - } else { - if (rot_points[0][j] < xmin) { - xmin = rot_points[0][j]; - } - } - } - // ymin - ymin = 1e12; - for (int j = 0; j < n_points; j++) { - if (isinf(rot_points[1][j]) || isnan(rot_points[1][j])) { - continue; - } else { - if (rot_points[1][j] < ymin) { - ymin = rot_points[1][j]; - } - } - } - // xmax - xmax = -1e12; - for (int j = 0; j < n_points; j++) { - if (isinf(rot_points[0][j]) || isnan(rot_points[0][j])) { - continue; - } else { - if (rot_points[0][j] > xmax) { - xmax = rot_points[0][j]; - } - } - } - // ymax - ymax = -1e12; - for (int j = 0; j < n_points; 
j++) { - if (isinf(rot_points[1][j]) || isnan(rot_points[1][j])) { - continue; - } else { - if (rot_points[1][j] > ymax) { - ymax = rot_points[1][j]; - } - } - } - float area = (xmax - xmin) * (ymax - ymin); - if (area < minarea) { - minarea = area; - minbox[0] = unique_angles[i]; - minbox[1] = xmin; - minbox[2] = ymin; - minbox[3] = xmax; - minbox[4] = ymax; - } - } -} - -// convex_find -__device__ inline void Jarvis(Point *in_poly, int &n_poly) { - int n_input = n_poly; - Point input_poly[20]; - for (int i = 0; i < n_input; i++) { - input_poly[i].x = in_poly[i].x; - input_poly[i].y = in_poly[i].y; - } - Point p_max, p_k; - int max_index, k_index; - int Stack[20], top1, top2; - // float sign; - double sign; - Point right_point[10], left_point[10]; - - for (int i = 0; i < n_poly; i++) { - if (in_poly[i].y < in_poly[0].y || - in_poly[i].y == in_poly[0].y && in_poly[i].x < in_poly[0].x) { - Point *j = &(in_poly[0]); - Point *k = &(in_poly[i]); - swap1(j, k); - } - if (i == 0) { - p_max = in_poly[0]; - max_index = 0; - } - if (in_poly[i].y > p_max.y || - in_poly[i].y == p_max.y && in_poly[i].x > p_max.x) { - p_max = in_poly[i]; - max_index = i; - } - } - if (max_index == 0) { - max_index = 1; - p_max = in_poly[max_index]; - } - - k_index = 0, Stack[0] = 0, top1 = 0; - while (k_index != max_index) { - p_k = p_max; - k_index = max_index; - for (int i = 1; i < n_poly; i++) { - sign = cross(in_poly[Stack[top1]], in_poly[i], p_k); - if ((sign > 0) || ((sign == 0) && (dis(in_poly[Stack[top1]], in_poly[i]) > - dis(in_poly[Stack[top1]], p_k)))) { - p_k = in_poly[i]; - k_index = i; - } - } - top1++; - Stack[top1] = k_index; - } - - for (int i = 0; i <= top1; i++) { - right_point[i] = in_poly[Stack[i]]; - } - - k_index = 0, Stack[0] = 0, top2 = 0; - - while (k_index != max_index) { - p_k = p_max; - k_index = max_index; - for (int i = 1; i < n_poly; i++) { - sign = cross(in_poly[Stack[top2]], in_poly[i], p_k); - if ((sign < 0) || (sign == 0) && (dis(in_poly[Stack[top2]], 
in_poly[i]) > - dis(in_poly[Stack[top2]], p_k))) { - p_k = in_poly[i]; - k_index = i; - } - } - top2++; - Stack[top2] = k_index; - } - - for (int i = top2 - 1; i >= 0; i--) { - left_point[i] = in_poly[Stack[i]]; - } - - for (int i = 0; i < top1 + top2; i++) { - if (i <= top1) { - in_poly[i] = right_point[i]; - } else { - in_poly[i] = left_point[top2 - (i - top1)]; - } - } - n_poly = top1 + top2; -} - -template -__device__ inline void Findminbox(T const *const p, T *minpoints) { - Point ps1[MAXN]; - Point convex[MAXN]; - for (int i = 0; i < 9; i++) { - convex[i].x = p[i * 2]; - convex[i].y = p[i * 2 + 1]; - } - int n_convex = 9; - Jarvis(convex, n_convex); - int n1 = n_convex; - for (int i = 0; i < n1; i++) { - ps1[i].x = convex[i].x; - ps1[i].y = convex[i].y; - } - ps1[n1].x = convex[0].x; - ps1[n1].y = convex[0].y; - - float minbbox[5] = {0}; - minBoundingRect(ps1, n1 + 1, minbbox); - float angle = minbbox[0]; - float xmin = minbbox[1]; - float ymin = minbbox[2]; - float xmax = minbbox[3]; - float ymax = minbbox[4]; - float R[2][2]; - - R[0][0] = cos(angle); - R[0][1] = sin(angle); - R[1][0] = -sin(angle); - R[1][1] = cos(angle); - - minpoints[0] = xmax * R[0][0] + ymin * R[1][0]; - minpoints[1] = xmax * R[0][1] + ymin * R[1][1]; - minpoints[2] = xmin * R[0][0] + ymin * R[1][0]; - minpoints[3] = xmin * R[0][1] + ymin * R[1][1]; - minpoints[4] = xmin * R[0][0] + ymax * R[1][0]; - minpoints[5] = xmin * R[0][1] + ymax * R[1][1]; - minpoints[6] = xmax * R[0][0] + ymax * R[1][0]; - minpoints[7] = xmax * R[0][1] + ymax * R[1][1]; -} - -template -__global__ void min_area_polygons_cuda_kernel(const int ex_n_boxes, - const T *ex_boxes, T *minbox) { - CUDA_1D_KERNEL_LOOP(index, ex_n_boxes) { - const T *cur_box = ex_boxes + index * 18; - T *cur_min_box = minbox + index * 8; - Findminbox(cur_box, cur_min_box); - } -} - -#endif // MIN_AREA_POLYGONS_CUDA_KERNEL_CUH diff --git a/mmcv/ops/csrc/common/cuda/ms_deform_attn_cuda_kernel.cuh 
b/mmcv/ops/csrc/common/cuda/ms_deform_attn_cuda_kernel.cuh index 12225ff..aff1ea2 100644 --- a/mmcv/ops/csrc/common/cuda/ms_deform_attn_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/ms_deform_attn_cuda_kernel.cuh @@ -14,6 +14,11 @@ #include "common_cuda_helper.hpp" #include "pytorch_cuda_helper.hpp" +const int CUDA_NUM_THREADS = 1024; +inline int GET_BLOCKS(const int N, const int num_threads) { + return (N + num_threads - 1) / num_threads; +} + template __device__ scalar_t ms_deform_attn_im2col_bilinear( const scalar_t *&bottom_data, const int &height, const int &width, @@ -262,11 +267,10 @@ __global__ void ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1( const int channels, const int num_levels, const int num_query, const int num_point, scalar_t *grad_value, scalar_t *grad_sampling_loc, scalar_t *grad_attn_weight) { - __shared__ scalar_t cache_grad_sampling_loc[blockSize * 2]; - __shared__ scalar_t cache_grad_attn_weight[blockSize]; - unsigned int tid = threadIdx.x; - const int qid_stride = num_heads * channels; CUDA_1D_KERNEL_LOOP(index, n) { + __shared__ scalar_t cache_grad_sampling_loc[blockSize * 2]; + __shared__ scalar_t cache_grad_attn_weight[blockSize]; + unsigned int tid = threadIdx.x; int _temp = index; const int c_col = _temp % channels; _temp /= channels; @@ -281,11 +285,11 @@ __global__ void ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1( int data_weight_ptr = sampling_index * num_levels * num_point; int data_loc_w_ptr = data_weight_ptr << 1; const int grad_sampling_ptr = data_weight_ptr; - scalar_t *grad_sampling_loc_out = - grad_sampling_loc + (grad_sampling_ptr << 1); - scalar_t *grad_attn_weight_out = grad_attn_weight + grad_sampling_ptr; + grad_sampling_loc += grad_sampling_ptr << 1; + grad_attn_weight += grad_sampling_ptr; const int grad_weight_stride = 1; const int grad_loc_stride = 2; + const int qid_stride = num_heads * channels; const int data_value_ptr_init_offset = b_col * spatial_size * qid_stride; for (int 
l_col = 0; l_col < num_levels; ++l_col) { @@ -322,23 +326,23 @@ __global__ void ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v1( _grad_h = cache_grad_sampling_loc[1], _grad_a = cache_grad_attn_weight[0]; int sid = 2; - for (unsigned int _tid = 1; _tid < blockSize; ++_tid) { + for (unsigned int tid = 1; tid < blockSize; ++tid) { _grad_w += cache_grad_sampling_loc[sid]; _grad_h += cache_grad_sampling_loc[sid + 1]; - _grad_a += cache_grad_attn_weight[_tid]; + _grad_a += cache_grad_attn_weight[tid]; sid += 2; } - *grad_sampling_loc_out = _grad_w; - *(grad_sampling_loc_out + 1) = _grad_h; - *grad_attn_weight_out = _grad_a; + *grad_sampling_loc = _grad_w; + *(grad_sampling_loc + 1) = _grad_h; + *grad_attn_weight = _grad_a; } __syncthreads(); data_weight_ptr += 1; data_loc_w_ptr += 2; - grad_attn_weight_out += grad_weight_stride; - grad_sampling_loc_out += grad_loc_stride; + grad_attn_weight += grad_weight_stride; + grad_sampling_loc += grad_loc_stride; } } } @@ -353,10 +357,10 @@ __global__ void ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2( const int channels, const int num_levels, const int num_query, const int num_point, scalar_t *grad_value, scalar_t *grad_sampling_loc, scalar_t *grad_attn_weight) { - __shared__ scalar_t cache_grad_sampling_loc[blockSize * 2]; - __shared__ scalar_t cache_grad_attn_weight[blockSize]; - unsigned int tid = threadIdx.x; CUDA_1D_KERNEL_LOOP(index, n) { + __shared__ scalar_t cache_grad_sampling_loc[blockSize * 2]; + __shared__ scalar_t cache_grad_attn_weight[blockSize]; + unsigned int tid = threadIdx.x; int _temp = index; const int c_col = _temp % channels; _temp /= channels; @@ -371,9 +375,8 @@ __global__ void ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2( int data_weight_ptr = sampling_index * num_levels * num_point; int data_loc_w_ptr = data_weight_ptr << 1; const int grad_sampling_ptr = data_weight_ptr; - scalar_t *grad_sampling_loc_out = - grad_sampling_loc + (grad_sampling_ptr << 1); - 
scalar_t *grad_attn_weight_out = grad_attn_weight + grad_sampling_ptr; + grad_sampling_loc += grad_sampling_ptr << 1; + grad_attn_weight += grad_sampling_ptr; const int grad_weight_stride = 1; const int grad_loc_stride = 2; const int qid_stride = num_heads * channels; @@ -422,16 +425,16 @@ __global__ void ms_deformable_col2im_gpu_kernel_shm_blocksize_aware_reduce_v2( } if (tid == 0) { - *grad_sampling_loc_out = cache_grad_sampling_loc[0]; - *(grad_sampling_loc_out + 1) = cache_grad_sampling_loc[1]; - *grad_attn_weight_out = cache_grad_attn_weight[0]; + *grad_sampling_loc = cache_grad_sampling_loc[0]; + *(grad_sampling_loc + 1) = cache_grad_sampling_loc[1]; + *grad_attn_weight = cache_grad_attn_weight[0]; } __syncthreads(); data_weight_ptr += 1; data_loc_w_ptr += 2; - grad_attn_weight_out += grad_weight_stride; - grad_sampling_loc_out += grad_loc_stride; + grad_attn_weight += grad_weight_stride; + grad_sampling_loc += grad_loc_stride; } } } @@ -446,11 +449,11 @@ __global__ void ms_deformable_col2im_gpu_kernel_shm_reduce_v1( const int channels, const int num_levels, const int num_query, const int num_point, scalar_t *grad_value, scalar_t *grad_sampling_loc, scalar_t *grad_attn_weight) { - extern __shared__ int _s[]; - scalar_t *cache_grad_sampling_loc = reinterpret_cast(_s); - scalar_t *cache_grad_attn_weight = cache_grad_sampling_loc + 2 * blockDim.x; - unsigned int tid = threadIdx.x; CUDA_1D_KERNEL_LOOP(index, n) { + extern __shared__ int _s[]; + scalar_t *cache_grad_sampling_loc = reinterpret_cast(_s); + scalar_t *cache_grad_attn_weight = cache_grad_sampling_loc + 2 * blockDim.x; + unsigned int tid = threadIdx.x; int _temp = index; const int c_col = _temp % channels; _temp /= channels; @@ -465,9 +468,8 @@ __global__ void ms_deformable_col2im_gpu_kernel_shm_reduce_v1( int data_weight_ptr = sampling_index * num_levels * num_point; int data_loc_w_ptr = data_weight_ptr << 1; const int grad_sampling_ptr = data_weight_ptr; - scalar_t *grad_sampling_loc_out = - 
grad_sampling_loc + (grad_sampling_ptr << 1); - scalar_t *grad_attn_weight_out = grad_attn_weight + grad_sampling_ptr; + grad_sampling_loc += grad_sampling_ptr << 1; + grad_attn_weight += grad_sampling_ptr; const int grad_weight_stride = 1; const int grad_loc_stride = 2; const int qid_stride = num_heads * channels; @@ -507,23 +509,23 @@ __global__ void ms_deformable_col2im_gpu_kernel_shm_reduce_v1( _grad_h = cache_grad_sampling_loc[1], _grad_a = cache_grad_attn_weight[0]; int sid = 2; - for (unsigned int _tid = 1; _tid < blockDim.x; ++_tid) { + for (unsigned int tid = 1; tid < blockDim.x; ++tid) { _grad_w += cache_grad_sampling_loc[sid]; _grad_h += cache_grad_sampling_loc[sid + 1]; - _grad_a += cache_grad_attn_weight[_tid]; + _grad_a += cache_grad_attn_weight[tid]; sid += 2; } - *grad_sampling_loc_out = _grad_w; - *(grad_sampling_loc_out + 1) = _grad_h; - *grad_attn_weight_out = _grad_a; + *grad_sampling_loc = _grad_w; + *(grad_sampling_loc + 1) = _grad_h; + *grad_attn_weight = _grad_a; } __syncthreads(); data_weight_ptr += 1; data_loc_w_ptr += 2; - grad_attn_weight_out += grad_weight_stride; - grad_sampling_loc_out += grad_loc_stride; + grad_attn_weight += grad_weight_stride; + grad_sampling_loc += grad_loc_stride; } } } @@ -538,11 +540,11 @@ __global__ void ms_deformable_col2im_gpu_kernel_shm_reduce_v2( const int channels, const int num_levels, const int num_query, const int num_point, scalar_t *grad_value, scalar_t *grad_sampling_loc, scalar_t *grad_attn_weight) { - extern __shared__ int _s[]; - scalar_t *cache_grad_sampling_loc = reinterpret_cast(_s); - scalar_t *cache_grad_attn_weight = cache_grad_sampling_loc + 2 * blockDim.x; - unsigned int tid = threadIdx.x; CUDA_1D_KERNEL_LOOP(index, n) { + extern __shared__ int _s[]; + scalar_t *cache_grad_sampling_loc = reinterpret_cast(_s); + scalar_t *cache_grad_attn_weight = cache_grad_sampling_loc + 2 * blockDim.x; + unsigned int tid = threadIdx.x; int _temp = index; const int c_col = _temp % channels; _temp /= 
channels; @@ -557,9 +559,8 @@ __global__ void ms_deformable_col2im_gpu_kernel_shm_reduce_v2( int data_weight_ptr = sampling_index * num_levels * num_point; int data_loc_w_ptr = data_weight_ptr << 1; const int grad_sampling_ptr = data_weight_ptr; - scalar_t *grad_sampling_loc_out = - grad_sampling_loc + (grad_sampling_ptr << 1); - scalar_t *grad_attn_weight_out = grad_attn_weight + grad_sampling_ptr; + grad_sampling_loc += grad_sampling_ptr << 1; + grad_attn_weight += grad_sampling_ptr; const int grad_weight_stride = 1; const int grad_loc_stride = 2; const int qid_stride = num_heads * channels; @@ -617,16 +618,16 @@ __global__ void ms_deformable_col2im_gpu_kernel_shm_reduce_v2( } if (tid == 0) { - *grad_sampling_loc_out = cache_grad_sampling_loc[0]; - *(grad_sampling_loc_out + 1) = cache_grad_sampling_loc[1]; - *grad_attn_weight_out = cache_grad_attn_weight[0]; + *grad_sampling_loc = cache_grad_sampling_loc[0]; + *(grad_sampling_loc + 1) = cache_grad_sampling_loc[1]; + *grad_attn_weight = cache_grad_attn_weight[0]; } __syncthreads(); data_weight_ptr += 1; data_loc_w_ptr += 2; - grad_attn_weight_out += grad_weight_stride; - grad_sampling_loc_out += grad_loc_stride; + grad_attn_weight += grad_weight_stride; + grad_sampling_loc += grad_loc_stride; } } } @@ -641,11 +642,11 @@ __global__ void ms_deformable_col2im_gpu_kernel_shm_reduce_v2_multi_blocks( const int channels, const int num_levels, const int num_query, const int num_point, scalar_t *grad_value, scalar_t *grad_sampling_loc, scalar_t *grad_attn_weight) { - extern __shared__ int _s[]; - scalar_t *cache_grad_sampling_loc = reinterpret_cast(_s); - scalar_t *cache_grad_attn_weight = cache_grad_sampling_loc + 2 * blockDim.x; - unsigned int tid = threadIdx.x; CUDA_1D_KERNEL_LOOP(index, n) { + extern __shared__ int _s[]; + scalar_t *cache_grad_sampling_loc = reinterpret_cast(_s); + scalar_t *cache_grad_attn_weight = cache_grad_sampling_loc + 2 * blockDim.x; + unsigned int tid = threadIdx.x; int _temp = index; const int 
c_col = _temp % channels; _temp /= channels; @@ -660,9 +661,8 @@ __global__ void ms_deformable_col2im_gpu_kernel_shm_reduce_v2_multi_blocks( int data_weight_ptr = sampling_index * num_levels * num_point; int data_loc_w_ptr = data_weight_ptr << 1; const int grad_sampling_ptr = data_weight_ptr; - scalar_t *grad_sampling_loc_out = - grad_sampling_loc + (grad_sampling_ptr << 1); - scalar_t *grad_attn_weight_out = grad_attn_weight + grad_sampling_ptr; + grad_sampling_loc += grad_sampling_ptr << 1; + grad_attn_weight += grad_sampling_ptr; const int grad_weight_stride = 1; const int grad_loc_stride = 2; const int qid_stride = num_heads * channels; @@ -720,16 +720,16 @@ __global__ void ms_deformable_col2im_gpu_kernel_shm_reduce_v2_multi_blocks( } if (tid == 0) { - atomicAdd(grad_sampling_loc_out, cache_grad_sampling_loc[0]); - atomicAdd(grad_sampling_loc_out + 1, cache_grad_sampling_loc[1]); - atomicAdd(grad_attn_weight_out, cache_grad_attn_weight[0]); + atomicAdd(grad_sampling_loc, cache_grad_sampling_loc[0]); + atomicAdd(grad_sampling_loc + 1, cache_grad_sampling_loc[1]); + atomicAdd(grad_attn_weight, cache_grad_attn_weight[0]); } __syncthreads(); data_weight_ptr += 1; data_loc_w_ptr += 2; - grad_attn_weight_out += grad_weight_stride; - grad_sampling_loc_out += grad_loc_stride; + grad_attn_weight += grad_weight_stride; + grad_sampling_loc += grad_loc_stride; } } } @@ -759,9 +759,8 @@ __global__ void ms_deformable_col2im_gpu_kernel_gm( int data_weight_ptr = sampling_index * num_levels * num_point; int data_loc_w_ptr = data_weight_ptr << 1; const int grad_sampling_ptr = data_weight_ptr; - scalar_t *grad_sampling_loc_out = - grad_sampling_loc + (grad_sampling_ptr << 1); - scalar_t *grad_attn_weight_out = grad_attn_weight + grad_sampling_ptr; + grad_sampling_loc += grad_sampling_ptr << 1; + grad_attn_weight += grad_sampling_ptr; const int grad_weight_stride = 1; const int grad_loc_stride = 2; const int qid_stride = num_heads * channels; @@ -788,12 +787,12 @@ __global__ void 
ms_deformable_col2im_gpu_kernel_gm( ms_deform_attn_col2im_bilinear_gm( data_value_ptr, spatial_h, spatial_w, num_heads, channels, h_im, w_im, m_col, c_col, top_grad, weight, grad_value_ptr, - grad_sampling_loc_out, grad_attn_weight_out); + grad_sampling_loc, grad_attn_weight); } data_weight_ptr += 1; data_loc_w_ptr += 2; - grad_attn_weight_out += grad_weight_stride; - grad_sampling_loc_out += grad_loc_stride; + grad_attn_weight += grad_weight_stride; + grad_sampling_loc += grad_loc_stride; } } } diff --git a/mmcv/ops/csrc/common/cuda/nms_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/nms_cuda_kernel.cuh index 281d9f0..40a2f46 100644 --- a/mmcv/ops/csrc/common/cuda/nms_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/nms_cuda_kernel.cuh @@ -27,91 +27,48 @@ __device__ inline bool devIoU(float const *const a, float const *const b, return interS > threshold * (Sa + Sb - interS); } -__global__ static void nms_cuda(const int n_boxes, const float iou_threshold, - const int offset, const float *dev_boxes, - unsigned long long *dev_mask) { - int blocks = (n_boxes + threadsPerBlock - 1) / threadsPerBlock; - CUDA_2D_KERNEL_BLOCK_LOOP(col_start, blocks, row_start, blocks) { - const int tid = threadIdx.x; - - if (row_start > col_start) return; - - const int row_size = - fminf(n_boxes - row_start * threadsPerBlock, threadsPerBlock); - const int col_size = - fminf(n_boxes - col_start * threadsPerBlock, threadsPerBlock); - - __shared__ float block_boxes[threadsPerBlock * 4]; - if (tid < col_size) { - block_boxes[tid * 4 + 0] = - dev_boxes[(threadsPerBlock * col_start + tid) * 4 + 0]; - block_boxes[tid * 4 + 1] = - dev_boxes[(threadsPerBlock * col_start + tid) * 4 + 1]; - block_boxes[tid * 4 + 2] = - dev_boxes[(threadsPerBlock * col_start + tid) * 4 + 2]; - block_boxes[tid * 4 + 3] = - dev_boxes[(threadsPerBlock * col_start + tid) * 4 + 3]; - } - __syncthreads(); - - if (tid < row_size) { - const int cur_box_idx = threadsPerBlock * row_start + tid; - const float *cur_box = dev_boxes + 
cur_box_idx * 4; - int i = 0; - unsigned long long int t = 0; - int start = 0; - if (row_start == col_start) { - start = tid + 1; - } - for (i = start; i < col_size; i++) { - if (devIoU(cur_box, block_boxes + i * 4, offset, iou_threshold)) { - t |= 1ULL << i; - } - } - dev_mask[cur_box_idx * gridDim.y + col_start] = t; - } - } -} - -__global__ static void gather_keep_from_mask(bool *keep, - const unsigned long long *dev_mask, - const int n_boxes) { - const int col_blocks = (n_boxes + threadsPerBlock - 1) / threadsPerBlock; +__global__ void nms_cuda(const int n_boxes, const float iou_threshold, + const int offset, const float *dev_boxes, + unsigned long long *dev_mask) { + const int row_start = blockIdx.y; + const int col_start = blockIdx.x; const int tid = threadIdx.x; - // mark the bboxes which have been removed. - extern __shared__ unsigned long long removed[]; + if (row_start > col_start) return; - // initialize removed. - for (int i = tid; i < col_blocks; i += blockDim.x) { - removed[i] = 0; + const int row_size = + fminf(n_boxes - row_start * threadsPerBlock, threadsPerBlock); + const int col_size = + fminf(n_boxes - col_start * threadsPerBlock, threadsPerBlock); + + __shared__ float block_boxes[threadsPerBlock * 4]; + if (tid < col_size) { + block_boxes[tid * 4 + 0] = + dev_boxes[(threadsPerBlock * col_start + tid) * 4 + 0]; + block_boxes[tid * 4 + 1] = + dev_boxes[(threadsPerBlock * col_start + tid) * 4 + 1]; + block_boxes[tid * 4 + 2] = + dev_boxes[(threadsPerBlock * col_start + tid) * 4 + 2]; + block_boxes[tid * 4 + 3] = + dev_boxes[(threadsPerBlock * col_start + tid) * 4 + 3]; } __syncthreads(); - for (int nblock = 0; nblock < col_blocks; ++nblock) { - auto removed_val = removed[nblock]; - __syncthreads(); - const int i_offset = nblock * threadsPerBlock; -#pragma unroll - for (int inblock = 0; inblock < threadsPerBlock; ++inblock) { - const int i = i_offset + inblock; - if (i >= n_boxes) break; - // select a candidate, check if it should kept. 
- if (!(removed_val & (1ULL << inblock))) { - if (tid == 0) { - // mark the output. - keep[i] = true; - } - auto p = dev_mask + i * col_blocks; - // remove all bboxes which overlap the candidate. - for (int j = tid; j < col_blocks; j += blockDim.x) { - if (j >= nblock) removed[j] |= p[j]; - } - __syncthreads(); - removed_val = removed[nblock]; + if (tid < row_size) { + const int cur_box_idx = threadsPerBlock * row_start + tid; + const float *cur_box = dev_boxes + cur_box_idx * 4; + int i = 0; + unsigned long long int t = 0; + int start = 0; + if (row_start == col_start) { + start = tid + 1; + } + for (i = start; i < col_size; i++) { + if (devIoU(cur_box, block_boxes + i * 4, offset, iou_threshold)) { + t |= 1ULL << i; } } + dev_mask[cur_box_idx * gridDim.y + col_start] = t; } } - #endif // NMS_CUDA_KERNEL_CUH diff --git a/mmcv/ops/csrc/common/cuda/nms_quadri_cuda.cuh b/mmcv/ops/csrc/common/cuda/nms_quadri_cuda.cuh deleted file mode 100644 index bba3b82..0000000 --- a/mmcv/ops/csrc/common/cuda/nms_quadri_cuda.cuh +++ /dev/null @@ -1,141 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -#ifndef NMS_QUADRI_CUDA_CUH -#define NMS_QUADRI_CUDA_CUH - -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else -#include "pytorch_cuda_helper.hpp" -#endif -#include "box_iou_rotated_utils.hpp" - -__host__ __device__ inline int divideUP(const int x, const int y) { - return (((x) + (y)-1) / (y)); -} - -namespace { -int const threadsPerBlock = sizeof(unsigned long long) * 8; -} - -template -__global__ void nms_quadri_cuda_kernel(const int n_boxes, - const float iou_threshold, - const T* dev_boxes, - unsigned long long* dev_mask, - const int multi_label) { - if (multi_label == 1) { - const int row_start = blockIdx.y; - const int col_start = blockIdx.x; - - // if (row_start > col_start) return; - - const int row_size = - min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); - const int col_size = - min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); - - // Compared to nms_cuda_kernel, where each box is represented with 4 values - // (x1, y1, x2, y2), each rotated box is represented with 8 values - // (x1, y1, ..., x4, y4) here. 
- __shared__ T block_boxes[threadsPerBlock * 8]; - if (threadIdx.x < col_size) { - block_boxes[threadIdx.x * 8 + 0] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 9 + 0]; - block_boxes[threadIdx.x * 8 + 1] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 9 + 1]; - block_boxes[threadIdx.x * 8 + 2] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 9 + 2]; - block_boxes[threadIdx.x * 8 + 3] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 9 + 3]; - block_boxes[threadIdx.x * 8 + 4] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 9 + 4]; - block_boxes[threadIdx.x * 8 + 5] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 9 + 5]; - block_boxes[threadIdx.x * 8 + 6] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 9 + 6]; - block_boxes[threadIdx.x * 8 + 7] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 9 + 7]; - } - __syncthreads(); - - if (threadIdx.x < row_size) { - const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; - const T* cur_box = dev_boxes + cur_box_idx * 9; - int i = 0; - unsigned long long t = 0; - int start = 0; - if (row_start == col_start) { - start = threadIdx.x + 1; - } - for (i = start; i < col_size; i++) { - // Instead of devIoU used by original horizontal nms, here - // we use the single_box_iou_quadri function from - // box_iou_rotated_utils.h - if (single_box_iou_quadri(cur_box, block_boxes + i * 8, 0) > - iou_threshold) { - t |= 1ULL << i; - } - } - const int col_blocks = divideUP(n_boxes, threadsPerBlock); - dev_mask[cur_box_idx * col_blocks + col_start] = t; - } - } else { - const int row_start = blockIdx.y; - const int col_start = blockIdx.x; - - // if (row_start > col_start) return; - - const int row_size = - min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); - const int col_size = - min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); - - // Compared to nms_cuda_kernel, where each box is represented with 4 values - // (x1, 
y1, x2, y2), each rotated box is represented with 8 values - // (x1, y1, , ..., x4, y4) here. - __shared__ T block_boxes[threadsPerBlock * 8]; - if (threadIdx.x < col_size) { - block_boxes[threadIdx.x * 8 + 0] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 8 + 0]; - block_boxes[threadIdx.x * 8 + 1] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 8 + 1]; - block_boxes[threadIdx.x * 8 + 2] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 8 + 2]; - block_boxes[threadIdx.x * 8 + 3] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 8 + 3]; - block_boxes[threadIdx.x * 8 + 4] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 8 + 4]; - block_boxes[threadIdx.x * 8 + 5] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 8 + 5]; - block_boxes[threadIdx.x * 8 + 6] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 8 + 6]; - block_boxes[threadIdx.x * 8 + 7] = - dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 8 + 7]; - } - __syncthreads(); - - if (threadIdx.x < row_size) { - const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; - const T* cur_box = dev_boxes + cur_box_idx * 8; - int i = 0; - unsigned long long t = 0; - int start = 0; - if (row_start == col_start) { - start = threadIdx.x + 1; - } - for (i = start; i < col_size; i++) { - // Instead of devIoU used by original horizontal nms, here - // we use the single_box_iou_quadri function from - // box_iou_rotated_utils.h - if (single_box_iou_quadri(cur_box, block_boxes + i * 8, 0) > - iou_threshold) { - t |= 1ULL << i; - } - } - const int col_blocks = divideUP(n_boxes, threadsPerBlock); - dev_mask[cur_box_idx * col_blocks + col_start] = t; - } - } -} - -#endif diff --git a/mmcv/ops/csrc/common/cuda/nms_rotated_cuda.cuh b/mmcv/ops/csrc/common/cuda/nms_rotated_cuda.cuh index 747327a..80bed96 100644 --- a/mmcv/ops/csrc/common/cuda/nms_rotated_cuda.cuh +++ b/mmcv/ops/csrc/common/cuda/nms_rotated_cuda.cuh @@ -43,16 +43,18 @@ __global__ 
void nms_rotated_cuda_kernel(const int n_boxes, // (x_center, y_center, width, height, angle_degrees) here. __shared__ T block_boxes[threadsPerBlock * 5]; if (threadIdx.x < col_size) { - block_boxes[threadIdx.x * 5 + 0] = + block_boxes[threadIdx.x * 6 + 0] = dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 0]; - block_boxes[threadIdx.x * 5 + 1] = + block_boxes[threadIdx.x * 6 + 1] = dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 1]; - block_boxes[threadIdx.x * 5 + 2] = + block_boxes[threadIdx.x * 6 + 2] = dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 2]; - block_boxes[threadIdx.x * 5 + 3] = + block_boxes[threadIdx.x * 6 + 3] = dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 3]; - block_boxes[threadIdx.x * 5 + 4] = + block_boxes[threadIdx.x * 6 + 4] = dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 4]; + block_boxes[threadIdx.x * 6 + 5] = + dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 5]; } __syncthreads(); @@ -69,7 +71,7 @@ __global__ void nms_rotated_cuda_kernel(const int n_boxes, // Instead of devIoU used by original horizontal nms, here // we use the single_box_iou_rotated function from // box_iou_rotated_utils.h - if (single_box_iou_rotated(cur_box, block_boxes + i * 5, 0) > + if (single_box_iou_rotated(cur_box, block_boxes + i * 6, 0) > iou_threshold) { t |= 1ULL << i; } diff --git a/mmcv/ops/csrc/common/cuda/points_in_boxes_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/points_in_boxes_cuda_kernel.cuh index 3423620..12182cc 100644 --- a/mmcv/ops/csrc/common/cuda/points_in_boxes_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/points_in_boxes_cuda_kernel.cuh @@ -45,21 +45,20 @@ __global__ void points_in_boxes_part_forward_cuda_kernel( // (B, npoints), default -1 int bs_idx = blockIdx.y; - CUDA_1D_KERNEL_LOOP(pt_idx, pts_num) { - if (bs_idx >= batch_size) return; + int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (bs_idx >= batch_size || pt_idx >= pts_num) return; - boxes += bs_idx 
* boxes_num * 7; - pts += bs_idx * pts_num * 3 + pt_idx * 3; - box_idx_of_points += bs_idx * pts_num + pt_idx; + boxes += bs_idx * boxes_num * 7; + pts += bs_idx * pts_num * 3 + pt_idx * 3; + box_idx_of_points += bs_idx * pts_num + pt_idx; - T local_x = 0, local_y = 0; - int cur_in_flag = 0; - for (int k = 0; k < boxes_num; k++) { - cur_in_flag = check_pt_in_box3d(pts, boxes + k * 7, local_x, local_y); - if (cur_in_flag) { - box_idx_of_points[0] = k; - break; - } + T local_x = 0, local_y = 0; + int cur_in_flag = 0; + for (int k = 0; k < boxes_num; k++) { + cur_in_flag = check_pt_in_box3d(pts, boxes + k * 7, local_x, local_y); + if (cur_in_flag) { + box_idx_of_points[0] = k; + break; } } } @@ -74,20 +73,19 @@ __global__ void points_in_boxes_all_forward_cuda_kernel( // (B, npoints), default -1 int bs_idx = blockIdx.y; - CUDA_1D_KERNEL_LOOP(pt_idx, pts_num) { - if (bs_idx >= batch_size) return; + int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (bs_idx >= batch_size || pt_idx >= pts_num) return; - boxes += bs_idx * boxes_num * 7; - pts += bs_idx * pts_num * 3 + pt_idx * 3; - box_idx_of_points += bs_idx * pts_num * boxes_num + pt_idx * boxes_num; + boxes += bs_idx * boxes_num * 7; + pts += bs_idx * pts_num * 3 + pt_idx * 3; + box_idx_of_points += bs_idx * pts_num * boxes_num + pt_idx * boxes_num; - T local_x = 0, local_y = 0; - for (int k = 0; k < boxes_num; k++) { - const int cur_in_flag = - check_pt_in_box3d(pts, boxes + k * 7, local_x, local_y); - if (cur_in_flag) { - box_idx_of_points[k] = 1; - } + T local_x = 0, local_y = 0; + for (int k = 0; k < boxes_num; k++) { + const int cur_in_flag = + check_pt_in_box3d(pts, boxes + k * 7, local_x, local_y); + if (cur_in_flag) { + box_idx_of_points[k] = 1; } } } diff --git a/mmcv/ops/csrc/common/cuda/points_in_polygons_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/points_in_polygons_cuda_kernel.cuh deleted file mode 100644 index a0769d7..0000000 --- a/mmcv/ops/csrc/common/cuda/points_in_polygons_cuda_kernel.cuh +++ 
/dev/null @@ -1,79 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -#ifndef POINTS_IN_POLYGONS_CUDA_KERNEL_CUH -#define POINTS_IN_POLYGONS_CUDA_KERNEL_CUH - -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else -#include "pytorch_cuda_helper.hpp" -#endif - -struct point { - float x, y; -}; - -template -__global__ void points_in_polygons_forward_cuda_kernel( - const int nthreads, const scalar_t *vertex1, const scalar_t *vertex2, - const int rows, const int cols, scalar_t *inside_flag) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { - int row = index / cols; - int col = index % cols; - - const scalar_t *offset_vertex1 = vertex1 + row * 2; - const scalar_t *offset_vertex2 = vertex2 + col * 8; - - point point_[1]; - point polygon[4]; - - point_[0].x = offset_vertex1[0]; - point_[0].y = offset_vertex1[1]; - - polygon[0].x = offset_vertex2[0]; - polygon[0].y = offset_vertex2[1]; - polygon[1].x = offset_vertex2[2]; - polygon[1].y = offset_vertex2[3]; - polygon[2].x = offset_vertex2[4]; - polygon[2].y = offset_vertex2[5]; - polygon[3].x = offset_vertex2[6]; - polygon[3].y = offset_vertex2[7]; - - int nCross = 0; - int i, j; - float sx, sy, tx, ty, px, py, x; - for (i = 0, j = 3; i < 4; j = i, i++) { - sx = polygon[i].x; - sy = polygon[i].y; - tx = polygon[j].x; - ty = polygon[j].y; - - px = point_[0].x; - py = point_[0].y; - - if (py < min(sy, ty)) continue; - if (py > max(sy, ty)) continue; - - if ((sx == px && sy == py) || (tx == px && ty == py)) { - break; - } else { - if ((sy < py && ty >= py) || (sy >= py && ty < py)) { - x = sx + (py - sy) * (tx - sx) / (ty - sy); - if (x == px) { - break; - } - if (x > px) { - nCross++; - } - } - } - } - if (nCross % 2 == 1) { - inside_flag[index] = 1.0; - } else { - inside_flag[index] = 0.0; - } - return; - } -} - -#endif // POINTS_IN_POLYGONS_CUDA_KERNEL_CUH diff --git a/mmcv/ops/csrc/common/cuda/prroi_pool_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/prroi_pool_cuda_kernel.cuh deleted file mode 100644 index 
e2f5a11..0000000 --- a/mmcv/ops/csrc/common/cuda/prroi_pool_cuda_kernel.cuh +++ /dev/null @@ -1,381 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -// Modified from -// https://github.com/vacancy/PreciseRoIPooling/blob/master/src/prroi_pooling_gpu_impl.cu -// Distributed under terms of the MIT license. -#ifndef PRROI_POOL_CUDA_KERNEL_CUH -#define PRROI_POOL_CUDA_KERNEL_CUH - -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else -#include "pytorch_cuda_helper.hpp" -#endif - -template -__device__ static __forceinline__ T PrRoIPoolingGetData(const T *data, - const int h, - const int w, - const int height, - const int width) { - bool overflow = (h < 0) || (w < 0) || (h >= height) || (w >= width); - T retVal = overflow ? 0.0f : data[h * width + w]; - return retVal; -} - -template -__device__ static __forceinline__ T PrRoIPoolingGetCoeff(T dh, T dw) { - return (1.0f - abs(dh)) * (1.0f - abs(dw)); -} - -template -__device__ static __forceinline__ T PrRoIPoolingSingleCoorIntegral(T s, T t, - T c1, T c2) { - return 0.5 * (t * t - s * s) * (c2 - c1) + (t - s) * c1; -} - -template -__device__ static T PrRoIPoolingInterpolation(const T *data, const T h, - const T w, const int height, - const int width) { - T retVal = 0.0f; - int h1 = floorf(h); - int w1 = floorf(w); - retVal += PrRoIPoolingGetData(data, h1, w1, height, width) * - PrRoIPoolingGetCoeff(h - T(h1), w - T(w1)); - h1 = floorf(h) + 1; - w1 = floorf(w); - retVal += PrRoIPoolingGetData(data, h1, w1, height, width) * - PrRoIPoolingGetCoeff(h - T(h1), w - T(w1)); - h1 = floorf(h); - w1 = floorf(w) + 1; - retVal += PrRoIPoolingGetData(data, h1, w1, height, width) * - PrRoIPoolingGetCoeff(h - T(h1), w - T(w1)); - h1 = floorf(h) + 1; - w1 = floorf(w) + 1; - retVal += PrRoIPoolingGetData(data, h1, w1, height, width) * - PrRoIPoolingGetCoeff(h - T(h1), w - T(w1)); - return retVal; -} - -template -__device__ static T PrRoIPoolingMatCalculation(const T *this_data, - const int s_h, const int s_w, - const 
int e_h, const int e_w, - const T y0, const T x0, - const T y1, const T x1, - const int h0, const int w0) { - T alpha, beta, lim_alpha, lim_beta, tmp; - T sum_out = 0; - - alpha = x0 - T(s_w); - beta = y0 - T(s_h); - lim_alpha = x1 - T(s_w); - lim_beta = y1 - T(s_h); - tmp = (lim_alpha - 0.5f * lim_alpha * lim_alpha - alpha + - 0.5f * alpha * alpha) * - (lim_beta - 0.5f * lim_beta * lim_beta - beta + 0.5f * beta * beta); - sum_out += PrRoIPoolingGetData(this_data, s_h, s_w, h0, w0) * tmp; - - alpha = T(e_w) - x1; - lim_alpha = T(e_w) - x0; - tmp = (lim_alpha - 0.5f * lim_alpha * lim_alpha - alpha + - 0.5f * alpha * alpha) * - (lim_beta - 0.5f * lim_beta * lim_beta - beta + 0.5f * beta * beta); - sum_out += PrRoIPoolingGetData(this_data, s_h, e_w, h0, w0) * tmp; - - alpha = x0 - T(s_w); - beta = T(e_h) - y1; - lim_alpha = x1 - T(s_w); - lim_beta = T(e_h) - y0; - tmp = (lim_alpha - 0.5f * lim_alpha * lim_alpha - alpha + - 0.5f * alpha * alpha) * - (lim_beta - 0.5f * lim_beta * lim_beta - beta + 0.5f * beta * beta); - sum_out += PrRoIPoolingGetData(this_data, e_h, s_w, h0, w0) * tmp; - - alpha = T(e_w) - x1; - lim_alpha = T(e_w) - x0; - tmp = (lim_alpha - 0.5f * lim_alpha * lim_alpha - alpha + - 0.5f * alpha * alpha) * - (lim_beta - 0.5f * lim_beta * lim_beta - beta + 0.5f * beta * beta); - sum_out += PrRoIPoolingGetData(this_data, e_h, e_w, h0, w0) * tmp; - - return sum_out; -} - -template -__device__ static void PrRoIPoolingDistributeDiff(T *diff, const T top_diff, - const int h, const int w, - const int height, - const int width, - const T coeff) { - bool overflow = (h < 0) || (w < 0) || (h >= height) || (w >= width); - if (!overflow) atomicAdd(diff + h * width + w, top_diff * coeff); -} - -template -__device__ static void PrRoIPoolingMatDistributeDiff( - T *diff, const T top_diff, const int s_h, const int s_w, const int e_h, - const int e_w, const T y0, const T x0, const T y1, const T x1, const int h0, - const int w0) { - T alpha, beta, lim_alpha, lim_beta, tmp; - 
- alpha = x0 - T(s_w); - beta = y0 - T(s_h); - lim_alpha = x1 - T(s_w); - lim_beta = y1 - T(s_h); - tmp = (lim_alpha - 0.5f * lim_alpha * lim_alpha - alpha + - 0.5f * alpha * alpha) * - (lim_beta - 0.5f * lim_beta * lim_beta - beta + 0.5f * beta * beta); - PrRoIPoolingDistributeDiff(diff, top_diff, s_h, s_w, h0, w0, tmp); - - alpha = T(e_w) - x1; - lim_alpha = T(e_w) - x0; - tmp = (lim_alpha - 0.5f * lim_alpha * lim_alpha - alpha + - 0.5f * alpha * alpha) * - (lim_beta - 0.5f * lim_beta * lim_beta - beta + 0.5f * beta * beta); - PrRoIPoolingDistributeDiff(diff, top_diff, s_h, e_w, h0, w0, tmp); - - alpha = x0 - T(s_w); - beta = T(e_h) - y1; - lim_alpha = x1 - T(s_w); - lim_beta = T(e_h) - y0; - tmp = (lim_alpha - 0.5f * lim_alpha * lim_alpha - alpha + - 0.5f * alpha * alpha) * - (lim_beta - 0.5f * lim_beta * lim_beta - beta + 0.5f * beta * beta); - PrRoIPoolingDistributeDiff(diff, top_diff, e_h, s_w, h0, w0, tmp); - - alpha = T(e_w) - x1; - lim_alpha = T(e_w) - x0; - tmp = (lim_alpha - 0.5f * lim_alpha * lim_alpha - alpha + - 0.5f * alpha * alpha) * - (lim_beta - 0.5f * lim_beta * lim_beta - beta + 0.5f * beta * beta); - PrRoIPoolingDistributeDiff(diff, top_diff, e_h, e_w, h0, w0, tmp); -} - -template -__global__ void prroi_pool_forward_cuda_kernel( - const int nthreads, const T *input, const T *rois, T *output, - const int pooled_height, const int pooled_width, const T spatial_scale, - const int channels, const int height, const int width) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { - // (n, c, ph, pw) is an element in the pooled output - int pw = index % pooled_width; - int ph = (index / pooled_width) % pooled_height; - int c = (index / pooled_width / pooled_height) % channels; - int n = index / pooled_width / pooled_height / channels; - - const T *offset_rois = rois + n * 5; - int roi_batch_ind = offset_rois[0]; - - T roi_x1 = offset_rois[1] * spatial_scale; - T roi_y1 = offset_rois[2] * spatial_scale; - T roi_x2 = offset_rois[3] * spatial_scale; - T roi_y2 = 
offset_rois[4] * spatial_scale; - - T roi_width = max(roi_x2 - roi_x1, ((T)0.0)); - T roi_height = max(roi_y2 - roi_y1, ((T)0.0)); - T bin_size_h = roi_height / static_cast(pooled_height); - T bin_size_w = roi_width / static_cast(pooled_width); - - const T *this_data = - input + (roi_batch_ind * channels + c) * height * width; - T *this_out = output + index; - - T bin_x1 = roi_x1 + bin_size_w * pw; - T bin_y1 = roi_y1 + bin_size_h * ph; - T bin_x2 = bin_x1 + bin_size_w; - T bin_y2 = bin_y1 + bin_size_h; - - T bin_size = max(T(0.0), bin_size_w * bin_size_h); - if (bin_size == 0) { - *this_out = 0; - continue; - } - - T sum_out = 0; - - int start_x, start_y, end_x, end_y; - - start_x = floorf(bin_x1); - end_x = ceilf(bin_x2); - start_y = floorf(bin_y1); - end_y = ceilf(bin_y2); - - for (int bin_x = start_x; bin_x < end_x; ++bin_x) - for (int bin_y = start_y; bin_y < end_y; ++bin_y) - sum_out += PrRoIPoolingMatCalculation( - this_data, bin_y, bin_x, bin_y + 1, bin_x + 1, - max(bin_y1, T(bin_y)), max(bin_x1, T(bin_x)), - min(bin_y2, T(bin_y) + 1.0f), min(bin_x2, T(bin_x + 1.0f)), height, - width); - *this_out = sum_out / bin_size; - } -} - -template -__global__ void prroi_pool_backward_cuda_kernel( - const int nthreads, const T *grad_output, const T *rois, T *grad_input, - const int pooled_height, const int pooled_width, const T spatial_scale, - const int channels, const int height, const int width) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { - // (n, c, ph, pw) is an element in the pooled output - int pw = index % pooled_width; - int ph = (index / pooled_width) % pooled_height; - int c = (index / pooled_width / pooled_height) % channels; - int n = index / pooled_width / pooled_height / channels; - auto rois_cur = rois + n * 5; - - int roi_batch_ind = rois_cur[0]; - T roi_x1 = rois_cur[1] * spatial_scale; - T roi_y1 = rois_cur[2] * spatial_scale; - T roi_x2 = rois_cur[3] * spatial_scale; - T roi_y2 = rois_cur[4] * spatial_scale; - - T roi_width = max(roi_x2 - roi_x1, 
(T)0); - T roi_height = max(roi_y2 - roi_y1, (T)0); - T bin_size_h = roi_height / static_cast(pooled_height); - T bin_size_w = roi_width / static_cast(pooled_width); - - const T *this_out_grad = grad_output + index; - T *this_data_grad = - grad_input + (roi_batch_ind * channels + c) * height * width; - - T bin_x1 = roi_x1 + bin_size_w * pw; - T bin_y1 = roi_y1 + bin_size_h * ph; - T bin_x2 = bin_x1 + bin_size_w; - T bin_y2 = bin_y1 + bin_size_h; - - T bin_size = max(T(0.0), bin_size_w * bin_size_h); - - T sum_out = bin_size == T(0) ? T(0) : *this_out_grad / bin_size; - - int start_x, start_y, end_x, end_y; - - start_x = floorf(bin_x1); - end_x = ceilf(bin_x2); - start_y = floorf(bin_y1); - end_y = ceilf(bin_y2); - - for (int bin_x = start_x; bin_x < end_x; ++bin_x) - for (int bin_y = start_y; bin_y < end_y; ++bin_y) - PrRoIPoolingMatDistributeDiff( - this_data_grad, sum_out, bin_y, bin_x, bin_y + 1, bin_x + 1, - max(bin_y1, T(bin_y)), max(bin_x1, T(bin_x)), - min(bin_y2, T(bin_y) + 1.0f), min(bin_x2, T(bin_x + 1.0f)), height, - width); - } -} - -template -__global__ void prroi_pool_coor_backward_cuda_kernel( - const int nthreads, const T *output, const T *grad_output, const T *input, - const T *rois, T *grad_rois, const int pooled_height, - const int pooled_width, const T spatial_scale, const int channels, - const int height, const int width) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { - // (n, c, ph, pw) is an element in the pooled output - int pw = index % pooled_width; - int ph = (index / pooled_width) % pooled_height; - int c = (index / pooled_width / pooled_height) % channels; - int n = index / pooled_width / pooled_height / channels; - auto rois_cur = rois + n * 5; - - int roi_batch_ind = rois_cur[0]; - T roi_x1 = rois_cur[1] * spatial_scale; - T roi_y1 = rois_cur[2] * spatial_scale; - T roi_x2 = rois_cur[3] * spatial_scale; - T roi_y2 = rois_cur[4] * spatial_scale; - - T roi_width = max(roi_x2 - roi_x1, (T)0); - T roi_height = max(roi_y2 - roi_y1, (T)0); - T 
bin_size_h = roi_height / static_cast(pooled_height); - T bin_size_w = roi_width / static_cast(pooled_width); - - const T output_grad_val = grad_output[index]; - const T *this_input_data = - input + (roi_batch_ind * channels + c) * height * width; - const T output_val = output[index]; - T *this_rois_grad = grad_rois + n * 5; - - T bin_x1 = roi_x1 + bin_size_w * pw; - T bin_y1 = roi_y1 + bin_size_h * ph; - T bin_x2 = bin_x1 + bin_size_w; - T bin_y2 = bin_y1 + bin_size_h; - - T bin_size = max(T(0.0), bin_size_w * bin_size_h); - - T sum_out = bin_size == T(0) ? T(0) : output_grad_val / bin_size; - - // WARNING: to be discussed - if (sum_out == 0) continue; - - int start_x, start_y, end_x, end_y; - - start_x = floorf(bin_x1); - end_x = ceilf(bin_x2); - start_y = floorf(bin_y1); - end_y = ceilf(bin_y2); - - T grad_x1_y = 0, grad_x2_y = 0, grad_x_y1 = 0, grad_x_y2 = 0; - for (int bin_y = start_y; bin_y < end_y; ++bin_y) { - grad_x1_y += PrRoIPoolingSingleCoorIntegral( - max(bin_y1, T(bin_y)) - bin_y, min(bin_y2, T(bin_y + 1)) - bin_y, - PrRoIPoolingInterpolation(this_input_data, float(bin_y), bin_x1, - height, width), - PrRoIPoolingInterpolation(this_input_data, float(bin_y + 1), bin_x1, - height, width)); - - grad_x2_y += PrRoIPoolingSingleCoorIntegral( - max(bin_y1, T(bin_y)) - bin_y, min(bin_y2, T(bin_y + 1)) - bin_y, - PrRoIPoolingInterpolation(this_input_data, float(bin_y), bin_x2, - height, width), - PrRoIPoolingInterpolation(this_input_data, float(bin_y + 1), bin_x2, - height, width)); - } - - for (int bin_x = start_x; bin_x < end_x; ++bin_x) { - grad_x_y1 += PrRoIPoolingSingleCoorIntegral( - max(bin_x1, T(bin_x)) - bin_x, min(bin_x2, T(bin_x + 1)) - bin_x, - PrRoIPoolingInterpolation(this_input_data, bin_y1, float(bin_x), - height, width), - PrRoIPoolingInterpolation(this_input_data, bin_y1, float(bin_x + 1), - height, width)); - - grad_x_y2 += PrRoIPoolingSingleCoorIntegral( - max(bin_x1, T(bin_x)) - bin_x, min(bin_x2, T(bin_x + 1)) - bin_x, - 
PrRoIPoolingInterpolation(this_input_data, bin_y2, float(bin_x), - height, width), - PrRoIPoolingInterpolation(this_input_data, bin_y2, float(bin_x + 1), - height, width)); - } - - T partial_x1 = -grad_x1_y + (bin_y2 - bin_y1) * output_val; - T partial_y1 = -grad_x_y1 + (bin_x2 - bin_x1) * output_val; - T partial_x2 = grad_x2_y - (bin_y2 - bin_y1) * output_val; - T partial_y2 = grad_x_y2 - (bin_x2 - bin_x1) * output_val; - - partial_x1 = partial_x1 / bin_size * spatial_scale; - partial_x2 = partial_x2 / bin_size * spatial_scale; - partial_y1 = partial_y1 / bin_size * spatial_scale; - partial_y2 = partial_y2 / bin_size * spatial_scale; - - // (index, x1, y1, x2, y2) - this_rois_grad[0] = 0; - atomicAdd(this_rois_grad + 1, - (partial_x1 * (1.0f - T(pw) / pooled_width) + - partial_x2 * (1.0f - T(pw + 1) / pooled_width)) * - output_grad_val); - atomicAdd(this_rois_grad + 2, - (partial_y1 * (1.0f - T(ph) / pooled_height) + - partial_y2 * (1.0f - T(ph + 1) / pooled_height)) * - output_grad_val); - atomicAdd(this_rois_grad + 3, (partial_x2 * T(pw + 1) / pooled_width + - partial_x1 * T(pw) / pooled_width) * - output_grad_val); - atomicAdd(this_rois_grad + 4, (partial_y2 * T(ph + 1) / pooled_height + - partial_y1 * T(ph) / pooled_height) * - output_grad_val); - } -} - -#endif // ROI_POOL_CUDA_KERNEL_CUH diff --git a/mmcv/ops/csrc/common/cuda/riroi_align_rotated_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/riroi_align_rotated_cuda_kernel.cuh deleted file mode 100644 index 4383d9e..0000000 --- a/mmcv/ops/csrc/common/cuda/riroi_align_rotated_cuda_kernel.cuh +++ /dev/null @@ -1,242 +0,0 @@ -// Modified from -// https://github.com/csuhan/ReDet/blob/master/mmdet/ops/riroi_align/src/riroi_align_kernel.cu -#ifndef RIROI_ALIGN_ROTATED_CUDA_KERNEL_CUH -#define RIROI_ALIGN_ROTATED_CUDA_KERNEL_CUH - -#include -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else // MMCV_USE_PARROTS -#include "pytorch_cuda_helper.hpp" -#endif // MMCV_USE_PARROTS - -/*** Forward ***/ 
-template -__global__ void riroi_align_rotated_forward_cuda_kernel( - const int nthreads, const scalar_t *bottom_data, - const scalar_t *bottom_rois, const scalar_t spatial_scale, - const int num_samples, const bool clockwise, const int channels, - const int height, const int width, const int pooled_height, - const int pooled_width, const int num_orientations, scalar_t *top_data) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { - // (n, c, ph, pw) is an element in the pooled output - int pw = index % pooled_width; - int ph = (index / pooled_width) % pooled_height; - int o = (index / pooled_width / pooled_height) % num_orientations; - int c = - (index / pooled_width / pooled_height / num_orientations) % channels; - int n = index / pooled_width / pooled_height / num_orientations / channels; - - const scalar_t *offset_bottom_rois = bottom_rois + n * 6; - int roi_batch_ind = offset_bottom_rois[0]; - - // Do not using rounding; this implementation detail is critical - scalar_t roi_center_w = offset_bottom_rois[1] * spatial_scale; - scalar_t roi_center_h = offset_bottom_rois[2] * spatial_scale; - scalar_t roi_width = offset_bottom_rois[3] * spatial_scale; - scalar_t roi_height = offset_bottom_rois[4] * spatial_scale; - // scalar_t theta = offset_bottom_rois[5] * M_PI / 180.0; - scalar_t theta = offset_bottom_rois[5]; - // Force malformed ROIs to be 1x1 - roi_width = max(roi_width, (scalar_t)1.); - roi_height = max(roi_height, (scalar_t)1.); - scalar_t bin_size_h = static_cast(roi_height) / - static_cast(pooled_height); - scalar_t bin_size_w = - static_cast(roi_width) / static_cast(pooled_width); - - // find aligned index - scalar_t ind_float = theta * num_orientations / (2 * M_PI); - int ind = floorf(ind_float); - scalar_t l_var = ind_float - (scalar_t)ind; - scalar_t r_var = 1.0 - l_var; - // correct start channel - ind = (ind + num_orientations) % num_orientations; - // rotated channel - int ind_rot = (o - ind + num_orientations) % num_orientations; - int ind_rot_plus = 
(ind_rot + 1 + num_orientations) % num_orientations; - const scalar_t *offset_bottom_data = - bottom_data + (roi_batch_ind * channels * num_orientations + - c * num_orientations + ind_rot) * - height * width; - - const scalar_t *offset_bottom_data_plus = - bottom_data + (roi_batch_ind * channels * num_orientations + - c * num_orientations + ind_rot_plus) * - height * width; - // We use roi_bin_grid to sample the grid and mimic integral - int roi_bin_grid_h = (num_samples > 0) - ? num_samples - : ceilf(roi_height / pooled_height); // e.g., = 2 - int roi_bin_grid_w = - (num_samples > 0) ? num_samples : ceilf(roi_width / pooled_width); - - // roi_start_h and roi_start_w are computed wrt the center of RoI (x, y). - // Appropriate translation needs to be applied after. - if (clockwise) { - theta = -theta; // If clockwise, the angle needs to be reversed. - } - scalar_t roi_start_h = -roi_height / 2.0; - scalar_t roi_start_w = -roi_width / 2.0; - scalar_t cosscalar_theta = cos(theta); - scalar_t sinscalar_theta = sin(theta); - - // We do average (integral) pooling inside a bin - const scalar_t count = max(roi_bin_grid_h * roi_bin_grid_w, 1); // e.g. 
= 4 - - scalar_t output_val = 0.; - for (int iy = 0; iy < roi_bin_grid_h; iy++) { // e.g., iy = 0, 1 - const scalar_t yy = - roi_start_h + ph * bin_size_h + - static_cast(iy + .5f) * bin_size_h / - static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 - for (int ix = 0; ix < roi_bin_grid_w; ix++) { - const scalar_t xx = roi_start_w + pw * bin_size_w + - static_cast(ix + .5f) * bin_size_w / - static_cast(roi_bin_grid_w); - - // Rotate by theta (counterclockwise) around the center and translate - scalar_t y = yy * cosscalar_theta - xx * sinscalar_theta + roi_center_h; - scalar_t x = yy * sinscalar_theta + xx * cosscalar_theta + roi_center_w; - - scalar_t val = bilinear_interpolate( - offset_bottom_data, height, width, y, x, index); - scalar_t val_plus = bilinear_interpolate( - offset_bottom_data_plus, height, width, y, x, index); - output_val += r_var * val + l_var * val_plus; - } - } - output_val /= count; - - top_data[index] = output_val; - } -} - -/*** Backward ***/ -template -__global__ void riroi_align_rotated_backward_cuda_kernel( - const int nthreads, const scalar_t *top_diff, const scalar_t *bottom_rois, - const scalar_t spatial_scale, const int num_samples, const bool clockwise, - const int channels, const int height, const int width, - const int pooled_height, const int pooled_width, const int num_orientations, - scalar_t *bottom_diff) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { - // (n, c, ph, pw) is an element in the pooled output - int pw = index % pooled_width; - int ph = (index / pooled_width) % pooled_height; - int o = (index / pooled_width / pooled_height) % num_orientations; - int c = - (index / pooled_width / pooled_height / num_orientations) % channels; - int n = index / pooled_width / pooled_height / num_orientations / channels; - - const scalar_t *offset_bottom_rois = bottom_rois + n * 6; - int roi_batch_ind = offset_bottom_rois[0]; - - // Do not round - scalar_t roi_center_w = offset_bottom_rois[1] * spatial_scale; - scalar_t roi_center_h = 
offset_bottom_rois[2] * spatial_scale; - scalar_t roi_width = offset_bottom_rois[3] * spatial_scale; - scalar_t roi_height = offset_bottom_rois[4] * spatial_scale; - // scalar_t theta = offset_bottom_rois[5] * M_PI / 180.0; - scalar_t theta = offset_bottom_rois[5]; - // Force malformed ROIs to be 1x1 - roi_width = max(roi_width, (scalar_t)1.); - roi_height = max(roi_height, (scalar_t)1.); - - scalar_t bin_size_h = static_cast(roi_height) / - static_cast(pooled_height); - scalar_t bin_size_w = - static_cast(roi_width) / static_cast(pooled_width); - - // find aligned index - scalar_t ind_float = theta * num_orientations / (2 * M_PI); - int ind = floorf(ind_float); - scalar_t l_var = ind_float - (scalar_t)ind; - scalar_t r_var = 1.0 - l_var; - // correct start channel - ind = (ind + num_orientations) % num_orientations; - // rotated channel - int ind_rot = (o - ind + num_orientations) % num_orientations; - int ind_rot_plus = (ind_rot + 1 + num_orientations) % num_orientations; - scalar_t *offset_bottom_diff = - bottom_diff + (roi_batch_ind * channels * num_orientations + - c * num_orientations + ind_rot) * - height * width; - scalar_t *offset_bottom_diff_plus = - bottom_diff + (roi_batch_ind * channels * num_orientations + - c * num_orientations + ind_rot_plus) * - height * width; - int top_offset = - (n * channels * num_orientations + c * num_orientations + o) * - pooled_height * pooled_width; - const scalar_t *offset_top_diff = top_diff + top_offset; - const scalar_t top_diff_this_bin = offset_top_diff[ph * pooled_width + pw]; - - // We use roi_bin_grid to sample the grid and mimic integral - int roi_bin_grid_h = (num_samples > 0) - ? num_samples - : ceilf(roi_height / pooled_height); // e.g., = 2 - int roi_bin_grid_w = - (num_samples > 0) ? num_samples : ceilf(roi_width / pooled_width); - - // roi_start_h and roi_start_w are computed wrt the center of RoI (x, y). - // Appropriate translation needs to be applied after. 
- if (clockwise) { - theta = -theta; // If clockwise, the angle needs to be reversed. - } - scalar_t roi_start_h = -roi_height / 2.0; - scalar_t roi_start_w = -roi_width / 2.0; - scalar_t cosTheta = cos(theta); - scalar_t sinTheta = sin(theta); - - // We do average (integral) pooling inside a bin - const scalar_t count = roi_bin_grid_h * roi_bin_grid_w; // e.g. = 4 - - for (int iy = 0; iy < roi_bin_grid_h; iy++) { // e.g., iy = 0, 1 - const scalar_t yy = - roi_start_h + ph * bin_size_h + - static_cast(iy + .5f) * bin_size_h / - static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 - for (int ix = 0; ix < roi_bin_grid_w; ix++) { - const scalar_t xx = roi_start_w + pw * bin_size_w + - static_cast(ix + .5f) * bin_size_w / - static_cast(roi_bin_grid_w); - - // Rotate by theta around the center and translate - scalar_t y = yy * cosTheta - xx * sinTheta + roi_center_h; - scalar_t x = yy * sinTheta + xx * cosTheta + roi_center_w; - - scalar_t w1, w2, w3, w4; - int x_low, x_high, y_low, y_high; - - bilinear_interpolate_gradient(height, width, y, x, w1, w2, w3, - w4, x_low, x_high, y_low, - y_high, index); - - scalar_t g1 = top_diff_this_bin * w1 / count; - scalar_t g2 = top_diff_this_bin * w2 / count; - scalar_t g3 = top_diff_this_bin * w3 / count; - scalar_t g4 = top_diff_this_bin * w4 / count; - - if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) { - atomicAdd(offset_bottom_diff + y_low * width + x_low, g1 * r_var); - atomicAdd(offset_bottom_diff + y_low * width + x_high, g2 * r_var); - atomicAdd(offset_bottom_diff + y_high * width + x_low, g3 * r_var); - atomicAdd(offset_bottom_diff + y_high * width + x_high, g4 * r_var); - - atomicAdd(offset_bottom_diff_plus + y_low * width + x_low, - g1 * l_var); - atomicAdd(offset_bottom_diff_plus + y_low * width + x_high, - g2 * l_var); - atomicAdd(offset_bottom_diff_plus + y_high * width + x_low, - g3 * l_var); - atomicAdd(offset_bottom_diff_plus + y_high * width + x_high, - g4 * l_var); - - } // if - } // ix - } // iy - } // 
CUDA_1D_KERNEL_LOOP -} // RiRoIAlignBackward - -#endif // RIROI_ALIGN_ROTATED_CUDA_KERNEL_CUH diff --git a/mmcv/ops/csrc/common/cuda/roi_align_rotated_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/roi_align_rotated_cuda_kernel.cuh index 8274dc5..33571f2 100644 --- a/mmcv/ops/csrc/common/cuda/roi_align_rotated_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/roi_align_rotated_cuda_kernel.cuh @@ -20,7 +20,7 @@ template __global__ void roi_align_rotated_forward_cuda_kernel( const int nthreads, const scalar_t *bottom_data, const scalar_t *bottom_rois, const scalar_t spatial_scale, - const int sampling_ratio, const bool aligned, const bool clockwise, + const int sample_num, const bool aligned, const bool clockwise, const int channels, const int height, const int width, const int pooled_height, const int pooled_width, scalar_t *top_data) { CUDA_1D_KERNEL_LOOP(index, nthreads) { @@ -58,11 +58,11 @@ __global__ void roi_align_rotated_forward_cuda_kernel( bottom_data + (roi_batch_ind * channels + c) * height * width; // We use roi_bin_grid to sample the grid and mimic integral - int roi_bin_grid_h = (sampling_ratio > 0) - ? sampling_ratio + int roi_bin_grid_h = (sample_num > 0) + ? sample_num : ceilf(roi_height / pooled_height); // e.g., = 2 int roi_bin_grid_w = - (sampling_ratio > 0) ? sampling_ratio : ceilf(roi_width / pooled_width); + (sample_num > 0) ? sample_num : ceilf(roi_width / pooled_width); // roi_start_h and roi_start_w are computed wrt the center of RoI (x, y). // Appropriate translation needs to be applied after. 
@@ -104,7 +104,7 @@ __global__ void roi_align_rotated_forward_cuda_kernel( template __global__ void roi_align_rotated_backward_cuda_kernel( const int nthreads, const scalar_t *top_diff, const scalar_t *bottom_rois, - const scalar_t spatial_scale, const int sampling_ratio, const bool aligned, + const scalar_t spatial_scale, const int sample_num, const bool aligned, const bool clockwise, const int channels, const int height, const int width, const int pooled_height, const int pooled_width, scalar_t *bottom_diff) { CUDA_1D_KERNEL_LOOP(index, nthreads) { @@ -146,11 +146,11 @@ __global__ void roi_align_rotated_backward_cuda_kernel( const scalar_t top_diff_this_bin = offset_top_diff[ph * pooled_width + pw]; // We use roi_bin_grid to sample the grid and mimic integral - int roi_bin_grid_h = (sampling_ratio > 0) - ? sampling_ratio + int roi_bin_grid_h = (sample_num > 0) + ? sample_num : ceilf(roi_height / pooled_height); // e.g., = 2 int roi_bin_grid_w = - (sampling_ratio > 0) ? sampling_ratio : ceilf(roi_width / pooled_width); + (sample_num > 0) ? sample_num : ceilf(roi_width / pooled_width); // roi_start_h and roi_start_w are computed wrt the center of RoI (x, y). // Appropriate translation needs to be applied after. 
diff --git a/mmcv/ops/csrc/common/cuda/roiaware_pool3d_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/roiaware_pool3d_cuda_kernel.cuh index fc0aacf..3b95dc7 100644 --- a/mmcv/ops/csrc/common/cuda/roiaware_pool3d_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/roiaware_pool3d_cuda_kernel.cuh @@ -44,38 +44,37 @@ __global__ void generate_pts_mask_for_box3d(int boxes_num, int pts_num, // coordinate params pts: (npoints, 3) [x, y, z] params pts_mask: (N, // npoints): -1 means point does not in this box, otherwise: encode (x_idxs, // y_idxs, z_idxs) by binary bit + int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; int box_idx = blockIdx.y; - CUDA_1D_KERNEL_LOOP(pt_idx, pts_num) { - if (box_idx >= boxes_num) return; + if (pt_idx >= pts_num || box_idx >= boxes_num) return; - pts += pt_idx * 3; - rois += box_idx * 7; - pts_mask += box_idx * pts_num + pt_idx; + pts += pt_idx * 3; + rois += box_idx * 7; + pts_mask += box_idx * pts_num + pt_idx; - T local_x = 0, local_y = 0; - int cur_in_flag = check_pt_in_box3d(pts, rois, local_x, local_y); + T local_x = 0, local_y = 0; + int cur_in_flag = check_pt_in_box3d(pts, rois, local_x, local_y); - pts_mask[0] = -1; - if (cur_in_flag > 0) { - T local_z = pts[2] - rois[2]; - T x_size = rois[3], y_size = rois[4], z_size = rois[5]; + pts_mask[0] = -1; + if (cur_in_flag > 0) { + T local_z = pts[2] - rois[2]; + T x_size = rois[3], y_size = rois[4], z_size = rois[5]; - T x_res = x_size / out_x; - T y_res = y_size / out_y; - T z_res = z_size / out_z; + T x_res = x_size / out_x; + T y_res = y_size / out_y; + T z_res = z_size / out_z; - unsigned int x_idx = int((local_x + x_size / 2) / x_res); - unsigned int y_idx = int((local_y + y_size / 2) / y_res); - unsigned int z_idx = int(local_z / z_res); + unsigned int x_idx = int((local_x + x_size / 2) / x_res); + unsigned int y_idx = int((local_y + y_size / 2) / y_res); + unsigned int z_idx = int(local_z / z_res); - x_idx = min(max(x_idx, 0), out_x - 1); - y_idx = min(max(y_idx, 0), out_y - 1); - 
z_idx = min(max(z_idx, 0), out_z - 1); + x_idx = min(max(x_idx, 0), out_x - 1); + y_idx = min(max(y_idx, 0), out_y - 1); + z_idx = min(max(z_idx, 0), out_z - 1); - unsigned int idx_encoding = (x_idx << 16) + (y_idx << 8) + z_idx; + unsigned int idx_encoding = (x_idx << 16) + (y_idx << 8) + z_idx; - pts_mask[0] = idx_encoding; - } + pts_mask[0] = idx_encoding; } } @@ -87,24 +86,26 @@ __global__ void collect_inside_pts_for_box3d(int boxes_num, int pts_num, T *pts_idx_of_voxels) { // params pts_mask: (N, npoints) 0 or 1 // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel) - CUDA_1D_KERNEL_LOOP(box_idx, boxes_num) { - int max_num_pts = max_pts_each_voxel - 1; // index 0 is the counter - pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel; - - for (int k = 0; k < pts_num; k++) { - if (pts_mask[box_idx * pts_num + k] != -1) { - unsigned int idx_encoding = pts_mask[box_idx * pts_num + k]; - unsigned int x_idx = (idx_encoding >> 16) & 0xFF; - unsigned int y_idx = (idx_encoding >> 8) & 0xFF; - unsigned int z_idx = idx_encoding & 0xFF; - unsigned int base_offset = x_idx * out_y * out_z * max_pts_each_voxel + - y_idx * out_z * max_pts_each_voxel + - z_idx * max_pts_each_voxel; - unsigned int cnt = pts_idx_of_voxels[base_offset]; - if (cnt < max_num_pts) { - pts_idx_of_voxels[base_offset + cnt + 1] = k; - pts_idx_of_voxels[base_offset]++; - } + + int box_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (box_idx >= boxes_num) return; + + int max_num_pts = max_pts_each_voxel - 1; // index 0 is the counter + pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel; + + for (int k = 0; k < pts_num; k++) { + if (pts_mask[box_idx * pts_num + k] != -1) { + unsigned int idx_encoding = pts_mask[box_idx * pts_num + k]; + unsigned int x_idx = (idx_encoding >> 16) & 0xFF; + unsigned int y_idx = (idx_encoding >> 8) & 0xFF; + unsigned int z_idx = idx_encoding & 0xFF; + unsigned int base_offset = x_idx * out_y * out_z * 
max_pts_each_voxel + + y_idx * out_z * max_pts_each_voxel + + z_idx * max_pts_each_voxel; + unsigned int cnt = pts_idx_of_voxels[base_offset]; + if (cnt < max_num_pts) { + pts_idx_of_voxels[base_offset + cnt + 1] = k; + pts_idx_of_voxels[base_offset]++; } } } @@ -123,38 +124,39 @@ __global__ void roiaware_maxpool3d(int boxes_num, int pts_num, int channels, int box_idx = blockIdx.z; int channel_idx = blockIdx.y; - CUDA_1D_KERNEL_LOOP(voxel_idx_flat, out_x * out_y * out_z) { - int x_idx = voxel_idx_flat / (out_y * out_z); - int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z; - int z_idx = voxel_idx_flat % out_z; - if (box_idx >= boxes_num || channel_idx >= channels) return; - - int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx; - pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel + - offset_base * max_pts_each_voxel; - pooled_features += box_idx * out_x * out_y * out_z * channels + - offset_base * channels + channel_idx; - argmax += box_idx * out_x * out_y * out_z * channels + - offset_base * channels + channel_idx; - - int argmax_idx = -1; - float max_val = -1e50; - - int total_pts = pts_idx_of_voxels[0]; - - for (int k = 1; k <= total_pts; k++) { - if (pts_feature[pts_idx_of_voxels[k] * channels + channel_idx] > - max_val) { - max_val = pts_feature[pts_idx_of_voxels[k] * channels + channel_idx]; - argmax_idx = pts_idx_of_voxels[k]; - } + int voxel_idx_flat = blockIdx.x * blockDim.x + threadIdx.x; + + int x_idx = voxel_idx_flat / (out_y * out_z); + int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z; + int z_idx = voxel_idx_flat % out_z; + if (box_idx >= boxes_num || channel_idx >= channels || x_idx >= out_x || + y_idx >= out_y || z_idx >= out_z) + return; + + int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx; + pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel + + offset_base * max_pts_each_voxel; + pooled_features += box_idx * out_x * out_y * out_z * channels + + 
offset_base * channels + channel_idx; + argmax += box_idx * out_x * out_y * out_z * channels + + offset_base * channels + channel_idx; + + int argmax_idx = -1; + float max_val = -1e50; + + int total_pts = pts_idx_of_voxels[0]; + + for (int k = 1; k <= total_pts; k++) { + if (pts_feature[pts_idx_of_voxels[k] * channels + channel_idx] > max_val) { + max_val = pts_feature[pts_idx_of_voxels[k] * channels + channel_idx]; + argmax_idx = pts_idx_of_voxels[k]; } + } - if (argmax_idx != -1) { - pooled_features[0] = max_val; - } - argmax[0] = argmax_idx; + if (argmax_idx != -1) { + pooled_features[0] = max_val; } + argmax[0] = argmax_idx; } template @@ -170,28 +172,30 @@ __global__ void roiaware_avgpool3d(int boxes_num, int pts_num, int channels, int box_idx = blockIdx.z; int channel_idx = blockIdx.y; - CUDA_1D_KERNEL_LOOP(voxel_idx_flat, out_x * out_y * out_z) { - int x_idx = voxel_idx_flat / (out_y * out_z); - int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z; - int z_idx = voxel_idx_flat % out_z; - if (box_idx >= boxes_num || channel_idx >= channels) return; - - int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx; - pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel + - offset_base * max_pts_each_voxel; - pooled_features += box_idx * out_x * out_y * out_z * channels + - offset_base * channels + channel_idx; - - float sum_val = 0; - int total_pts = pts_idx_of_voxels[0]; - - for (int k = 1; k <= total_pts; k++) { - sum_val += pts_feature[pts_idx_of_voxels[k] * channels + channel_idx]; - } + int voxel_idx_flat = blockIdx.x * blockDim.x + threadIdx.x; + + int x_idx = voxel_idx_flat / (out_y * out_z); + int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z; + int z_idx = voxel_idx_flat % out_z; + if (box_idx >= boxes_num || channel_idx >= channels || x_idx >= out_x || + y_idx >= out_y || z_idx >= out_z) + return; + + int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx; + pts_idx_of_voxels += box_idx * out_x * 
out_y * out_z * max_pts_each_voxel + + offset_base * max_pts_each_voxel; + pooled_features += box_idx * out_x * out_y * out_z * channels + + offset_base * channels + channel_idx; + + float sum_val = 0; + int total_pts = pts_idx_of_voxels[0]; + + for (int k = 1; k <= total_pts; k++) { + sum_val += pts_feature[pts_idx_of_voxels[k] * channels + channel_idx]; + } - if (total_pts > 0) { - pooled_features[0] = sum_val / total_pts; - } + if (total_pts > 0) { + pooled_features[0] = sum_val / total_pts; } } @@ -206,22 +210,24 @@ __global__ void roiaware_maxpool3d_backward(int boxes_num, int channels, int box_idx = blockIdx.z; int channel_idx = blockIdx.y; - CUDA_1D_KERNEL_LOOP(voxel_idx_flat, out_x * out_y * out_z) { - int x_idx = voxel_idx_flat / (out_y * out_z); - int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z; - int z_idx = voxel_idx_flat % out_z; - if (box_idx >= boxes_num || channel_idx >= channels) return; - - int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx; - argmax += box_idx * out_x * out_y * out_z * channels + + int voxel_idx_flat = blockIdx.x * blockDim.x + threadIdx.x; + + int x_idx = voxel_idx_flat / (out_y * out_z); + int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z; + int z_idx = voxel_idx_flat % out_z; + if (box_idx >= boxes_num || channel_idx >= channels || x_idx >= out_x || + y_idx >= out_y || z_idx >= out_z) + return; + + int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx; + argmax += box_idx * out_x * out_y * out_z * channels + + offset_base * channels + channel_idx; + grad_out += box_idx * out_x * out_y * out_z * channels + offset_base * channels + channel_idx; - grad_out += box_idx * out_x * out_y * out_z * channels + - offset_base * channels + channel_idx; - if (argmax[0] == -1) return; + if (argmax[0] == -1) return; - atomicAdd(grad_in + argmax[0] * channels + channel_idx, grad_out[0] * 1); - } + atomicAdd(grad_in + argmax[0] * channels + channel_idx, grad_out[0] * 1); } template @@ -236,24 
+242,26 @@ __global__ void roiaware_avgpool3d_backward(int boxes_num, int channels, int box_idx = blockIdx.z; int channel_idx = blockIdx.y; - CUDA_1D_KERNEL_LOOP(voxel_idx_flat, out_x * out_y * out_z) { - int x_idx = voxel_idx_flat / (out_y * out_z); - int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z; - int z_idx = voxel_idx_flat % out_z; - if (box_idx >= boxes_num || channel_idx >= channels) return; - - int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx; - pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel + - offset_base * max_pts_each_voxel; - grad_out += box_idx * out_x * out_y * out_z * channels + - offset_base * channels + channel_idx; - - int total_pts = pts_idx_of_voxels[0]; - float cur_grad = 1 / fmaxf(float(total_pts), 1.0); - for (int k = 1; k <= total_pts; k++) { - atomicAdd(grad_in + pts_idx_of_voxels[k] * channels + channel_idx, - grad_out[0] * cur_grad); - } + int voxel_idx_flat = blockIdx.x * blockDim.x + threadIdx.x; + + int x_idx = voxel_idx_flat / (out_y * out_z); + int y_idx = (voxel_idx_flat - x_idx * (out_y * out_z)) / out_z; + int z_idx = voxel_idx_flat % out_z; + if (box_idx >= boxes_num || channel_idx >= channels || x_idx >= out_x || + y_idx >= out_y || z_idx >= out_z) + return; + + int offset_base = x_idx * out_y * out_z + y_idx * out_z + z_idx; + pts_idx_of_voxels += box_idx * out_x * out_y * out_z * max_pts_each_voxel + + offset_base * max_pts_each_voxel; + grad_out += box_idx * out_x * out_y * out_z * channels + + offset_base * channels + channel_idx; + + int total_pts = pts_idx_of_voxels[0]; + float cur_grad = 1 / fmaxf(float(total_pts), 1.0); + for (int k = 1; k <= total_pts; k++) { + atomicAdd(grad_in + pts_idx_of_voxels[k] * channels + channel_idx, + grad_out[0] * cur_grad); } } diff --git a/mmcv/ops/csrc/common/cuda/roipoint_pool3d_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/roipoint_pool3d_cuda_kernel.cuh index 545f6ff..7597719 100644 --- 
a/mmcv/ops/csrc/common/cuda/roipoint_pool3d_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/roipoint_pool3d_cuda_kernel.cuh @@ -42,23 +42,23 @@ __global__ void assign_pts_to_box3d(int batch_size, int pts_num, int boxes_num, // params boxes3d: (B, M, 7) // params pts_assign: (B, N, M): idx of the corresponding box3d, -1 means // background points + int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; int box_idx = blockIdx.y; int bs_idx = blockIdx.z; - CUDA_1D_KERNEL_LOOP(pt_idx, pts_num) { - if (box_idx >= boxes_num || bs_idx >= batch_size) return; - int assign_idx = - bs_idx * pts_num * boxes_num + pt_idx * boxes_num + box_idx; - pts_assign[assign_idx] = 0; + if (pt_idx >= pts_num || box_idx >= boxes_num || bs_idx >= batch_size) { + return; + } + int assign_idx = bs_idx * pts_num * boxes_num + pt_idx * boxes_num + box_idx; + pts_assign[assign_idx] = 0; - int box_offset = bs_idx * boxes_num * 7 + box_idx * 7; - int pt_offset = bs_idx * pts_num * 3 + pt_idx * 3; + int box_offset = bs_idx * boxes_num * 7 + box_idx * 7; + int pt_offset = bs_idx * pts_num * 3 + pt_idx * 3; - T local_x = 0, local_y = 0; - int cur_in_flag = check_pt_in_box3d(xyz + pt_offset, boxes3d + box_offset, - local_x, local_y); - pts_assign[assign_idx] = cur_in_flag; - } + T local_x = 0, local_y = 0; + int cur_in_flag = check_pt_in_box3d(xyz + pt_offset, boxes3d + box_offset, + local_x, local_y); + pts_assign[assign_idx] = cur_in_flag; } __global__ void get_pooled_idx(int batch_size, int pts_num, int boxes_num, @@ -69,32 +69,35 @@ __global__ void get_pooled_idx(int batch_size, int pts_num, int boxes_num, // params pts_assign: (B, N) // params pts_idx: (B, M, 512) // params pooled_empty_flag: (B, M) - CUDA_1D_KERNEL_LOOP(boxes_idx, boxes_num) { - int bs_idx = blockIdx.y; - - int cnt = 0; - for (int k = 0; k < pts_num; k++) { - if (pts_assign[bs_idx * pts_num * boxes_num + k * boxes_num + - boxes_idx]) { - if (cnt < sampled_pts_num) { - pts_idx[bs_idx * boxes_num * sampled_pts_num + - boxes_idx * 
sampled_pts_num + cnt] = k; - cnt++; - } else - break; - } + + int boxes_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (boxes_idx >= boxes_num) { + return; + } + + int bs_idx = blockIdx.y; + + int cnt = 0; + for (int k = 0; k < pts_num; k++) { + if (pts_assign[bs_idx * pts_num * boxes_num + k * boxes_num + boxes_idx]) { + if (cnt < sampled_pts_num) { + pts_idx[bs_idx * boxes_num * sampled_pts_num + + boxes_idx * sampled_pts_num + cnt] = k; + cnt++; + } else + break; } + } - if (cnt == 0) { - pooled_empty_flag[bs_idx * boxes_num + boxes_idx] = 1; - } else if (cnt < sampled_pts_num) { - // duplicate same points for sampling - for (int k = cnt; k < sampled_pts_num; k++) { - int duplicate_idx = k % cnt; - int base_offset = - bs_idx * boxes_num * sampled_pts_num + boxes_idx * sampled_pts_num; - pts_idx[base_offset + k] = pts_idx[base_offset + duplicate_idx]; - } + if (cnt == 0) { + pooled_empty_flag[bs_idx * boxes_num + boxes_idx] = 1; + } else if (cnt < sampled_pts_num) { + // duplicate same points for sampling + for (int k = cnt; k < sampled_pts_num; k++) { + int duplicate_idx = k % cnt; + int base_offset = + bs_idx * boxes_num * sampled_pts_num + boxes_idx * sampled_pts_num; + pts_idx[base_offset + k] = pts_idx[base_offset + duplicate_idx]; } } } @@ -109,26 +112,33 @@ __global__ void roipoint_pool3d_forward( // params pts_feature: (B, N, C) // params pooled_features: (B, M, 512, 3+C) // params pooled_empty_flag: (B, M) + + int sample_pt_idx = blockIdx.x * blockDim.x + threadIdx.x; int box_idx = blockIdx.y; int bs_idx = blockIdx.z; - CUDA_1D_KERNEL_LOOP(sample_pt_idx, sampled_pts_num) { - if (box_idx >= boxes_num || bs_idx >= batch_size) return; - if (pooled_empty_flag[bs_idx * boxes_num + box_idx]) return; - - int temp_idx = bs_idx * boxes_num * sampled_pts_num + - box_idx * sampled_pts_num + sample_pt_idx; - int src_pt_idx = pts_idx[temp_idx]; - int dst_feature_offset = temp_idx * (3 + feature_in_len); - - for (int j = 0; j < 3; j++) - 
pooled_features[dst_feature_offset + j] = - xyz[bs_idx * pts_num * 3 + src_pt_idx * 3 + j]; - - int src_feature_offset = - bs_idx * pts_num * feature_in_len + src_pt_idx * feature_in_len; - memcpy(pooled_features + dst_feature_offset + 3, - pts_feature + src_feature_offset, feature_in_len * sizeof(T)); + + if (sample_pt_idx >= sampled_pts_num || box_idx >= boxes_num || + bs_idx >= batch_size) { + return; + } + + if (pooled_empty_flag[bs_idx * boxes_num + box_idx]) { + return; } + + int temp_idx = bs_idx * boxes_num * sampled_pts_num + + box_idx * sampled_pts_num + sample_pt_idx; + int src_pt_idx = pts_idx[temp_idx]; + int dst_feature_offset = temp_idx * (3 + feature_in_len); + + for (int j = 0; j < 3; j++) + pooled_features[dst_feature_offset + j] = + xyz[bs_idx * pts_num * 3 + src_pt_idx * 3 + j]; + + int src_feature_offset = + bs_idx * pts_num * feature_in_len + src_pt_idx * feature_in_len; + memcpy(pooled_features + dst_feature_offset + 3, + pts_feature + src_feature_offset, feature_in_len * sizeof(T)); } #endif // ROIPOINT_POOL3D_CUDA_KERNEL_CUH diff --git a/mmcv/ops/csrc/common/cuda/rotated_feature_align_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/rotated_feature_align_cuda_kernel.cuh deleted file mode 100644 index ffcc658..0000000 --- a/mmcv/ops/csrc/common/cuda/rotated_feature_align_cuda_kernel.cuh +++ /dev/null @@ -1,129 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved. 
-// Modified from -// https://github.com/SJTU-Thinklab-Det/r3det-on-mmdetection/blob/master/mmdet/ops/fr/src/feature_refine_kernel.cu -#ifndef ROTATED_FEATURE_ALIGN_CUDA_KERNEL_CUH -#define ROTATED_FEATURE_ALIGN_CUDA_KERNEL_CUH - -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else -#include "pytorch_cuda_helper.hpp" -#endif - -template -__global__ void rotated_feature_align_forward_kernel( - const int nthreads, const int points, const scalar_t* bottom_data, - const scalar_t* best_bboxes, const scalar_t spatial_scale, - const int channels, const int height, const int width, scalar_t* top_data) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { - int w = index % width; - int h = (index / width) % height; - int c = (index / width / height) % channels; - int n = index / width / height / channels; - - const scalar_t* bbox_offset = - best_bboxes + ((n * height + h) * width + w) * 5; - scalar_t roi_y = bbox_offset[0] * spatial_scale; - scalar_t roi_x = bbox_offset[1] * spatial_scale; - - scalar_t px[5] = {roi_x, 0, 0, 0, 0}; - scalar_t py[5] = {roi_y, 0, 0, 0, 0}; - - if (points > 1) { - scalar_t roi_w = bbox_offset[2] * spatial_scale; - scalar_t roi_h = bbox_offset[3] * spatial_scale; - scalar_t roi_a = bbox_offset[4]; - - scalar_t w_2 = roi_w / 2, h_2 = roi_h / 2; - scalar_t cosa = cosf(roi_a), sina = sinf(roi_a); - scalar_t wx = cosa * w_2, wy = sina * w_2; - scalar_t hx = -sina * h_2, hy = cosa * h_2; - - px[1] = roi_x + wx + hx; - py[1] = roi_y + wy + hy; - px[2] = roi_x - wx + hx; - py[2] = roi_y - wy + hy; - px[3] = roi_x - wx - hx; - py[3] = roi_y - wy - hy; - px[4] = roi_x + wx - hx; - py[4] = roi_y + wy - hy; - } - - const scalar_t* offset_bottom_data = - bottom_data + (n * channels + c) * height * width; - - scalar_t output_val = bottom_data[index]; - for (int i = 0; i < points; i++) { - output_val += bilinear_interpolate(offset_bottom_data, height, - width, py[i], px[i], i); - } - top_data[index] = output_val; - } -} - -template -__global__ void 
rotated_feature_align_backward_kernel( - const int nthreads, const int points, const scalar_t* top_diff, - const scalar_t* best_bboxes, const scalar_t spatial_scale, - const int channels, const int height, const int width, - scalar_t* bottom_diff) { - CUDA_1D_KERNEL_LOOP(index, nthreads) { - int w = index % width; - int h = (index / width) % height; - int c = (index / width / height) % channels; - int n = index / width / height / channels; - - const scalar_t* bbox_offset = - best_bboxes + ((n * height + h) * width + w) * 5; - scalar_t roi_y = bbox_offset[0] * spatial_scale; - scalar_t roi_x = bbox_offset[1] * spatial_scale; - - scalar_t px[5] = {roi_x, 0, 0, 0, 0}; - scalar_t py[5] = {roi_y, 0, 0, 0, 0}; - - if (points > 1) { - scalar_t roi_w = bbox_offset[2] * spatial_scale; - scalar_t roi_h = bbox_offset[3] * spatial_scale; - scalar_t roi_a = bbox_offset[4]; - - scalar_t w_2 = roi_w / 2, h_2 = roi_h / 2; - scalar_t cosa = cosf(roi_a), sina = sinf(roi_a); - scalar_t wx = cosa * w_2, wy = sina * w_2; - scalar_t hx = -sina * h_2, hy = cosa * h_2; - - px[1] = roi_x + wx + hx; - py[1] = roi_y + wy + hy; - px[2] = roi_x - wx + hx; - py[2] = roi_y - wy + hy; - px[3] = roi_x - wx - hx; - py[3] = roi_y - wy - hy; - px[4] = roi_x + wx - hx; - py[4] = roi_y + wy - hy; - } - - scalar_t* offset_bottom_diff = - bottom_diff + (n * channels + c) * height * width; - scalar_t value_top_diff = top_diff[index]; - - atomicAdd(bottom_diff + index, value_top_diff); - for (int i = 0; i < points; i++) { - scalar_t w1, w2, w3, w4; - int x_low, x_high, y_low, y_high; - - bilinear_interpolate_gradient(height, width, py[i], px[i], w1, - w2, w3, w4, x_low, x_high, y_low, - y_high, i); - scalar_t g1 = value_top_diff * w1; - scalar_t g2 = value_top_diff * w2; - scalar_t g3 = value_top_diff * w3; - scalar_t g4 = value_top_diff * w4; - if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) { - atomicAdd(offset_bottom_diff + y_low * width + x_low, g1); - atomicAdd(offset_bottom_diff + y_low * 
width + x_high, g2); - atomicAdd(offset_bottom_diff + y_high * width + x_low, g3); - atomicAdd(offset_bottom_diff + y_high * width + x_high, g4); - } - } - } -} -#endif // ROTATED_FEATURE_ALIGN_CUDA_KERNEL_CUH diff --git a/mmcv/ops/csrc/common/cuda/scatter_points_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/scatter_points_cuda_kernel.cuh index af5b9f6..7f9c402 100644 --- a/mmcv/ops/csrc/common/cuda/scatter_points_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/scatter_points_cuda_kernel.cuh @@ -34,7 +34,7 @@ __device__ __forceinline__ static void reduceMax(double *address, double val) { } // get rid of meaningless warnings when compiling host code -#ifdef MMCV_WITH_HIP +#ifdef HIP_DIFF __device__ __forceinline__ static void reduceAdd(float *address, float val) { atomicAdd(address, val); } @@ -86,7 +86,7 @@ __device__ __forceinline__ static void reduceAdd(double *address, double val) { #endif } #endif // __CUDA_ARCH__ -#endif // MMCV_WITH_HIP +#endif // HIP_DIFF template __global__ void feats_reduce_kernel( diff --git a/mmcv/ops/csrc/common/cuda/spconv/indice.cuh b/mmcv/ops/csrc/common/cuda/spconv/indice.cuh deleted file mode 100644 index 5ef0009..0000000 --- a/mmcv/ops/csrc/common/cuda/spconv/indice.cuh +++ /dev/null @@ -1,236 +0,0 @@ -// Copyright 2019 Yan Yan -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef INDICE_CU_H_ -#define INDICE_CU_H_ -#include -#include - -#include - -template -__global__ void prepareIndicePairsKernel( - tv::TensorView indicesIn, tv::TensorView indicesOut, - tv::TensorView gridsOut, tv::TensorView indicePairs, - tv::TensorView indiceNum, tv::TensorView indicePairUnique, - const tv::SimpleVector kernelSize, - const tv::SimpleVector stride, - const tv::SimpleVector padding, - const tv::SimpleVector dilation, - const tv::SimpleVector outSpatialShape) { - auto numActIn = indicesIn.dim(0); - Index spatialVolume = 1; -#pragma unroll - for (int i = 0; i < NDim; ++i) { - spatialVolume *= outSpatialShape[i]; - } - Index kernelVolume = 1; -#pragma unroll - for (int i = 0; i < NDim; ++i) { - kernelVolume *= kernelSize[i]; - } - Index numValidPoints = 0; - Index validPoints[KernelMaxVolume * (NDim + 1)]; - Index *pointPtr = nullptr; - auto indicePairsDim2 = indicePairs.dim(2); - Index index; - for (int ix : tv::KernelLoopX(numActIn)) { - numValidPoints = getValidOutPos( - indicesIn.data() + ix * (NDim + 1) + 1, kernelSize.data(), - stride.data(), padding.data(), dilation.data(), outSpatialShape.data(), - validPoints); - for (Index i = 0; i < numValidPoints; ++i) { - pointPtr = validPoints + i * (NDim + 1); - auto offset = pointPtr[NDim]; - auto oldNum = atomicAdd(indiceNum.data() + offset, Index(1)); - indicePairs(offset, 0, oldNum) = ix; - index = tv::rowArrayIdx(pointPtr, outSpatialShape.data()) + - spatialVolume * indicesIn(ix, 0); - indicePairs(offset, 1, oldNum) = index; - indicePairUnique[offset * indicePairsDim2 + oldNum] = index; - } - } -} - -template -__global__ void prepareDeConvIndicePairsKernel( - tv::TensorView indicesIn, tv::TensorView indicesOut, - tv::TensorView gridsOut, tv::TensorView indicePairs, - tv::TensorView indiceNum, tv::TensorView indicePairUnique, - const tv::SimpleVector kernelSize, - const tv::SimpleVector stride, - const tv::SimpleVector padding, - const tv::SimpleVector dilation, - const tv::SimpleVector 
outSpatialShape) { - auto numActIn = indicesIn.dim(0); - Index spatialVolume = 1; -#pragma unroll - for (int i = 0; i < NDim; ++i) { - spatialVolume *= outSpatialShape[i]; - } - Index kernelVolume = 1; -#pragma unroll - for (int i = 0; i < NDim; ++i) { - kernelVolume *= kernelSize[i]; - } - Index numValidPoints = 0; - Index validPoints[KernelMaxVolume * (NDim + 1)]; - Index *pointPtr = nullptr; - auto indicePairsDim2 = indicePairs.dim(2); - Index index; - for (int ix : tv::KernelLoopX(numActIn)) { - numValidPoints = getValidOutPosTranspose( - indicesIn.data() + ix * (NDim + 1) + 1, kernelSize.data(), - stride.data(), padding.data(), dilation.data(), outSpatialShape.data(), - validPoints); - for (Index i = 0; i < numValidPoints; ++i) { - pointPtr = validPoints + i * (NDim + 1); - auto offset = pointPtr[NDim]; - auto oldNum = atomicAdd(indiceNum.data() + offset, Index(1)); - indicePairs(offset, 0, oldNum) = ix; - index = tv::rowArrayIdx(pointPtr, outSpatialShape.data()) + - spatialVolume * indicesIn(ix, 0); - indicePairs(offset, 1, oldNum) = index; - indicePairUnique[offset * indicePairsDim2 + oldNum] = index; - } - } -} - -template -__global__ void assignGridAndIndiceOutKernel( - tv::TensorView indicesOut, tv::TensorView gridsOut, - int numAct, tv::TensorView indicePairs, - tv::TensorView indicePairUnique, - const tv::SimpleVector outSpatialShape, int batchSize) { - Index index; - auto indicesOutPtr = indicesOut.data(); - for (int ix : tv::KernelLoopX(numAct)) { - index = indicePairUnique[ix]; - gridsOut[index] = ix; - index = tv::rowArrayIdxInv( - index, indicesOutPtr + ix * (NDim + 1) + 1, outSpatialShape.data()); - indicesOut[ix * (NDim + 1)] = index % batchSize; - } -} - -template -__global__ void assignIndicePairsKernel( - tv::TensorView indicesOut, tv::TensorView gridsOut, - int numActIn, tv::TensorView indicePairs, - tv::TensorView indicePairUnique, - const tv::SimpleVector outSpatialShape) { - Index index; - int kernelVolume = indicePairs.dim(0); - for (int 
ix : tv::KernelLoopX(numActIn)) { - for (int i = 0; i < kernelVolume; ++i) { - index = indicePairs(i, 1, ix); - if (index > -1) { - indicePairs(i, 1, ix) = gridsOut[index]; - } - } - } -} - -template -__global__ void prepareSubMGridKernel( - tv::TensorView indicesIn, tv::TensorView gridsOut, - const tv::SimpleVector outSpatialShape) { - auto numActIn = indicesIn.dim(0); - Index spatialVolume = 1; -#pragma unroll - for (int i = 0; i < NDim; ++i) { - spatialVolume *= outSpatialShape[i]; - } - Index index = 0; - for (int ix : tv::KernelLoopX(numActIn)) { - index = tv::rowArrayIdx(indicesIn.data() + ix * (NDim + 1) + 1, - outSpatialShape.data()) + - spatialVolume * indicesIn(ix, 0); - gridsOut[index] = ix; - } -} - -template -__global__ void getSubMIndicePairsKernel( - tv::TensorView indicesIn, tv::TensorView gridsOut, - tv::TensorView indicePairs, tv::TensorView indiceNum, - const tv::SimpleVector kernelSize, - const tv::SimpleVector stride, - const tv::SimpleVector padding, - const tv::SimpleVector dilation, - const tv::SimpleVector outSpatialShape) { - auto numActIn = indicesIn.dim(0); - Index spatialVolume = 1; -#pragma unroll - for (int i = 0; i < NDim; ++i) { - spatialVolume *= outSpatialShape[i]; - } - Index numValidPoints = 0; - Index validPoints[KernelMaxVolume * (NDim + 1)]; - Index *pointPtr = nullptr; - Index index = 0; - for (int ix : tv::KernelLoopX(numActIn)) { - numValidPoints = getValidOutPos( - indicesIn.data() + ix * (NDim + 1) + 1, kernelSize.data(), - stride.data(), padding.data(), dilation.data(), outSpatialShape.data(), - validPoints); - for (int i = 0; i < numValidPoints; ++i) { - pointPtr = validPoints + i * (NDim + 1); - auto offset = pointPtr[NDim]; - index = tv::rowArrayIdx(pointPtr, outSpatialShape.data()) + - spatialVolume * indicesIn(ix, 0); - if (gridsOut[index] > -1) { - auto oldNum = atomicAdd(indiceNum.data() + offset, Index(1)); - indicePairs(offset, 1, oldNum) = gridsOut[index]; - indicePairs(offset, 0, oldNum) = ix; - } - } - } -} 
- -template -__global__ void resetGridKernel(const Index *indicePairUnique, - tv::TensorView gridsOut, - int numAct) { - for (int ix : tv::KernelLoopX(numAct)) { - gridsOut[indicePairUnique[ix]] = -1; - } -} - -template -__global__ void resetGridSubMKernel( - const Index *indices, tv::TensorView gridsOut, - const tv::SimpleVector outSpatialShape, int numAct) { - int outSpatialShapeReg[NDim]; - for (int i = 0; i < NDim; ++i) { - outSpatialShapeReg[i] = outSpatialShape[i]; - } - Index spatialVolume = 1; - auto indsPtr = indices; -#pragma unroll - for (int i = 0; i < NDim; ++i) { - spatialVolume *= outSpatialShape[i]; - } - Index index; - for (int ix : tv::KernelLoopX(numAct)) { - indsPtr = indices + ix * (NDim + 1); - index = tv::rowArrayIdx(indsPtr + 1, outSpatialShapeReg); - gridsOut[index + spatialVolume * indsPtr[0]] = -1; - } -} - -#endif diff --git a/mmcv/ops/csrc/common/cuda/spconv/reordering.cuh b/mmcv/ops/csrc/common/cuda/spconv/reordering.cuh deleted file mode 100644 index e3ec68b..0000000 --- a/mmcv/ops/csrc/common/cuda/spconv/reordering.cuh +++ /dev/null @@ -1,160 +0,0 @@ -// Copyright 2019 Yan Yan -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef REORDERING_CU_H_ -#define REORDERING_CU_H_ -#include - -template -__global__ void gatherGenericKernel(scalar_t *buffer, const scalar_t *features, - const Index *indices, int size, - int numPlanes) { - int ILPStrideX[NumILP]; - Index inds[NumILP]; -#pragma unroll - for (int ilp = 0; ilp < NumILP; ilp++) - ILPStrideX[ilp] = ilp * gridDim.x * blockDim.x; - - for (int ix : tv::KernelLoopX(size)) { -#pragma unroll - for (int ilp = 0; ilp < NumILP; ilp++) { - if (ix + ILPStrideX[ilp] < size) - inds[ilp] = indices[ix + ILPStrideX[ilp]] * numPlanes; - } - for (int iy : tv::KernelLoopY(numPlanes)) { -#pragma unroll - for (int ilp = 0; ilp < NumILP; ++ilp) { - if (ix + ILPStrideX[ilp] < size) - buffer[(ix + ILPStrideX[ilp]) * numPlanes + iy] = - features[inds[ilp] + iy]; - } - } - } -} - -template -__global__ void gatherVecKernel(scalar_t *buffer, const scalar_t *features, - const Index *indices, int size, int numPlanes) { - int ILPStrideX[NumILP]; - Index inds[NumILP]; -#pragma unroll - for (int ilp = 0; ilp < NumILP; ilp++) - ILPStrideX[ilp] = ilp * gridDim.x * blockDim.x; - - for (int ix : tv::KernelLoopX(size)) { -#pragma unroll - for (int ilp = 0; ilp < NumILP; ilp++) { - if (ix + ILPStrideX[ilp] < size) - inds[ilp] = indices[ix + ILPStrideX[ilp]] * numPlanes; - } - for (int iy : tv::KernelLoopY(numPlanes)) { -#pragma unroll - for (int ilp = 0; ilp < NumILP; ++ilp) { - if (ix + ILPStrideX[ilp] < size) - reinterpret_cast( - buffer)[(ix + ILPStrideX[ilp]) * numPlanes + iy] = - reinterpret_cast(features)[inds[ilp] + iy]; - } - } - } -} - -template -__global__ void gatherVecBlockKernel(scalar_t *buffer, const scalar_t *features, - const Index *indices, int size, - int numPlanes) { - int ILPStrideY[NumILP]; -#pragma unroll - for (int ilp = 0; ilp < NumILP; ilp++) - ILPStrideY[ilp] = ilp * gridDim.y * blockDim.y; - features += blockIdx.x * NumTLP; - buffer += blockIdx.x * NumTLP; - - for (int iy : tv::KernelLoopY(size)) { -#pragma unroll - for (int ilp = 0; ilp < 
NumILP; ++ilp) { - reinterpret_cast( - buffer)[(iy + ILPStrideY[ilp]) * numPlanes + threadIdx.x] = - reinterpret_cast( - features)[indices[iy + ILPStrideY[ilp]] * numPlanes + - threadIdx.x]; - } - } -} - -template -__global__ void scatterAddGenericKernel(scalar_t *outFeatures, - const scalar_t *buffer, - const Index *indices, int size, - int numPlanes) { - int ILPStrideX[NumILP]; - Index inds[NumILP]; -#pragma unroll - for (int ilp = 0; ilp < NumILP; ilp++) - ILPStrideX[ilp] = ilp * gridDim.x * blockDim.x; - for (int ix : tv::KernelLoopX(size)) { -#pragma unroll - for (int ilp = 0; ilp < NumILP; ilp++) { - if (ix + ILPStrideX[ilp] < size) - inds[ilp] = indices[ix + ILPStrideX[ilp]] * numPlanes; - } - for (int iy : tv::KernelLoopY(numPlanes)) { -#pragma unroll - for (int ilp = 0; ilp < NumILP; ++ilp) { - if (ix + ILPStrideX[ilp] < size) { - outFeatures[inds[ilp] + iy] += - buffer[(ix + ILPStrideX[ilp]) * numPlanes + iy]; - } - } - } - } -} - -template -__global__ void scatterAddVecBlockKernel(scalar_t *outFeatures, - const scalar_t *buffer, - const Index *indices, int size, - int numPlanes) { - int ILPStrideY[NumILP]; - constexpr int vecloadFactor = sizeof(VecType) / sizeof(scalar_t); -#pragma unroll - for (int ilp = 0; ilp < NumILP; ilp++) - ILPStrideY[ilp] = ilp * gridDim.y * blockDim.y; - outFeatures += blockIdx.x * NumTLP; - buffer += blockIdx.x * NumTLP; - scalar_t buf[vecloadFactor]; - scalar_t buf2[vecloadFactor]; - Index idx; - for (int iy : tv::KernelLoopY(size)) { -#pragma unroll - for (int ilp = 0; ilp < NumILP; ++ilp) { - idx = indices[iy + ILPStrideY[ilp]] * numPlanes + threadIdx.x; - reinterpret_cast(buf)[0] = - reinterpret_cast(outFeatures)[idx]; - reinterpret_cast(buf2)[0] = reinterpret_cast( - buffer)[(iy + ILPStrideY[ilp]) * numPlanes + threadIdx.x]; -#pragma unroll - for (int i = 0; i < vecloadFactor; i++) { - buf[i] += buf2[i]; - } - reinterpret_cast(outFeatures)[idx] = - reinterpret_cast(buf)[0]; - } - } -} - -#endif diff --git 
a/mmcv/ops/csrc/common/cuda/stack_ball_query_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/stack_ball_query_cuda_kernel.cuh deleted file mode 100644 index 06caefa..0000000 --- a/mmcv/ops/csrc/common/cuda/stack_ball_query_cuda_kernel.cuh +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -// Modified from -// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/ball_query_gpu.cu -#ifndef STACK_BALL_QUERY_CUDA_KERNEL_CUH -#define STACK_BALL_QUERY_CUDA_KERNEL_CUH - -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else -#include "pytorch_cuda_helper.hpp" -#endif - -template -__global__ void stack_ball_query_forward_cuda_kernel( - int B, int M, float radius, int nsample, const T *new_xyz, - const int *new_xyz_batch_cnt, const T *xyz, const int *xyz_batch_cnt, - int *idx) { - // :param xyz: (N1 + N2 ..., 3) xyz coordinates of the features - // :param xyz_batch_cnt: (batch_size), [N1, N2, ...] - // :param new_xyz: (M1 + M2 ..., 3) centers of the ball query - // :param new_xyz_batch_cnt: (batch_size), [M1, M2, ...] 
- // output: - // idx: (M, nsample) - const T *cur_xyz = xyz; - int *cur_idx = idx; - CUDA_1D_KERNEL_LOOP(pt_idx, M) { - int bs_idx = 0; - for (int pt_cnt = 0; bs_idx < B; bs_idx++) { - pt_cnt += new_xyz_batch_cnt[bs_idx]; - if (pt_idx < pt_cnt) break; - } - - int xyz_batch_start_idx = 0; - for (int k = 0; k < bs_idx; k++) xyz_batch_start_idx += xyz_batch_cnt[k]; - - const T *new_xyz_p = new_xyz + pt_idx * 3; - cur_xyz += xyz_batch_start_idx * 3; - cur_idx += pt_idx * nsample; - - float radius2 = radius * radius; - T new_x = new_xyz_p[0]; - T new_y = new_xyz_p[1]; - T new_z = new_xyz_p[2]; - int n = xyz_batch_cnt[bs_idx]; - - int cnt = 0; - for (int k = 0; k < n; ++k) { - T x = cur_xyz[k * 3 + 0]; - T y = cur_xyz[k * 3 + 1]; - T z = cur_xyz[k * 3 + 2]; - T d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + - (new_z - z) * (new_z - z); - if (d2 < radius2) { - if (cnt == 0) { - for (int l = 0; l < nsample; ++l) { - cur_idx[l] = k; - } - } - cur_idx[cnt] = k; - ++cnt; - if (cnt >= nsample) break; - } - } - if (cnt == 0) cur_idx[0] = -1; - } -} - -#endif // STACK_BALL_QUERY_CUDA_KERNEL_CUH diff --git a/mmcv/ops/csrc/common/cuda/stack_group_points_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/stack_group_points_cuda_kernel.cuh deleted file mode 100644 index 4ef3663..0000000 --- a/mmcv/ops/csrc/common/cuda/stack_group_points_cuda_kernel.cuh +++ /dev/null @@ -1,97 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved. 
-// Modified from -// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/group_points_gpu.cu -#ifndef STACK_GROUP_POINTS_CUDA_KERNEL_CUH -#define STACK_GROUP_POINTS_CUDA_KERNEL_CUH -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else -#include "pytorch_cuda_helper.hpp" -#endif -#include -template -__global__ void stack_group_points_forward_cuda_kernel( - int b, int c, int m, int nsample, const T *features, - const int *features_batch_cnt, const int *idx, const int *idx_batch_cnt, - T *out) { - // :param features: (N1 + N2 ..., C) tensor of features to group - // :param features_batch_cnt: (batch_size) [N1 + N2 ...] tensor containing the - // indices of features to group with :param idx: (M1 + M2 ..., nsample) tensor - // containing the indices of features to group with :param idx_batch_cnt: - // (batch_size) [M1 + M2 ...] tensor containing the indices of features to - // group with :return: - // output: (M1 + M2, C, nsample) tensor - CUDA_1D_KERNEL_LOOP(index, m * c * nsample) { - const T *cur_features = features; - const int *cur_idx = idx; - int sample_idx = index % nsample; - int c_idx = (index / nsample) % c; - int pt_idx = (index / nsample / c); - - if (pt_idx >= m || c_idx >= c || sample_idx >= nsample) return; - int bs_idx = 0, pt_cnt = idx_batch_cnt[0]; - for (int k = 1; k < b; k++) { - if (pt_idx < pt_cnt) break; - pt_cnt += idx_batch_cnt[k]; - bs_idx = k; - } - - int features_batch_start_idx = 0; - int features_batch_end_idx = features_batch_cnt[0]; - for (int k = 0; k < bs_idx; k++) { - features_batch_start_idx += features_batch_cnt[k]; - features_batch_end_idx = - features_batch_start_idx + features_batch_cnt[k + 1]; - } - cur_features += features_batch_start_idx * c; - - cur_idx += pt_idx * nsample + sample_idx; - int in_idx = cur_idx[0] * c + c_idx; - int out_idx = pt_idx * c * nsample + c_idx * nsample + sample_idx; - if (in_idx < features_batch_end_idx * c) { - out[out_idx] = cur_features[in_idx]; - } - } -} - 
-template -__global__ void stack_group_points_backward_cuda_kernel( - int b, int c, int m, int n, int nsample, const T *grad_out, const int *idx, - const int *idx_batch_cnt, const int *features_batch_cnt, T *grad_features) { - // :param grad_out: (M1 + M2 ..., C, nsample) tensor of the gradients of the - // output from forward :param idx: (M1 + M2 ..., nsample) tensor containing - // the indices of features to group with :param idx_batch_cnt: (batch_size) - // [M1 + M2 ...] tensor containing the indices of features to group with - // :param features_batch_cnt: (batch_size) [N1 + N2 ...] tensor containing the - // indices of features to group with :return: - // grad_features: (N1 + N2 ..., C) gradient of the features - CUDA_1D_KERNEL_LOOP(index, m * c * nsample) { - const T *cur_grad_out = grad_out; - const int *cur_idx = idx; - T *cur_grad_features = grad_features; - int sample_idx = index % nsample; - int c_idx = (index / nsample) % c; - int pt_idx = (index / nsample / c); - - if (pt_idx >= m || c_idx >= c || sample_idx >= nsample) return; - - int bs_idx = 0, pt_cnt = idx_batch_cnt[0]; - for (int k = 1; k < b; k++) { - if (pt_idx < pt_cnt) break; - pt_cnt += idx_batch_cnt[k]; - bs_idx = k; - } - - int features_batch_start_idx = 0; - for (int k = 0; k < bs_idx; k++) - features_batch_start_idx += features_batch_cnt[k]; - - cur_grad_out += pt_idx * c * nsample + c_idx * nsample + sample_idx; - cur_idx += pt_idx * nsample + sample_idx; - cur_grad_features += (features_batch_start_idx + cur_idx[0]) * c + c_idx; - - atomicAdd(cur_grad_features, cur_grad_out[0]); - } -} - -#endif // GROUP_POINTS_CUDA_KERNEL_CUH diff --git a/mmcv/ops/csrc/common/cuda/three_interpolate_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/three_interpolate_cuda_kernel.cuh index 971b496..43aecb3 100644 --- a/mmcv/ops/csrc/common/cuda/three_interpolate_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/three_interpolate_cuda_kernel.cuh @@ -20,17 +20,17 @@ __global__ void 
three_interpolate_forward_cuda_kernel( int bs_idx = blockIdx.z; int c_idx = blockIdx.y; - CUDA_1D_KERNEL_LOOP(pt_idx, n) { - if (bs_idx >= b || c_idx >= c) return; + int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; - weight += bs_idx * n * 3 + pt_idx * 3; - points += bs_idx * c * m + c_idx * m; - idx += bs_idx * n * 3 + pt_idx * 3; - out += bs_idx * c * n + c_idx * n; + if (bs_idx >= b || c_idx >= c || pt_idx >= n) return; - out[pt_idx] = weight[0] * points[idx[0]] + weight[1] * points[idx[1]] + - weight[2] * points[idx[2]]; - } + weight += bs_idx * n * 3 + pt_idx * 3; + points += bs_idx * c * m + c_idx * m; + idx += bs_idx * n * 3 + pt_idx * 3; + out += bs_idx * c * n + c_idx * n; + + out[pt_idx] = weight[0] * points[idx[0]] + weight[1] * points[idx[1]] + + weight[2] * points[idx[2]]; } template @@ -44,18 +44,18 @@ __global__ void three_interpolate_backward_cuda_kernel( int bs_idx = blockIdx.z; int c_idx = blockIdx.y; - CUDA_1D_KERNEL_LOOP(pt_idx, n) { - if (bs_idx >= b || c_idx >= c) return; - - grad_out += bs_idx * c * n + c_idx * n + pt_idx; - weight += bs_idx * n * 3 + pt_idx * 3; - grad_points += bs_idx * c * m + c_idx * m; - idx += bs_idx * n * 3 + pt_idx * 3; - - atomicAdd(grad_points + idx[0], grad_out[0] * weight[0]); - atomicAdd(grad_points + idx[1], grad_out[0] * weight[1]); - atomicAdd(grad_points + idx[2], grad_out[0] * weight[2]); - } + int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; + + if (bs_idx >= b || c_idx >= c || pt_idx >= n) return; + + grad_out += bs_idx * c * n + c_idx * n + pt_idx; + weight += bs_idx * n * 3 + pt_idx * 3; + grad_points += bs_idx * c * m + c_idx * m; + idx += bs_idx * n * 3 + pt_idx * 3; + + atomicAdd(grad_points + idx[0], grad_out[0] * weight[0]); + atomicAdd(grad_points + idx[1], grad_out[0] * weight[1]); + atomicAdd(grad_points + idx[2], grad_out[0] * weight[2]); } #endif // THREE_INTERPOLATE_CUDA_KERNEL_CUH diff --git a/mmcv/ops/csrc/common/cuda/three_nn_cuda_kernel.cuh 
b/mmcv/ops/csrc/common/cuda/three_nn_cuda_kernel.cuh index 1543412..824da4c 100644 --- a/mmcv/ops/csrc/common/cuda/three_nn_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/three_nn_cuda_kernel.cuh @@ -19,49 +19,48 @@ __global__ void three_nn_forward_cuda_kernel(int b, int n, int m, // idx: (B, N, 3) int bs_idx = blockIdx.y; - CUDA_1D_KERNEL_LOOP(pt_idx, n) { - if (bs_idx >= b) return; + int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (bs_idx >= b || pt_idx >= n) return; - unknown += bs_idx * n * 3 + pt_idx * 3; - known += bs_idx * m * 3; - dist2 += bs_idx * n * 3 + pt_idx * 3; - idx += bs_idx * n * 3 + pt_idx * 3; + unknown += bs_idx * n * 3 + pt_idx * 3; + known += bs_idx * m * 3; + dist2 += bs_idx * n * 3 + pt_idx * 3; + idx += bs_idx * n * 3 + pt_idx * 3; - T ux = unknown[0]; - T uy = unknown[1]; - T uz = unknown[2]; + T ux = unknown[0]; + T uy = unknown[1]; + T uz = unknown[2]; - double best1 = 1e40, best2 = 1e40, best3 = 1e40; - int besti1 = 0, besti2 = 0, besti3 = 0; - for (int k = 0; k < m; ++k) { - T x = known[k * 3 + 0]; - T y = known[k * 3 + 1]; - T z = known[k * 3 + 2]; - T d = (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z); - if (d < best1) { - best3 = best2; - besti3 = besti2; - best2 = best1; - besti2 = besti1; - best1 = d; - besti1 = k; - } else if (d < best2) { - best3 = best2; - besti3 = besti2; - best2 = d; - besti2 = k; - } else if (d < best3) { - best3 = d; - besti3 = k; - } + double best1 = 1e40, best2 = 1e40, best3 = 1e40; + int besti1 = 0, besti2 = 0, besti3 = 0; + for (int k = 0; k < m; ++k) { + T x = known[k * 3 + 0]; + T y = known[k * 3 + 1]; + T z = known[k * 3 + 2]; + T d = (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z); + if (d < best1) { + best3 = best2; + besti3 = besti2; + best2 = best1; + besti2 = besti1; + best1 = d; + besti1 = k; + } else if (d < best2) { + best3 = best2; + besti3 = besti2; + best2 = d; + besti2 = k; + } else if (d < best3) { + best3 = d; + besti3 = k; } - dist2[0] = 
best1; - dist2[1] = best2; - dist2[2] = best3; - idx[0] = besti1; - idx[1] = besti2; - idx[2] = besti3; } + dist2[0] = best1; + dist2[1] = best2; + dist2[2] = best3; + idx[0] = besti1; + idx[1] = besti2; + idx[2] = besti3; } #endif // THREE_NN_CUDA_KERNEL_CUH diff --git a/mmcv/ops/csrc/common/cuda/voxelization_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/voxelization_cuda_kernel.cuh index 021b488..62e118b 100644 --- a/mmcv/ops/csrc/common/cuda/voxelization_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/voxelization_cuda_kernel.cuh @@ -23,20 +23,20 @@ __global__ void dynamic_voxelize_kernel( // To save some computation auto points_offset = points + index * num_features; auto coors_offset = coors + index * NDim; - int c_x = floorf((points_offset[0] - coors_x_min) / voxel_x); + int c_x = floor((points_offset[0] - coors_x_min) / voxel_x); if (c_x < 0 || c_x >= grid_x) { coors_offset[0] = -1; continue; } - int c_y = floorf((points_offset[1] - coors_y_min) / voxel_y); + int c_y = floor((points_offset[1] - coors_y_min) / voxel_y); if (c_y < 0 || c_y >= grid_y) { coors_offset[0] = -1; coors_offset[1] = -1; continue; } - int c_z = floorf((points_offset[2] - coors_z_min) / voxel_z); + int c_z = floor((points_offset[2] - coors_z_min) / voxel_z); if (c_z < 0 || c_z >= grid_z) { coors_offset[0] = -1; coors_offset[1] = -1; @@ -101,7 +101,7 @@ __global__ void point_to_voxelidx_kernel(const T_int* coor, CUDA_1D_KERNEL_LOOP(index, num_points) { auto coor_offset = coor + index * NDim; // skip invalid points - if (coor_offset[0] == -1) continue; + if ((index >= num_points) || (coor_offset[0] == -1)) return; int num = 0; int coor_x = coor_offset[0]; @@ -122,7 +122,7 @@ __global__ void point_to_voxelidx_kernel(const T_int* coor, point_to_pointidx[index] = i; } else if (num >= max_points) { // out of boundary - break; + return; } } } @@ -166,51 +166,4 @@ __global__ void determin_voxel_num( } } -__global__ void nondeterministic_get_assign_pos( - const int nthreads, const int32_t* 
coors_map, int32_t* pts_id, - int32_t* coors_count, int32_t* reduce_count, int32_t* coors_order) { - CUDA_1D_KERNEL_LOOP(thread_idx, nthreads) { - int coors_idx = coors_map[thread_idx]; - if (coors_idx > -1) { - int32_t coors_pts_pos = atomicAdd(&reduce_count[coors_idx], 1); - pts_id[thread_idx] = coors_pts_pos; - if (coors_pts_pos == 0) { - coors_order[coors_idx] = atomicAdd(coors_count, 1); - } - } - } -} - -template -__global__ void nondeterministic_assign_point_voxel( - const int nthreads, const T* points, const int32_t* coors_map, - const int32_t* pts_id, const int32_t* coors_in, const int32_t* reduce_count, - const int32_t* coors_order, T* voxels, int32_t* coors, int32_t* pts_count, - const int max_voxels, const int max_points, const int num_features, - const int NDim) { - CUDA_1D_KERNEL_LOOP(thread_idx, nthreads) { - int coors_idx = coors_map[thread_idx]; - int coors_pts_pos = pts_id[thread_idx]; - if (coors_idx > -1 && coors_pts_pos < max_points) { - int coors_pos = coors_order[coors_idx]; - if (coors_pos < max_voxels) { - auto voxels_offset = - voxels + (coors_pos * max_points + coors_pts_pos) * num_features; - auto points_offset = points + thread_idx * num_features; - for (int k = 0; k < num_features; k++) { - voxels_offset[k] = points_offset[k]; - } - if (coors_pts_pos == 0) { - pts_count[coors_pos] = min(reduce_count[coors_idx], max_points); - auto coors_offset = coors + coors_pos * NDim; - auto coors_in_offset = coors_in + coors_idx * NDim; - for (int k = 0; k < NDim; k++) { - coors_offset[k] = coors_in_offset[k]; - } - } - } - } - } -} - #endif // VOXELIZATION_CUDA_KERNEL_CUH diff --git a/mmcv/ops/csrc/common/mlu/bbox_overlaps_mlu_kernel.mlu b/mmcv/ops/csrc/common/mlu/bbox_overlaps_mlu_kernel.mlu deleted file mode 100644 index 0f273d2..0000000 --- a/mmcv/ops/csrc/common/mlu/bbox_overlaps_mlu_kernel.mlu +++ /dev/null @@ -1,322 +0,0 @@ -/************************************************************************* - * Copyright (C) 2021 Cambricon. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - *************************************************************************/ -#include - -#include "common_mlu_helper.hpp" - -#define COORD_NUM 4 - -__nram__ char nmem_buf[MAX_NRAM_SIZE]; - -template -__mlu_func__ void computeDiv(void *nram_dst, void *nram_src0, void *nram_src1, - void *nram_addition, const int32_t deal_num) { - __bang_active_reciphp((T *)nram_dst, (T *)nram_src1, deal_num); - __bang_mul((T *)nram_dst, (T *)nram_src0, (T *)nram_dst, deal_num); -} - -template <> -__mlu_func__ void computeDiv(void *nram_dst, void *nram_src0, - void *nram_src1, void *nram_addition, - const int32_t deal_num) { - __bang_half2float((float *)nram_addition, (half *)nram_src1, deal_num); - __bang_active_reciphp((float *)nram_addition, (float *)nram_addition, - deal_num); - __bang_float2half_rd((half *)nram_src1, (float *)nram_addition, deal_num); - __bang_mul((half *)nram_dst, (half *)nram_src0, (half *)nram_src1, deal_num); -} - -template -__mlu_func__ void bboxOverlapsWorkflow( - T *vec_b1_x1, T *vec_b1_y1, T *vec_b1_x2, T *vec_b1_y2, T *vec_b2_x1, - T *vec_b2_y1, T *vec_b2_x2, T *vec_b2_y2, T *vec_left, T *vec_right, - T *vec_top, T *vec_bottom, const T *bbox1, const T *bbox2, void *ious, - const int32_t offset, const int32_t mode, const int32_t batches_stride, - const int32_t num_bbox1, const int32_t num_bbox2, const bool aligned) { - int32_t task_batch_stride = (num_bbox1 + taskDim - 1) / taskDim; - int32_t batch_start = taskId * task_batch_stride; - int32_t batch_per_task = 
batch_start + task_batch_stride < num_bbox1 - ? task_batch_stride - : num_bbox1 - batch_start; - batch_per_task = batch_per_task > 0 ? batch_per_task : (0); - - if (aligned) { - int32_t num_loop_cpy = batch_per_task / batches_stride; - int32_t num_rem_cpy_batches = batch_per_task % batches_stride; - num_loop_cpy = num_rem_cpy_batches > 0 ? num_loop_cpy + 1 : num_loop_cpy; - for (int32_t i = 0; i < num_loop_cpy; i++) { - int32_t index = batch_start + i * batches_stride; - int32_t handle_batches = index + batches_stride > num_bbox1 - ? num_rem_cpy_batches - : batches_stride; - int32_t b1 = index; - int32_t b2 = index; - - int32_t base1 = b1 * COORD_NUM; - __memcpy(vec_b1_x1, &bbox1[base1], sizeof(T), GDRAM2NRAM, sizeof(T), - COORD_NUM * sizeof(T), handle_batches - 1); - __memcpy(vec_b1_y1, &bbox1[base1 + 1], sizeof(T), GDRAM2NRAM, sizeof(T), - COORD_NUM * sizeof(T), handle_batches - 1); - __memcpy(vec_b1_x2, &bbox1[base1 + 2], sizeof(T), GDRAM2NRAM, sizeof(T), - COORD_NUM * sizeof(T), handle_batches - 1); - __memcpy(vec_b1_y2, &bbox1[base1 + 3], sizeof(T), GDRAM2NRAM, sizeof(T), - COORD_NUM * sizeof(T), handle_batches - 1); - - int32_t base2 = b2 * COORD_NUM; - __memcpy(vec_b2_x1, &bbox2[base2], sizeof(T), GDRAM2NRAM, sizeof(T), - COORD_NUM * sizeof(T), handle_batches - 1); - __memcpy(vec_b2_y1, &bbox2[base2 + 1], sizeof(T), GDRAM2NRAM, sizeof(T), - COORD_NUM * sizeof(T), handle_batches - 1); - __memcpy(vec_b2_x2, &bbox2[base2 + 2], sizeof(T), GDRAM2NRAM, sizeof(T), - COORD_NUM * sizeof(T), handle_batches - 1); - __memcpy(vec_b2_y2, &bbox2[base2 + 3], sizeof(T), GDRAM2NRAM, sizeof(T), - COORD_NUM * sizeof(T), handle_batches - 1); - // get the width and height - __bang_maxequal(vec_left, vec_b1_x1, vec_b2_x1, batches_stride); - __bang_minequal(vec_right, vec_b1_x2, vec_b2_x2, batches_stride); - __bang_maxequal(vec_top, vec_b1_y1, vec_b2_y1, batches_stride); - __bang_minequal(vec_bottom, vec_b1_y2, vec_b2_y2, batches_stride); - - // right - left + offset ---> left - 
__bang_sub(vec_left, vec_right, vec_left, batches_stride); - __bang_add_scalar(vec_left, vec_left, (T)offset, batches_stride); - - // bottom - top + offset ---> right - __bang_sub(vec_right, vec_bottom, vec_top, batches_stride); - __bang_add_scalar(vec_right, vec_right, (T)offset, batches_stride); - - // zero vector ---> bottom - __bang_write_value(vec_bottom, batches_stride, 0.f); - - // width --> vec_left - __bang_maxequal(vec_left, vec_bottom, vec_left, batches_stride); - T *width = vec_left; - // height --> vec_right - __bang_maxequal(vec_right, vec_bottom, vec_right, batches_stride); - T *height = vec_right; - - // get the b1_area - // (b1_x2 - b1_x1 + offset) ---> vec_top - __bang_sub(vec_top, vec_b1_x2, vec_b1_x1, batches_stride); - __bang_add_scalar(vec_top, vec_top, (T)offset, batches_stride); - - // (b1_y2 - b1_y1 + offset) ---> vec_bottom - __bang_sub(vec_bottom, vec_b1_y2, vec_b1_y1, batches_stride); - __bang_add_scalar(vec_bottom, vec_bottom, (T)offset, batches_stride); - - // b1_area = (b1_x2 - b1_x1 + offset) * (b1_y2 - b1_y1 + offset) - // ---> vec_top; - __bang_mul(vec_top, vec_top, vec_bottom, batches_stride); - T *b1_area = vec_top; - - // get the b2_area - // (b2_x2 - b2_x1 + offset) ---> b2_x1 - __bang_sub(vec_b2_x1, vec_b2_x2, vec_b2_x1, batches_stride); - __bang_add_scalar(vec_b2_x1, vec_b2_x1, (T)offset, batches_stride); - - // (b2_y2 - b2_y1 + offset) ---> b2_y1 - __bang_sub(vec_b2_y1, vec_b2_y2, vec_b2_y1, batches_stride); - __bang_add_scalar(vec_b2_y1, vec_b2_y1, (T)offset, batches_stride); - - // b2_area = (b2_x2 - b2_x1 + offset) * (b2_y2 - b2_y1 + offset) - // ---> b2_x1; - __bang_mul(vec_b2_x1, vec_b2_x1, vec_b2_y1, batches_stride); - T *b2_area = vec_b2_x1; - - // inter_s = width * height - __bang_mul(height, width, height, batches_stride); - T *inter_s = height; - - // offset vector ---> vec_b2_y1 - __bang_write_value(vec_b2_y1, batches_stride, T(offset)); - T *vec_offset = vec_b2_y1; - - if (mode == 0) { - __bang_add(b1_area, 
b1_area, b2_area, batches_stride); - __bang_sub(b1_area, b1_area, inter_s, batches_stride); - __bang_maxequal(b1_area, vec_offset, b1_area, batches_stride); - } else { - __bang_maxequal(b1_area, vec_offset, b1_area, batches_stride); - } - T *base_s = b1_area; - - // ious = inter_s / base_s - computeDiv(width, inter_s, base_s, vec_b2_x2, batches_stride); - __memcpy((T *)ious + index, width, handle_batches * sizeof(T), - NRAM2GDRAM); - } - } else { - int32_t num_loop_cpy = num_bbox2 / batches_stride; - int32_t num_rem_cpy_batches = num_bbox2 % batches_stride; - num_loop_cpy = num_rem_cpy_batches > 0 ? num_loop_cpy + 1 : num_loop_cpy; - for (int32_t i = 0; i < batch_per_task; i++) { - int32_t index1 = batch_start + i; - int32_t b1 = index1; - int32_t base1 = b1 * COORD_NUM; - - // set bbox1 and bbox2 to nram - __bang_write_value(vec_b1_x1, batches_stride, bbox1[base1]); - __bang_write_value(vec_b1_y1, batches_stride, bbox1[base1 + 1]); - __bang_write_value(vec_b1_x2, batches_stride, bbox1[base1 + 2]); - __bang_write_value(vec_b1_y2, batches_stride, bbox1[base1 + 3]); - - for (int32_t j = 0; j < num_loop_cpy; j++) { - int32_t index2 = j * batches_stride; - int32_t handle_batches = index2 + batches_stride > num_bbox2 - ? 
num_rem_cpy_batches - : batches_stride; - int32_t b2 = index2; - int32_t base2 = b2 * COORD_NUM; - - // copy bbox2 to nram - __memcpy(vec_b2_x1, &bbox2[base2], sizeof(T), GDRAM2NRAM, sizeof(T), - COORD_NUM * sizeof(T), handle_batches - 1); - __memcpy(vec_b2_y1, &bbox2[base2 + 1], sizeof(T), GDRAM2NRAM, sizeof(T), - COORD_NUM * sizeof(T), handle_batches - 1); - __memcpy(vec_b2_x2, &bbox2[base2 + 2], sizeof(T), GDRAM2NRAM, sizeof(T), - COORD_NUM * sizeof(T), handle_batches - 1); - __memcpy(vec_b2_y2, &bbox2[base2 + 3], sizeof(T), GDRAM2NRAM, sizeof(T), - COORD_NUM * sizeof(T), handle_batches - 1); - - // get the width and height - __bang_maxequal(vec_left, vec_b1_x1, vec_b2_x1, batches_stride); - __bang_minequal(vec_right, vec_b1_x2, vec_b2_x2, batches_stride); - __bang_maxequal(vec_top, vec_b1_y1, vec_b2_y1, batches_stride); - __bang_minequal(vec_bottom, vec_b1_y2, vec_b2_y2, batches_stride); - - // right - left + offset ---> left - __bang_sub(vec_left, vec_right, vec_left, batches_stride); - __bang_add_scalar(vec_left, vec_left, (T)offset, batches_stride); - // bottom - top + offset ---> right - __bang_sub(vec_right, vec_bottom, vec_top, batches_stride); - __bang_add_scalar(vec_right, vec_right, (T)offset, batches_stride); - - // zero vector ---> bottom - __bang_write_value(vec_bottom, batches_stride, (T)0); - - // width --> vec_left - __bang_maxequal(vec_left, vec_bottom, vec_left, batches_stride); - T *width = vec_left; - // height --> vec_right - __bang_maxequal(vec_right, vec_bottom, vec_right, batches_stride); - T *height = vec_right; - - // get the b1_area - // (b1_x2 - b1_x1 + offset) ---> vec_top - __bang_sub(vec_top, vec_b1_x2, vec_b1_x1, batches_stride); - __bang_add_scalar(vec_top, vec_top, (T)offset, batches_stride); - // (b1_y2 - b1_y1 + offset) ---> vec_bottom - __bang_sub(vec_bottom, vec_b1_y2, vec_b1_y1, batches_stride); - __bang_add_scalar(vec_bottom, vec_bottom, (T)offset, batches_stride); - // b1_area = (b1_x2 - b1_x1 + offset) * (b1_y2 - b1_y1 + 
offset) - // ---> vec_top; - __bang_mul(vec_top, vec_top, vec_bottom, batches_stride); - T *b1_area = vec_top; - - // get the b2_area - // (b2_x2 - b2_x1 + offset) ---> b2_x1 - __bang_sub(vec_b2_x1, vec_b2_x2, vec_b2_x1, batches_stride); - __bang_add_scalar(vec_b2_x1, vec_b2_x1, (T)offset, batches_stride); - // (b2_y2 - b2_y1 + offset) ---> b2_y1 - __bang_sub(vec_b2_y1, vec_b2_y2, vec_b2_y1, batches_stride); - __bang_add_scalar(vec_b2_y1, vec_b2_y1, (T)offset, batches_stride); - // b2_area = (b2_x2 - b2_x1 + offset) * (b2_y2 - b2_y1 + offset) - // ---> b2_x1; - __bang_mul(vec_b2_x1, vec_b2_x1, vec_b2_y1, batches_stride); - T *b2_area = vec_b2_x1; - - // inter_s = width * height - __bang_mul(height, width, height, batches_stride); - T *inter_s = height; - - // offset vector ---> vec_b2_y1 - __bang_write_value(vec_b2_y1, batches_stride, T(offset)); - T *vec_offset = vec_b2_y1; - - if (mode == 0) { - __bang_add(b1_area, b1_area, b2_area, batches_stride); - __bang_sub(b1_area, b1_area, inter_s, batches_stride); - __bang_maxequal(b1_area, vec_offset, b1_area, batches_stride); - } else { - __bang_maxequal(b1_area, vec_offset, b1_area, batches_stride); - } - T *base_s = b1_area; - - // ious = inter_s / base_s - computeDiv(width, inter_s, base_s, vec_b2_x2, batches_stride); - int32_t gdram_offset = index1 * num_bbox2 + index2; - __memcpy((T *)ious + gdram_offset, width, handle_batches * sizeof(T), - NRAM2GDRAM); - } - } - } -} - -template -__mlu_global__ void MLUUnion1KernelBBoxOverlaps( - const void *bbox1, const void *bbox2, void *ious, const int32_t num_bbox1, - const int32_t num_bbox2, const int32_t mode, const bool aligned, - const int32_t offset) { - /* - * NRAM partition - * |-------------------------------------------------------------| - * | vec_b1_x1 | vec_b1_y1 | vec_b1_x2 | vec_b1_y2 | - * |-------------------------------------------------------------| - * | vec_b2_x1 | vec_b2_y1 | vec_b2_x2 | vec_b2_y2 | - * 
|-------------------------------------------------------------| - * | vec_left | vec_right | vec_top | vec_bottom | - * |-------------------------------------------------------------| - * - */ - const int32_t align_bytes = PAD_DOWN(MAX_NRAM_SIZE, NFU_ALIGN_SIZE); - const int32_t split_nram_num = 12; - const int32_t nram_stride = - align_bytes / NFU_ALIGN_SIZE / split_nram_num * NFU_ALIGN_SIZE; - - void *vec_b1_x1 = nmem_buf; - void *vec_b1_y1 = nmem_buf + nram_stride; - void *vec_b1_x2 = nmem_buf + 2 * nram_stride; - void *vec_b1_y2 = nmem_buf + 3 * nram_stride; - - void *vec_b2_x1 = nmem_buf + 4 * nram_stride; - void *vec_b2_y1 = nmem_buf + 5 * nram_stride; - void *vec_b2_x2 = nmem_buf + 6 * nram_stride; - void *vec_b2_y2 = nmem_buf + 7 * nram_stride; - - void *vec_left = nmem_buf + 8 * nram_stride; - void *vec_right = nmem_buf + 9 * nram_stride; - void *vec_top = nmem_buf + 10 * nram_stride; - void *vec_bottom = nmem_buf + 11 * nram_stride; - - const int32_t vec_length = nram_stride / sizeof(T); - bboxOverlapsWorkflow((T *)vec_b1_x1, (T *)vec_b1_y1, (T *)vec_b1_x2, - (T *)vec_b1_y2, (T *)vec_b2_x1, (T *)vec_b2_y1, - (T *)vec_b2_x2, (T *)vec_b2_y2, (T *)vec_left, - (T *)vec_right, (T *)vec_top, (T *)vec_bottom, - (T *)bbox1, (T *)bbox2, (T *)ious, offset, mode, - vec_length, num_bbox1, num_bbox2, aligned); -} - -void KernelBBoxOverlaps(cnrtDim3_t k_dim, cnrtFunctionType_t k_type, - cnrtQueue_t queue, const cnrtDataType_t d_type, - const void *bbox1, const void *bbox2, void *ious, - const int32_t num_bbox1, const int32_t num_bbox2, - const int32_t mode, const bool aligned, - const int32_t offset) { - if (d_type == CNRT_FLOAT16) { - MLUUnion1KernelBBoxOverlaps<<>>( - bbox1, bbox2, ious, num_bbox1, num_bbox2, mode, aligned, offset); - } else { - MLUUnion1KernelBBoxOverlaps<<>>( - bbox1, bbox2, ious, num_bbox1, num_bbox2, mode, aligned, offset); - } -} diff --git a/mmcv/ops/csrc/common/mlu/carafe_mlu_kernel.mlu b/mmcv/ops/csrc/common/mlu/carafe_mlu_kernel.mlu deleted 
file mode 100644 index 8dd6a8e..0000000 --- a/mmcv/ops/csrc/common/mlu/carafe_mlu_kernel.mlu +++ /dev/null @@ -1,552 +0,0 @@ -/************************************************************************* - * Copyright (C) 2022 Cambricon. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - *************************************************************************/ -#include "carafe_utils.hpp" -#include "common_mlu_helper.hpp" - -#define INDEX3(n, h, w, c, strN, strH, strW) \ - (strN) * (n) + (strH) * (h) + (strW) * (w) + (c) - -#define NRAM_BLOCK PAD_DOWN(MAX_NRAM_SIZE / 5, NRAM_ALIGN_SIZE) - -__nram__ char nram_buf[MAX_NRAM_SIZE]; - -namespace forward { -struct BlockId { - int Ho; - int Wo; - int G; - int Cg; - int Kh; - int Kw; - int Hi; - int Wi; -}; - -// start indices of block -struct BlockStart { - int Ho; - int Wo; - int G; - int Cg; - int Kh; - int Kw; - int Hi; - int Wi; - int C; -}; - -struct BlockEnd { - int Ho; - int Wo; - int Kh; - int Kw; - int Hi; - int Wi; -}; - -struct BlockSize { - int Ho; - int Wo; - int G; - int Cg; - int Kh; - int Kw; - int Hi; - int Wi; -}; - -template -__mlu_func__ void carafeForwardBLOCK(T *input, T *mask, - const CarafeForwardParam param, - const CarafeForwardBlockDim block_dim, - const CarafeForwardGridDim grid_dim, - T *output) { - // data block info - BlockId blkId; - BlockStart blkStart; - BlockEnd blkEnd; - BlockSize blkSize; - - // set pointers on NRAM arrays - - // input_nram[blkDim_(Hi+Kh)-1, blkDim_(Wi+Kw)-1, blkDim_(G*Cg)] - T *input_nram = (T *)nram_buf; - - // mask_nram[blkDim_Ho, 
blkDim_Wo, blkDim_(G*Kh*Kw)] - T *mask_nram = input_nram + param.input_nram_size; - - // output_nram[blkDim_Ho, blkDim_Wo, blkDim_(G*Cg)] - T *output_nram = mask_nram + param.mask_nram_size; - - // sum_array[blkDim_(G*Cg)] - T *sum_array = output_nram + param.output_nram_size; - - /* ===== loop over N, grid_dim(Ho,Wo,G,Cg) - * iterations are distributed over computing cores - */ - for (int loop_index = taskId; loop_index < param.job_num; - loop_index += taskDim) { - // block idx - blkId.Cg = loop_index; - blkId.G = blkId.Cg / grid_dim.Cg; - blkId.Wo = blkId.G / grid_dim.G; - blkId.Ho = blkId.Wo / grid_dim.Wo; - int sample_idx = blkId.Ho / grid_dim.Ho; - - blkId.Cg %= grid_dim.Cg; - blkId.G %= grid_dim.G; - blkId.Wo %= grid_dim.Wo; - blkId.Ho %= grid_dim.Ho; - - // block starting indices - blkStart.Ho = blkId.Ho * block_dim.Ho; - blkStart.Wo = blkId.Wo * block_dim.Wo; - blkStart.G = blkId.G * block_dim.G; - blkStart.Cg = blkId.Cg * block_dim.Cg; - blkStart.C = blkStart.G * param.Cg + blkStart.Cg; - - // block size - blkSize.Ho = block_dim.Ho; - blkSize.Wo = block_dim.Wo; - blkSize.G = block_dim.G; - blkSize.Cg = block_dim.Cg; - - // take care of blocks near the end of each dimension - if (blkId.Ho == (grid_dim.Ho - 1)) { - blkSize.Ho = param.Ho - (grid_dim.Ho - 1) * block_dim.Ho; - } - if (blkId.Wo == (grid_dim.Wo - 1)) { - blkSize.Wo = param.Wo - (grid_dim.Wo - 1) * block_dim.Wo; - } - if (blkId.G == (grid_dim.G - 1)) { - blkSize.G = param.group_size - (grid_dim.G - 1) * block_dim.G; - } - if (blkId.Cg == (grid_dim.Cg - 1)) { - blkSize.Cg = param.Cg - (grid_dim.Cg - 1) * block_dim.Cg; - } - - // block end indices - blkEnd.Ho = blkStart.Ho + blkSize.Ho - 1; - blkEnd.Wo = blkStart.Wo + blkSize.Wo - 1; - - // set output_nram to zero - __bang_write_value(output_nram, param.output_nram_size, T(0)); - - // loop blocks of kernel window: grid_dim.(Kh, Kw) - for (blkId.Kh = 0; blkId.Kh < grid_dim.Kh; ++blkId.Kh) { - blkStart.Kh = blkId.Kh * block_dim.Kh; - blkSize.Kh = 
block_dim.Kh; - if (blkId.Kh == (grid_dim.Kh - 1)) { - blkSize.Kh = param.kernel_size - (grid_dim.Kh - 1) * block_dim.Kh; - } - blkEnd.Kh = blkStart.Kh + blkSize.Kh - 1; - - blkStart.Hi = blkStart.Ho / param.scale_factor - param.kernel_size_half + - blkStart.Kh; - blkEnd.Hi = - blkEnd.Ho / param.scale_factor - param.kernel_size_half + blkEnd.Kh; - blkSize.Hi = blkEnd.Hi - blkStart.Hi + 1; - - for (blkId.Kw = 0; blkId.Kw < grid_dim.Kw; ++blkId.Kw) { - blkStart.Kw = blkId.Kw * block_dim.Kw; - blkSize.Kw = block_dim.Kw; - if (blkId.Kw == (grid_dim.Kw - 1)) { - blkSize.Kw = param.kernel_size - (grid_dim.Kw - 1) * block_dim.Kw; - } - blkEnd.Kw = blkStart.Kw + blkSize.Kw - 1; - - blkStart.Wi = blkStart.Wo / param.scale_factor - - param.kernel_size_half + blkStart.Kw; - blkEnd.Wi = - blkEnd.Wo / param.scale_factor - param.kernel_size_half + blkEnd.Kw; - blkSize.Wi = blkEnd.Wi - blkStart.Wi + 1; - - // load input block from gdram2nram - // - // input_nram[ | input[ sample_idx, - // 0:blkSize.Hi-1, | blkStart.Hi + 0:blkSize.Hi-1, - // 0:blkSize.Wi-1, | blkStart.Wi + 0:blkSize.Wi-1, - // 0:blkSize.G-1 | blkStart.G + 0:blkSize.G-1 - // 0:blkSize.Cg-1] | blkStart.Cg + 0:blkSize.Cg-1] - // - // To skip out of bound indices: - // - // input_nram[ - // hi_start_local:hi_end_local, - // wi_start_local:wi_end_local, ...] - // = input[n, - // hi_start_global:hi_end_global, - // wi_start_global:wi_end_global, ...] 
- // - int hi_start_local = 0; - int hi_start_global = blkStart.Hi; - if (blkStart.Hi < 0) { - hi_start_local = -blkStart.Hi; - hi_start_global = 0; - } - int wi_start_local = 0; - int wi_start_global = blkStart.Wi; - if (blkStart.Wi < 0) { - wi_start_local = -blkStart.Wi; - wi_start_global = 0; - } - int hi_end_local = blkSize.Hi - 1; - int hi_end_global = blkEnd.Hi; - if (blkEnd.Hi > param.Hi - 1) { - hi_end_global = param.Hi - 1; - hi_end_local -= blkEnd.Hi - hi_end_global; - } - int wi_end_local = blkSize.Wi - 1; - int wi_end_global = blkEnd.Wi; - if (blkEnd.Wi > param.Wi - 1) { - wi_end_global = param.Wi - 1; - wi_end_local -= blkEnd.Wi - wi_end_global; - } - - int dst_offset = param.input_nram_stride_h * hi_start_local + - param.input_nram_stride_w * wi_start_local; - T *dst = input_nram + dst_offset; - - int src_offset = INDEX3(sample_idx, hi_start_global, wi_start_global, - blkStart.C, param.input_stride_n, - param.input_stride_h, param.input_stride_w); - T *src = input + src_offset; - - int input_seg_num_h = hi_end_local - hi_start_local + 1; - int input_seg_num_w = wi_end_local - wi_start_local + 1; - for (int i = 0; i < input_seg_num_h; ++i) { - loadStr3D(dst, src, blkSize.Cg, blkSize.G, input_seg_num_w, - param.input_nram_stride_g, param.input_nram_stride_w, - param.input_stride_g, param.input_stride_w); - dst += param.input_nram_stride_h; - src += param.input_stride_h; - } - - /* load mask block from gdram2nram - * - * mask_nram[ | mask[sample_idx, - * 0:blkSize.Ho-1 , | blkStart.Ho + 0:blkSize.Ho-1, - * 0:blkSize.Wo-1, | blkStart.Wo + 0:blkSize.Wo-1, - * 0:blkSize.G-1, | blkStart.G + 0:blkSize.G-1, - * 0:blkSize.Kh-1, | blkStart.Kh + 0:blkSize.Kh-1, - * 0:blkSize.Kw-1] | blkStart.Kw + 0:blkSize.Kw-1] - */ - src_offset = INDEX3(blkStart.Wo, blkStart.G, blkStart.Kh, blkStart.Kw, - param.mask_stride_w, param.mask_stride_g, - param.mask_stride_kh); - src_offset += sample_idx * param.mask_stride_n + - blkStart.Ho * param.mask_stride_h; - - for (int ho = 0; 
ho < blkSize.Ho; ++ho) { - dst = mask_nram + ho * param.mask_nram_stride_h; - src = mask + src_offset + ho * param.mask_stride_h; - - for (int wo = 0; wo < blkSize.Wo; ++wo) { - loadStr3D(dst, src, blkSize.Kw, blkSize.Kh, blkSize.G, - param.mask_nram_stride_kh, param.mask_nram_stride_g, - param.mask_stride_kh, param.mask_stride_g); - dst += param.mask_nram_stride_w; - src += param.mask_stride_w; - } - } - - // loop each pixel of the output block - for (int ho = 0; ho < blkSize.Ho; ++ho) { - int kernel_hi_start_global = (blkStart.Ho + ho) / param.scale_factor - - param.kernel_size_half + blkStart.Kh; - int kernel_hi_start_local = kernel_hi_start_global - blkStart.Hi; - - // int kernel_hi_end_global = kernel_hi_start_global + blkSize.Kh - 1; - // int kernel_hi_end_local = kernel_hi_end_global - blkStart.Hi; - - // exclude out of bound indices which should be ignored - int kh_min = hi_start_local - kernel_hi_start_local > 0 - ? hi_start_local - kernel_hi_start_local - : 0; - int kh_max = hi_end_local - kernel_hi_start_local < blkSize.Kh - 1 - ? hi_end_local - kernel_hi_start_local - : blkSize.Kh - 1; - - for (int wo = 0; wo < blkSize.Wo; ++wo) { - int kernel_wi_start_global = - (blkStart.Wo + wo) / param.scale_factor - - param.kernel_size_half + blkStart.Kw; - int kernel_wi_start_local = kernel_wi_start_global - blkStart.Wi; - - // exclude out of bound indices wwich should be ignored - int kw_min = wi_start_local - kernel_wi_start_local > 0 - ? wi_start_local - kernel_wi_start_local - : 0; - int kw_max = wi_end_local - kernel_wi_start_local < blkSize.Kw - 1 - ? 
wi_end_local - kernel_wi_start_local - : blkSize.Kw - 1; - - // output_nram[ho, wo, g, c] = sum(mask_nram[ho, wo, g, kh, kw] - // * input_nram[hi+kh, wi+kw, g, c], - // for (kh,kw) in [0:blkSize.Kw-1] x [0:blkSize.Kh-1]) - // - // sum(mask_nram[ho, wo, g, kh, kw] - // * input_nram[hi+kh, wi+kw, g, c], (kh,kw)) - // - T *mask_array = mask_nram + param.mask_nram_stride_h * ho + - param.mask_nram_stride_w * wo; - - for (int kh = kh_min; kh <= kh_max; ++kh) { - for (int kw = kw_min; kw <= kw_max; ++kw) { - T *src = - input_nram + - param.input_nram_stride_h * (kernel_hi_start_local + kh) + - param.input_nram_stride_w * (kernel_wi_start_local + kw); - - int mask_index = param.mask_nram_stride_kh * kh + kw; - - // mlutiply mask weight with channels for each channel group - T *sum = sum_array; - - for (int g = 0; g < blkSize.G; ++g) { - __bang_mul_scalar(sum, src, mask_array[mask_index], - param.block_Cg_NFU); - // - // NOTE: Since block_Cg_NFU >= block_Cg_stride, - // overlapped writing may occur on sum_array. - // So this loop must be executed in order to - // avoid data contamination, as shown below. - // - // |-----block_Cg_NFU---------| - // xxxxxxxxxxxxxxxxxxxxyyyzzzzz------------ - // |---block_Cg_stride---|^^^^^will be overwritten - // in the next iteration. 
- // - // x: actual data used, y: not used, z: overwritten - // - sum += param.input_nram_stride_g; - src += param.input_nram_stride_g; - mask_index += param.mask_nram_stride_g; - } // loop blk_G - - // add array[blk_G * blk_C] to output_nram - dst = output_nram + param.output_nram_stride_h * ho + - param.output_nram_stride_w * wo; - - __bang_add(dst, dst, sum_array, param.output_nram_stride_w); - } // end loop blk_Kw - } // end loop blk_Kh - } // end loop blk_Wo - } // end loop blk_Ho - } // end loop grid_dim.Kw - } // end loop grid_dim.Kh - - /* write output from nram2gdram - * - * output_nram[ | output[sample_idx, - * 0:blkSize.Ho-1, | blkStart.Ho + 0:blkSize.Ho-1, - * 0:blkSize.Wo-1, | blkStart.Wo + 0:blkSize.Wo-1, - * 0:blkSize.G-1, | blkStart.G + 0:blkSize.G-1, - * 0:blkSize.Cg-1] | blkStart.Cg + 0:blkSize.Cg-1] - */ - int dst_offset = INDEX3(sample_idx, blkStart.Ho, blkStart.Wo, blkStart.C, - param.output_stride_n, param.output_stride_h, - param.output_stride_w); - T *dst = output + dst_offset; - T *src = output_nram; - for (int i = 0; i < blkSize.Ho; ++i) { - storeStr3D(dst, src, blkSize.Cg, blkSize.G, blkSize.Wo, - param.output_stride_g, param.output_stride_w, - param.output_nram_stride_g, param.output_nram_stride_w); - dst += param.output_stride_h; - src += param.output_nram_stride_h; - } - } // end loop N, grid_dim.(Hi,Wi,G,Cg) -} - -template -__mlu_global__ void MLUBLOCKKernelCarafeForward( - const void *input, const void *mask, const CarafeForwardParam param, - const CarafeForwardBlockDim block_dim, const CarafeForwardGridDim grid_dim, - void *output) { - carafeForwardBLOCK((T *)input, (T *)mask, param, block_dim, grid_dim, - (T *)output); -} -} // namespace forward - -namespace backward { -template -__mlu_func__ void CarafeCompute(T *input, T *mask, T *grad_output, - T *grad_input, T *grad_mask, const int n, - const int hi, const int wi, const int c, - const int k_up, const int group, - const int scale) { - char *input_buff = nram_buf; - char 
*mask_buff = input_buff + NRAM_BLOCK; - char *grad_input_buff = mask_buff + NRAM_BLOCK; - char *grad_output_buff = grad_input_buff + NRAM_BLOCK; - char *grad_mask_buff = grad_output_buff + NRAM_BLOCK; - - int wo = wi * scale; - int ho = hi * scale; - int out_num = n * ho * wo * group; - int group_size = c / group; - int repeat = out_num / taskDim + (int)(taskId < out_num % taskDim); - int num_align = PAD_DOWN(NRAM_BLOCK / sizeof(T), NFU_ALIGN_SIZE / sizeof(T)); - int num_per_loop = group_size / num_align; - int rem_for_loop = group_size % num_align; - int rem_for_loop_align = PAD_UP(rem_for_loop, NFU_ALIGN_SIZE / sizeof(T)); - for (int k = 0; k < repeat; k++) { - int iter = k * taskDim + taskId; - int group_k = iter % group; - int w_k = (iter / group) % wo; - int h_k = (iter / wo / group) % ho; - int n_k = (iter / ho / wo / group) % n; - int h_i = h_k / scale; - int w_i = w_k / scale; - int start_h = h_i - ((k_up - 1) / 2); - int end_h = h_i + ((k_up - 1) / 2) + 1; - int start_w = w_i - ((k_up - 1) / 2); - int end_w = w_i + ((k_up - 1) / 2) + 1; - T *base_mask = (T *)mask + n_k * ho * wo * group * k_up * k_up + - h_k * wo * group * k_up * k_up + w_k * group * k_up * k_up + - group_k * k_up * k_up; - T *base_grad_mask = (T *)grad_mask + n_k * ho * wo * group * k_up * k_up + - h_k * wo * group * k_up * k_up + - w_k * group * k_up * k_up + group_k * k_up * k_up; - - __bang_write_zero((T *)grad_input_buff, NRAM_BLOCK / sizeof(T)); - __bang_write_zero((T *)grad_mask_buff, NRAM_BLOCK / sizeof(T)); - __bang_write_zero((T *)grad_output_buff, NRAM_BLOCK / sizeof(T)); - - __memcpy((T *)mask_buff, (T *)base_mask, k_up * k_up * sizeof(T), - GDRAM2NRAM); - for (int i = 0; i < num_per_loop; i++) { - __bang_write_zero((T *)input_buff, NRAM_BLOCK / sizeof(T)); - T *base_grad_output = (T *)grad_output + n_k * ho * wo * c + - h_k * wo * c + w_k * c + group_k * group_size + - i * num_align; - __memcpy((T *)grad_output_buff, (T *)base_grad_output, - num_align * sizeof(T), GDRAM2NRAM); 
- for (int ih = start_h; ih < end_h; ih++) { - for (int iw = start_w; iw < end_w; iw++) { - if (ih < 0 || ih > hi - 1 || iw < 0 || iw > wi - 1) { - continue; - } - int mask_ih = ih - h_i + (k_up - 1) / 2; - int mask_iw = iw - w_i + (k_up - 1) / 2; - int mask_index = mask_ih * k_up + mask_iw; - int input_index = n_k * hi * wi * c + ih * wi * c + iw * c + - group_k * group_size + i * num_align; - T *base_input = (T *)input + input_index; - T *base_grad_input = (T *)grad_input + input_index; - __memcpy((T *)input_buff, (T *)base_input, num_align * sizeof(T), - GDRAM2NRAM); - __bang_mul_scalar((T *)grad_input_buff, (T *)grad_output_buff, - ((T *)mask_buff)[mask_index], num_align); - __bang_atomic_add((T *)grad_input_buff, (T *)base_grad_input, - (T *)grad_input_buff, num_align); - __bang_mul((T *)input_buff, (T *)grad_output_buff, (T *)input_buff, - num_align); - - __bang_sumpool((T *)input_buff, (T *)input_buff, - NFU_ALIGN_SIZE / sizeof(T), - num_align / (NFU_ALIGN_SIZE / sizeof(T)), 1, - num_align / (NFU_ALIGN_SIZE / sizeof(T)), 1, 1, 1); - - __bang_reduce_sum((T *)input_buff, (T *)input_buff, - NFU_ALIGN_SIZE / sizeof(T)); - ((T *)grad_mask_buff)[mask_index] += ((T *)input_buff)[0]; - } - } - } - if (rem_for_loop) { - __bang_write_zero((T *)input_buff, NRAM_BLOCK / sizeof(T)); - T *base_grad_output = (T *)grad_output + n_k * ho * wo * c + - h_k * wo * c + w_k * c + group_k * group_size + - num_per_loop * num_align; - __memcpy((T *)grad_output_buff, (T *)base_grad_output, - rem_for_loop * sizeof(T), GDRAM2NRAM); - for (int ih = start_h; ih < end_h; ih++) { - for (int iw = start_w; iw < end_w; iw++) { - if (ih < 0 || ih > hi - 1 || iw < 0 || iw > wi - 1) { - continue; - } - int mask_ih = ih - h_i + (k_up - 1) / 2; - int mask_iw = iw - w_i + (k_up - 1) / 2; - int mask_index = mask_ih * k_up + mask_iw; - int input_index = n_k * hi * wi * c + ih * wi * c + iw * c + - group_k * group_size + num_per_loop * num_align; - T *base_input = (T *)input + input_index; - T 
*base_grad_input = (T *)grad_input + input_index; - __memcpy((T *)input_buff, (T *)base_input, rem_for_loop * sizeof(T), - GDRAM2NRAM); - __bang_mul_scalar((T *)grad_input_buff, (T *)grad_output_buff, - ((T *)mask_buff)[mask_index], rem_for_loop_align); - __bang_atomic_add((T *)grad_input_buff, (T *)base_grad_input, - (T *)grad_input_buff, rem_for_loop); - __bang_mul((T *)input_buff, (T *)grad_output_buff, (T *)input_buff, - rem_for_loop_align); - - __bang_sumpool( - (T *)input_buff, (T *)input_buff, NFU_ALIGN_SIZE / sizeof(T), - rem_for_loop_align / (NFU_ALIGN_SIZE / sizeof(T)), 1, - rem_for_loop_align / (NFU_ALIGN_SIZE / sizeof(T)), 1, 1, 1); - __bang_reduce_sum((T *)input_buff, (T *)input_buff, - NFU_ALIGN_SIZE / sizeof(T)); - - ((T *)grad_mask_buff)[mask_index] += ((T *)input_buff)[0]; - } - } - } - __memcpy((T *)base_grad_mask, (T *)grad_mask_buff, k_up * k_up * sizeof(T), - NRAM2GDRAM); - } -} - -template -__mlu_global__ void MLUUnion1KernelCarafeBackward( - const void *input, const void *mask, const void *grad_output, - void *grad_input, void *grad_mask, const int n, const int hi, const int wi, - const int c, const int k_up, const int group, const int scale) { - CarafeCompute((T *)input, (T *)mask, (T *)grad_output, (T *)grad_input, - (T *)grad_mask, n, hi, wi, c, k_up, group, scale); -} -} // namespace backward - -void KernelCarafeForward(cnrtDim3_t k_dim, cnrtFunctionType_t k_type, - cnrtQueue_t queue, const cnrtDataType_t d_type, - const void *input, const void *mask, - const CarafeForwardParam ¶m, - const CarafeForwardBlockDim &block_dim, - const CarafeForwardGridDim &grid_dim, void *output) { - if (d_type == CNRT_FLOAT16) { - forward::MLUBLOCKKernelCarafeForward<<>>( - input, mask, param, block_dim, grid_dim, output); - } else { - forward::MLUBLOCKKernelCarafeForward<<>>( - input, mask, param, block_dim, grid_dim, output); - } -} - -void KernelCarafeBackward(cnrtDim3_t k_dim, cnrtFunctionType_t k_type, - cnrtQueue_t queue, cnrtDataType_t dtype, - const 
void *input, const void *mask, - const void *grad_output, void *grad_input, - void *grad_mask, const int n, const int hi, - const int wi, const int c, const int k_up, - const int group, const int scale) { - if (dtype == CNRT_FLOAT16) { - backward::MLUUnion1KernelCarafeBackward<<>>( - input, mask, grad_output, grad_input, grad_mask, n, hi, wi, c, k_up, - group, scale); - } else { - backward::MLUUnion1KernelCarafeBackward<<>>( - input, mask, grad_output, grad_input, grad_mask, n, hi, wi, c, k_up, - group, scale); - } -} diff --git a/mmcv/ops/csrc/common/mlu/carafe_utils.hpp b/mmcv/ops/csrc/common/mlu/carafe_utils.hpp deleted file mode 100644 index 09ca60a..0000000 --- a/mmcv/ops/csrc/common/mlu/carafe_utils.hpp +++ /dev/null @@ -1,95 +0,0 @@ -/************************************************************************* - * Copyright (C) 2022 Cambricon. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- *************************************************************************/ -#ifndef CARAFE_UTILS_HPP_ -#define CARAFE_UTILS_HPP_ - -#define NRAM_ALIGN_SIZE 64 - -struct CarafeForwardParam { - int N; // batch size - int Hi; // input height - int Wi; // input width - int Ci; // input channels - int Ho; // output height - int Wo; // output width - int Cg; // channels per group - - int kernel_size; // kernel_size - int group_size; // group_size - int scale_factor; // scale_factor - int kernel_size_half; // kernel half size (K-1)/2 - int kernel_size_sq; // square of kernel size - - int dtype_size; // size of tensor data type - - // Host arrays' geometry - int input_stride_g; - int input_stride_w; - int input_stride_h; - int input_stride_n; - int input_size; - int mask_stride_kh; - int mask_stride_g; - int mask_stride_w; - int mask_stride_h; - int mask_stride_n; - int mask_size; - int output_stride_g; - int output_stride_w; - int output_stride_h; - int output_stride_n; - int output_size; - - // NRAM arrays' geometry - int input_nram_stride_g; - int input_nram_stride_w; - int input_nram_stride_h; - int input_nram_size; - int mask_nram_stride_kh; - int mask_nram_stride_g; - int mask_nram_stride_w; - int mask_nram_stride_h; - int mask_nram_size; - int output_nram_stride_g; - int output_nram_stride_w; - int output_nram_stride_h; - int output_nram_size; - - // for address/compute alignment - int align_size_NRAM; // for addressing on NRAM - int align_size_NFU; // for NFU operation length - int block_Cg_NFU; // for bang_mul_const - - int job_num; // total job number -}; - -struct CarafeForwardBlockDim { - int Ho; // block size of output height - int Wo; // block size of output width - int Kh; // block size of kernel height - int Kw; // block size of kernel width - int G; // block size of groups - int Cg; // block size of channels within a group - int Hi; // block size of input height - int Wi; // block size of input width -}; - -struct CarafeForwardGridDim { - int Ho; // 
number of blocks of output height - int Wo; - int Kh; - int Kw; - int G; - int Cg; -}; - -#endif // CARAFE_UTILS_HPP_ diff --git a/mmcv/ops/csrc/common/mlu/common_mlu_helper.hpp b/mmcv/ops/csrc/common/mlu/common_mlu_helper.hpp deleted file mode 100644 index 88805ba..0000000 --- a/mmcv/ops/csrc/common/mlu/common_mlu_helper.hpp +++ /dev/null @@ -1,398 +0,0 @@ -/************************************************************************* - * Copyright (C) 2021 Cambricon. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - *************************************************************************/ -#ifndef COMMON_MLU_HELPER_HPP_ -#define COMMON_MLU_HELPER_HPP_ - -#define NFU_ALIGN_SIZE 128 // Byte -#define REM_FOR_STACK (128 * 1024) // 128KB reserved for cncc - -#ifdef __BANG_ARCH__ -#define MAX_NRAM_SIZE \ - (__MLU_NRAM_SIZE__ * 1024 - REM_FOR_STACK) // 128KB reserved for cncc -#define MAX_SRAM_SIZE \ - (__MLU_SRAM_SIZE__ * 1024 - REM_FOR_STACK) // 128KB reserved for cncc -#else -#define MAX_NRAM_SIZE (384 * 1024) // 384KB, initialization value -#define MAX_SRAM_SIZE (1920 * 1024) // 1920KB, initialization value -#endif - -#ifndef PAD_UP -#define PAD_UP(x, y) (((x) / (y) + (int)((x) % (y) > 0)) * (y)) -#endif - -#ifndef PAD_DOWN -#define PAD_DOWN(x, y) (((x) / (y)) * (y)) -#endif - -#define CEIL_ALIGN(x, y) (((x) + (y)-1) / (y) * (y)) - -template -__mlu_func__ inline scalar_t min(scalar_t a, scalar_t b) { - return a < b ? a : b; -} - -template -__mlu_func__ inline scalar_t max(scalar_t a, scalar_t b) { - return a > b ? a : b; -} - -/*! 
- * @brief loads data from global DRAM to NRAM with 2D pattern. - * - * @param[out] dst - * Pointer to NRAM that stores dst data. - * @param[in] src - * Pointer to global DRAM that stores src data. - * @param[in] size - * The byte size of segment in the lower dimension. - * @param[in] dst_str - * The data stride in bytes between segments in the lower dimension of dst. - * @param[in] src_str - * The data stride in bytes between segments in the lower dimension of src. - * @param[in] seg_num - * The total count of data segments in the lower dimension. - */ -template -__mlu_func__ void loadStr2D(T *dst, T *src, const int size, const int dst_str, - const int src_str, const int seg_num) { - if (dst_str == src_str && size == src_str) { - __memcpy(dst, src, src_str * seg_num * sizeof(T), GDRAM2NRAM); - } else if ((size == src_str || src_str <= dst_str) && - src_str * sizeof(T) <= 512) { - // gather data less than 512Bytes to improve IO efficiency - T *tmp = (T *)dst + (dst_str - src_str) * seg_num; - __memcpy(tmp, src, (src_str * (seg_num - 1) + size) * sizeof(T), - GDRAM2NRAM); - if (dst_str != src_str) { - __memcpy(dst, tmp, size * sizeof(T), NRAM2NRAM, dst_str * sizeof(T), - src_str * sizeof(T), seg_num - 1); - } - } else { - __memcpy(dst, src, size * sizeof(T), GDRAM2NRAM, dst_str * sizeof(T), - src_str * sizeof(T), seg_num - 1); - } -} - -/*! - * @brief loads data from global DRAM to NRAM with 3D pattern. - * - * @param[out] dst - * Pointer to NRAM that stores dst data. - * @param[in] src - * Pointer to global DRAM that stores src data. - * @param[in] size - * The byte size of segment in the lowest dimension. - * @param[in] seg_num_in - * The total count of data segments in the lowest dimension. - * @param[in] seg_num_out - * The total count of data segments in the middle dimension. - * @param[in] dst_str_in - * The data stride in bytes between segments in the lowest dimension of dst. 
- * @param[in] dst_str_out - * The data stride in bytes between segments in the middle dimension of dst. - * @param[in] src_str_in - * The data stride in bytes between segments in the lowest dimension of src. - * @param[in] src_str_out - * The data stride in bytes between segments in the middle dimension of src. - */ -template -__mlu_func__ void loadStr3D(T *dst, T *src, const int size, - const int seg_num_in, const int seg_num_out, - const int dst_str_in, const int dst_str_out, - const int src_str_in, const int src_str_out) { - T *tmp_dst = dst; - T *tmp_src = src; - - for (int i = 0; i < seg_num_out; ++i) { - loadStr2D(tmp_dst, tmp_src, size, dst_str_in, src_str_in, seg_num_in); - tmp_src += src_str_out; - tmp_dst += dst_str_out; - } -} - -/*! - * @brief stores data from NRAM to global DRAM with 2D pattern. - * - * @param[out] dst - * Pointer to global DRAM that stores dst data. - * @param[in] src - * Pointer to NRAM that stores src data. - * @param[in] size - * The byte size of segment in the lower dimension. - * @param[in] dst_str - * The data stride in bytes between segments in the lower dimension of dst. - * @param[in] src_str - * The data stride in bytes between segments in the lower dimension of src. - * @param[in] seg_num - * The total count of data segments in the lower dimension. - */ -template -__mlu_func__ void storeStr2D(T *dst, T *src, const int size, const int seg_num, - const int dst_str, const int src_str) { - if ((size == dst_str && dst_str <= src_str) && dst_str * sizeof(T) <= 512) { - // gather data less than 512Bytes to improve IO efficiency - if (dst_str != src_str) { - __memcpy(src, src, size * sizeof(T), NRAM2NRAM, dst_str * sizeof(T), - src_str * sizeof(T), seg_num - 1); - } - __memcpy(dst, src, size * seg_num * sizeof(T), NRAM2GDRAM); - } else { - __memcpy(dst, src, size * sizeof(T), NRAM2GDRAM, dst_str * sizeof(T), - src_str * sizeof(T), seg_num - 1); - } -} - -/*! - * @brief stores data from NRAM to global DRAM with 3D pattern. 
- * - * @param[out] dst - * Pointer to global DRAM that stores dst data. - * @param[in] src - * Pointer to NRAM that stores src data. - * @param[in] size - * The byte size of segment in the lowest dimension. - * @param[in] seg_num_in - * The total count of data segments in the lowest dimension. - * @param[in] seg_num_out - * The total count of data segments in the middle dimension. - * @param[in] dst_str_in - * The data stride in bytes between segments in the lowest dimension of dst. - * @param[in] dst_str_out - * The data stride in bytes between segments in the middle dimension of dst. - * @param[in] src_str_in - * The data stride in bytes between segments in the lowest dimension of src. - * @param[in] src_str_out - * The data stride in bytes between segments in the middle dimension of src. - */ -template -__mlu_func__ void storeStr3D(T *dst, T *src, const int size, - const int seg_num_in, const int seg_num_out, - const int dst_str_in, const int dst_str_out, - const int src_str_in, const int src_str_out) { - T *tmp_dst = dst; - T *tmp_src = src; - for (int i = 0; i < seg_num_out; ++i) { - storeStr2D(tmp_dst, tmp_src, size, seg_num_in, dst_str_in, src_str_in); - tmp_src += src_str_out; - tmp_dst += dst_str_out; - } -} - -/*! - * @brief Converts int32 to float32 data type. - * - * @param[out] dst - * Pointer to NRAM that stores int32 type data. - * @param[in,out] dst_addition - * Pointer to NRAM as the workspace of dst, which has the same size as dst. - * It allows empty pointer on MLU300 series. - * @param[in] src - * Pointer to NRAM that stores float32 type data. - * @param[in,out] src_addition - * Pointer to NRAM as the workspace of src, which has a size of 128 Bytes. - * It allows empty pointer on MLU300 series. - * @param[in] src_count - * The count of elements in src. 
- */ -__mlu_func__ void convertInt2Float(float *dst, float *dst_addition, int *src, - float *src_addition, const int src_count) { -#if __BANG_ARCH__ >= 300 - __bang_int2float((float *)dst, (int32_t *)src, src_count, 0); -#else - // get sign bit - const float move_23bit = 8388608.0; - // 0x80000000 = 1,000000000,0000000000000000000000000000 - __bang_write_value((unsigned *)src_addition, NFU_ALIGN_SIZE / sizeof(float), - 0x80000000); - __bang_cycle_band((char *)dst_addition, (char *)src, (char *)src_addition, - src_count * sizeof(float), NFU_ALIGN_SIZE); - // get 1 or 0 from sign bit - // judg is Odd - __bang_write_value((unsigned *)src_addition, NFU_ALIGN_SIZE / sizeof(float), - 0x00000001); - __bang_cycle_bor((char *)dst_addition, (char *)dst_addition, - (char *)src_addition, src_count * sizeof(float), - NFU_ALIGN_SIZE); - __bang_write_value((unsigned *)src_addition, NFU_ALIGN_SIZE / sizeof(float), - 0x80000001); - __bang_cycle_eq(dst_addition, dst_addition, src_addition, src_count, - NFU_ALIGN_SIZE / sizeof(float)); - // minus xor, positive num invariant - __bang_write_value((unsigned *)src_addition, NFU_ALIGN_SIZE / sizeof(float), - 0xffffffff); - __bang_cycle_mul(dst, dst_addition, src_addition, src_count, - NFU_ALIGN_SIZE / sizeof(float)); - __bang_bxor((char *)dst, (char *)src, (char *)dst, src_count * sizeof(float)); - // convert int32 to float32 - __bang_write_value((unsigned *)src_addition, NFU_ALIGN_SIZE / sizeof(float), - 0x7fffff); - __bang_cycle_band((char *)dst, (char *)dst, (char *)src_addition, - src_count * sizeof(float), NFU_ALIGN_SIZE); - __bang_write_value((unsigned *)src_addition, NFU_ALIGN_SIZE / sizeof(float), - 0x4b000000); - __bang_cycle_bor((char *)dst, (char *)dst, (char *)src_addition, - src_count * sizeof(float), NFU_ALIGN_SIZE); - __bang_sub_scalar(dst, dst, move_23bit, src_count); - // add one - __bang_add(dst, dst, dst_addition, src_count); - // set sign for float32 - __bang_write_value((unsigned *)src_addition, NFU_ALIGN_SIZE / 
sizeof(float), - 0xffffffff); - __bang_cycle_mul(dst_addition, dst_addition, src_addition, src_count, - NFU_ALIGN_SIZE / sizeof(float)); - - __bang_write_value((unsigned *)src_addition, NFU_ALIGN_SIZE / sizeof(float), - 0x00000001); - __bang_cycle_add(dst_addition, dst_addition, src_addition, src_count, - NFU_ALIGN_SIZE / sizeof(float)); - - __bang_write_value((unsigned *)src_addition, NFU_ALIGN_SIZE / sizeof(float), - 0x80000000); - __bang_cycle_band((char *)dst_addition, (char *)dst_addition, - (char *)src_addition, src_count * 4, 128); - __bang_bor((char *)dst, (char *)dst, (char *)dst_addition, src_count * 4); -#endif // __BANG_ARCH__ >= 300 -} - -/*! - * @brief Converts float32 to int32 data type with to_zero round mode. - * - * @param[out] dst - * Pointer to NRAM that stores float32 type data. - * @param[in,out] dst_addition - * Pointer to NRAM as the workspace of dst, which has the same size as dst. - * It allows empty pointer on MLU300 series. - * @param[in] src - * Pointer to NRAM that stores int32 type data. - * @param[in,out] src_addition - * Pointer to NRAM as the workspace of src, which has a size of 128 Bytes. - * It allows empty pointer on MLU300 series. - * @param[in] src_count - * The count of elements in src. - */ -__mlu_func__ void convertFloat2Int(int *dst, float *dst_addition, float *src, - float *src_addition, const int src_count) { -#if __BANG_ARCH__ >= 300 - __bang_float2int_tz((int32_t *)dst, (float *)src, src_count, 0); -#else - // sign ===> src_addition - // dst=-1.0 : when src[i] is a negative number - // dst=+1.0 : when src[i] is a positive number - const int floatDchar = sizeof(float) / sizeof(char); - __bang_active_sign((float *)dst, src, src_count); - // dst_addition = abs(src) - __bang_mul(dst_addition, src, (float *)dst, src_count); - // if dst_addition < 1.0 , then src_addition + 1, to fix add error. 
- __bang_write_value((float *)src_addition, NFU_ALIGN_SIZE / sizeof(float), - 1.0f); - __bang_cycle_lt(dst_addition, dst_addition, (float *)src_addition, src_count, - NFU_ALIGN_SIZE / sizeof(float)); - __bang_add_tz((float *)dst, (float *)dst, (float *)dst_addition, src_count); - __bang_write_value((unsigned *)src_addition, NFU_ALIGN_SIZE / sizeof(float), - 0xbf800000); - // set negative flag -1.0 = 0xbf80000 - __bang_cycle_eq( - (float *)dst, (float *)dst, (float *)src_addition, src_count, - NFU_ALIGN_SIZE / sizeof(float)); // to mark all src in [x<-1.0] - __bang_active_abs(dst_addition, src, src_count); - __bang_write_value((float *)src_addition, NFU_ALIGN_SIZE / sizeof(float), - 8388608.0f); - // mask shift move 23 - __bang_cycle_add_tz( - dst_addition, dst_addition, src_addition, src_count, - NFU_ALIGN_SIZE / sizeof(float)); // right shift move 23bit - // two`s complement for negatibe - // dst=1.0 , when src <-1.0 - // dst=0.0 , when src >=-1.0 - __bang_sub(dst_addition, dst_addition, (float *)dst, src_count); - // to fix max value - // 0 1001 0110 111 1111 1111 1111 1111 1111 <=> 0xcb7fffff <=> 16777215.0, - // means max value. 
- __bang_mul_scalar((float *)dst, (float *)dst, 16777215.0, src_count); - __bang_bxor((char *)dst_addition, (char *)dst_addition, (char *)dst, - src_count * floatDchar); - // get low 23bit - __bang_write_value((unsigned *)src_addition, NFU_ALIGN_SIZE / sizeof(float), - (unsigned)0x007fffff); - // mask low 23bit is 1 - __bang_cycle_band((char *)dst_addition, (char *)dst_addition, - (char *)src_addition, src_count * floatDchar, - NFU_ALIGN_SIZE / sizeof(char)); - // set 9 high bit ===> dst - // -2.0 <=> 0xc0000000 <=> 1100 0000 0000 0000 0000 0000 0000 0000 - // 1.0 <=> 0x3f800000 <=> 0011 1111 1000 0000 0000 0000 0000 0000 - __bang_write_value(src_addition, NFU_ALIGN_SIZE / sizeof(float), 0x3f800000); - __bang_cycle_and((float *)dst, (float *)dst, src_addition, src_count, - NFU_ALIGN_SIZE / sizeof(float)); - // src or dst_addition - __bang_bor((char *)dst_addition, (char *)dst, (char *)dst_addition, - src_count * floatDchar); - __bang_mul_scalar((float *)dst, (float *)dst, -2.0, src_count); - __bang_bor((char *)dst, (char *)dst, (char *)dst_addition, - src_count * floatDchar); -#endif // __BANG_ARCH__ >= 300 -} - -/*! - * @brief Converts float32 to half data type, - * the rounding mode on MLU200 is rd, on MLU300 is rn. - * - * @param[out] dst - * Pointer to NRAM that stores half type data. - * @param[in] src - * Pointer to NRAM that stores float32 type data. - * @param[in] src_count - * The count of elements in src. - */ -__mlu_func__ inline void convertFloat2half(half *dst, float *src, - int src_count) { -#if __BANG_ARCH__ >= 300 - __bang_float2half_rn(dst, src, src_count); -#else - __bang_float2half_rd(dst, src, src_count); -#endif -} - -/*! - * @brief recursiveSumPool. - * @param[in,out] dst - * Pointer to NRAM that stores the input and output data. - * @param[in] low_dim - * Which is the number of low dim. - * @param[in] high_dim - * Which is the number of high dim. - * @param[in] kernel_limit - * Which is the high_dim of sumpool per time. 
- ******************************************************************************/ -template -__mlu_func__ void recursiveSumPool(T *dst, int low_dim, int high_dim, - int kernel_limit) { - for (; high_dim > 1;) { - int repeat_s = high_dim / kernel_limit; - int remain_s = high_dim % kernel_limit; - - if (remain_s) { - __bang_sumpool((T *)dst, (T *)dst, low_dim, 1, remain_s, 1, remain_s, 1, - 1); - } - if (repeat_s) { - __bang_sumpool((T *)dst + (remain_s > 0 ? low_dim : 0), - (T *)dst + remain_s * low_dim, low_dim, - kernel_limit * repeat_s, 1, kernel_limit, 1, 1, - kernel_limit); - } - high_dim = repeat_s + (bool)remain_s; - } - return; -} - -#endif // COMMON_MLU_HELPER_HPP_ diff --git a/mmcv/ops/csrc/common/mlu/deform_roi_pool_mlu_kernel.mlu b/mmcv/ops/csrc/common/mlu/deform_roi_pool_mlu_kernel.mlu deleted file mode 100644 index 6c765e3..0000000 --- a/mmcv/ops/csrc/common/mlu/deform_roi_pool_mlu_kernel.mlu +++ /dev/null @@ -1,712 +0,0 @@ -/************************************************************************* - * Copyright (C) 2022 Cambricon. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- *************************************************************************/ -#include - -#include "common_mlu_helper.hpp" - -#define ROI_OFFSET 5 -#define FOURSPLIT 4 -#define FIVESPLIT 5 -#define NINESPLIT 9 -#define THIRTEENSPLIT 13 - -__nram__ char nram_buffer[MAX_NRAM_SIZE]; - -template -static __mlu_func__ void bilinearInterpolate(const int input_width, T y, T x, - T *w1, T *w2, T *w3, T *w4, - int *x_low, int *x_high, - const int y_low, bool *is_empty) { - if (x < -1.0 || x > input_width) { - *is_empty = true; - return; - } - - if (x <= 0) x = 0; - - *x_low = int(x); - - if (*x_low >= input_width - 1) { - *x_high = *x_low = input_width - 1; - x = T(*x_low); - } else { - *x_high = *x_low + 1; - } - - T ly = y - y_low; - T lx = x - *x_low; - T hy = 1.0 - ly; - T hx = 1.0 - lx; - *w1 = hy * hx; - *w2 = hy * lx; - *w3 = ly * hx; - *w4 = ly * lx; -} - -template -__mlu_func__ void MLUUnion1DeformRoIPoolForward( - const T *input, const T *rois, const T *offset, T *output, - const int channels, const int height, const int width, const int num_rois, - const int pooled_height, const int pooled_width, const T spatial_scale, - const int sampling_ratio, const T gamma) { - for (int bin_index = taskId; - bin_index < num_rois * pooled_width * pooled_height; - bin_index += taskDim) { - int out_batch = bin_index / pooled_width / pooled_height; - int out_height = bin_index / pooled_width % pooled_height; - int out_width = bin_index % pooled_width; - const T *cur_roi = rois + out_batch * ROI_OFFSET; - T *nram_rois = (T *)nram_buffer; - __memcpy((void *)nram_rois, (void *)cur_roi, ROI_OFFSET * sizeof(T), - GDRAM2NRAM); - const int roi_batch = nram_rois[0]; - T roi_x_min = nram_rois[1] * spatial_scale - 0.5; - T roi_y_min = nram_rois[2] * spatial_scale - 0.5; - const T roi_x_max = nram_rois[3] * spatial_scale - 0.5; - const T roi_y_max = nram_rois[4] * spatial_scale - 0.5; - const T roi_width = roi_x_max - roi_x_min; - const T roi_height = roi_y_max - roi_y_min; - const T 
bin_width = roi_width / static_cast(pooled_width); - const T bin_height = roi_height / static_cast(pooled_height); - const T *offset_input = input + roi_batch * height * width * channels; - int roi_bin_grid_height = - (sampling_ratio > 0) - ? sampling_ratio - : static_cast(ceilf(roi_height / pooled_height)); - int roi_bin_grid_width = - (sampling_ratio > 0) - ? sampling_ratio - : static_cast(ceilf(roi_width / pooled_width)); - if (offset != NULL) { - const T *offset_cur = offset + - out_batch * pooled_width * pooled_height * 2 + - out_height * pooled_width + out_width; - roi_x_min += gamma * roi_width * offset_cur[0]; - roi_y_min += - gamma * roi_height * offset_cur[pooled_width * pooled_height]; - } - int type_align = NFU_ALIGN_SIZE / sizeof(T); - int channels_max_num_nram = MAX_NRAM_SIZE / sizeof(T); - int channels_nram_split = - channels_max_num_nram / NINESPLIT / type_align * type_align; - int channel_rem = channels % channels_nram_split; - int channel_loops = - channels / channels_nram_split + (channel_rem != 0 ? 1 : 0); - for (int channel_loop_index = 0; channel_loop_index < channel_loops; - ++channel_loop_index) { - int channels_num = - channels_nram_split >= channels ? 
channels : channels_nram_split; - const int channel_offset = channel_loop_index * channels_num; - if (channel_loop_index + 1 == channel_loops && channel_rem != 0) { - channels_num = channel_rem; - } - int channels_align = CEIL_ALIGN(channels_num, type_align); - int nram_limit = (MAX_NRAM_SIZE / sizeof(T) - channels_align) >> 1; - int c_slice = nram_limit / FOURSPLIT / type_align * type_align; - int c_slice_align = 0; - - /* NRAM partition - * - * | | ping | pong | - * |----------|-------------------|-------------------| - * | nram_out | p1 | p2 | p3 | p4 | p1 | p2 | p3 | p4 | - * - */ - - T *nram_out = (T *)nram_buffer; - T *nram_ping = nram_out + channels_align; - T *nram_pong = nram_ping + nram_limit; - __bang_write_value((T *)nram_out, channels_align, (T)0); - __bang_write_value((T *)nram_ping, FOURSPLIT * c_slice, (T)0); - __bang_write_value((T *)nram_pong, FOURSPLIT * c_slice, (T)0); - const T num_bins = - static_cast(max(roi_bin_grid_height * roi_bin_grid_width, 1)); - const T value_div = 1.0f / num_bins; - bool is_ping_empty = true; - for (int iy = 0; iy < roi_bin_grid_height; ++iy) { - T y = roi_y_min + out_height * bin_height + - static_cast(iy + .5f) * bin_height / - static_cast(roi_bin_grid_height); - if (y < -1.0 || y > height) { - is_ping_empty = true; - continue; - } - if (y <= 0) { - y = 0; - } - int y_low = 0, y_high = 0; - y_low = int(y); - if (y_low >= height - 1) { - y_high = y_low = height - 1; - y = T(y_low); - } else { - y_high = y_low + 1; - } - for (int ix = 0; ix < roi_bin_grid_width; ++ix) { - T x = roi_x_min + out_width * bin_width + - static_cast(ix + .5f) * bin_width / - static_cast(roi_bin_grid_width); - const int sample_index = iy * roi_bin_grid_width + ix; - int c_rem = channels_num; - c_slice = nram_limit / FOURSPLIT / type_align * type_align; - c_slice_align = 0; - bool is_empty = false; - T w1, w2, w3, w4; - int x_low = 0, x_high = 0; - bilinearInterpolate(width, y, x, &w1, &w2, &w3, &w4, &x_low, &x_high, - y_low, &is_empty); - if 
(is_empty) { - is_ping_empty = true; - continue; - } - if (is_ping_empty) { - c_slice = c_slice > c_rem ? c_rem : c_slice; - c_slice_align = CEIL_ALIGN(c_slice, type_align); - __bang_write_value(nram_ping, FOURSPLIT * c_slice_align, (T)0); - __asm__ volatile("sync;"); - __memcpy(nram_ping, - offset_input + y_low * width * channels + - x_low * channels + channel_offset, - c_slice * sizeof(T), GDRAM2NRAM); - __memcpy(nram_ping + c_slice_align, - offset_input + y_low * width * channels + - x_high * channels + channel_offset, - c_slice * sizeof(T), GDRAM2NRAM); - __memcpy(nram_ping + 2 * c_slice_align, - offset_input + y_high * width * channels + - x_low * channels + channel_offset, - c_slice * sizeof(T), GDRAM2NRAM); - __memcpy(nram_ping + 3 * c_slice_align, - offset_input + y_high * width * channels + - x_high * channels + channel_offset, - c_slice * sizeof(T), GDRAM2NRAM); - is_ping_empty = false; - } - int c_offset = 0; - int pongc_slice = 0; - int pongc_slice_align = 0; - while (c_rem > 0) { - c_slice = c_slice > c_rem ? 
c_rem : c_slice; - c_slice_align = CEIL_ALIGN(c_slice, type_align); - if (sample_index + 1 < roi_bin_grid_height * roi_bin_grid_width) { - int iy_tmp = (sample_index + 1) / roi_bin_grid_width; - int ix_tmp = (sample_index + 1) % roi_bin_grid_width; - y = roi_y_min + out_height * bin_height + - static_cast(iy_tmp + .5f) * bin_height / - static_cast(roi_bin_grid_height); - x = roi_x_min + out_width * bin_width + - static_cast(ix_tmp + .5f) * bin_width / - static_cast(roi_bin_grid_width); - if (y < -1.0 || y > height) { - is_empty = true; - } else { - T w1_tmp, w2_tmp, w3_tmp, w4_tmp; - if (y <= 0) { - y = 0; - } - y_low = int(y); - if (y_low >= height - 1) { - y_high = y_low = height - 1; - y = T(y_low); - } else { - y_high = y_low + 1; - } - bilinearInterpolate(width, y, x, &w1_tmp, &w2_tmp, &w3_tmp, - &w4_tmp, &x_low, &x_high, y_low, &is_empty); - } - pongc_slice = nram_limit / FOURSPLIT / type_align * type_align; - pongc_slice = - pongc_slice > channels_num ? channels_num : pongc_slice; - pongc_slice_align = CEIL_ALIGN(pongc_slice, type_align); - __bang_write_value(nram_pong, FOURSPLIT * pongc_slice_align, - (T)0); - __asm__ volatile("sync;"); - if (!is_empty) { - __memcpy_async(nram_pong, - offset_input + y_low * width * channels + - x_low * channels + channel_offset, - pongc_slice * sizeof(T), GDRAM2NRAM); - __memcpy_async(nram_pong + pongc_slice_align, - offset_input + y_low * width * channels + - x_high * channels + channel_offset, - pongc_slice * sizeof(T), GDRAM2NRAM); - __memcpy_async(nram_pong + 2 * pongc_slice_align, - offset_input + y_high * width * channels + - x_low * channels + channel_offset, - pongc_slice * sizeof(T), GDRAM2NRAM); - __memcpy_async(nram_pong + 3 * pongc_slice_align, - offset_input + y_high * width * channels + - x_high * channels + channel_offset, - pongc_slice * sizeof(T), GDRAM2NRAM); - } - } - __bang_mul_scalar(nram_ping, nram_ping, w1, c_slice_align); - __bang_mul_scalar(nram_ping + c_slice_align, - nram_ping + c_slice_align, w2, 
c_slice_align); - __bang_add(nram_ping, nram_ping, nram_ping + c_slice_align, - c_slice_align); - __bang_mul_scalar(nram_ping + 2 * c_slice_align, - nram_ping + 2 * c_slice_align, w3, c_slice_align); - __bang_add(nram_ping, nram_ping, nram_ping + 2 * c_slice_align, - c_slice_align); - __bang_mul_scalar(nram_ping + 3 * c_slice_align, - nram_ping + 3 * c_slice_align, w4, c_slice_align); - __bang_add(nram_ping, nram_ping, nram_ping + 3 * c_slice_align, - c_slice_align); - __bang_add(nram_out + c_offset, nram_out + c_offset, nram_ping, - c_slice_align); - T *nram_tmp = nram_ping; - nram_ping = nram_pong; - nram_pong = nram_tmp; - c_rem -= c_slice; - c_offset += c_slice; - __asm__ volatile("sync;"); - } - } - } - __bang_mul_scalar(nram_out, nram_out, value_div, channels_align); - __memcpy(output + channels * bin_index + channel_offset, nram_out, - channels_num * sizeof(T), NRAM2GDRAM); - } - } -} - -__mlu_global__ void MLUKernelDeformRoIPoolForward( - cnrtDataType_t data_type, const void *input, const void *rois, - const void *offset, void *output, const int channels, const int height, - const int width, const int num_rois, const int pooled_height, - const int pooled_width, const float spatial_scale, const int sampling_ratio, - const float gamma) { - switch (data_type) { - case CNRT_FLOAT16: { - MLUUnion1DeformRoIPoolForward((half *)input, (half *)rois, (half *)offset, - (half *)output, channels, height, width, - num_rois, pooled_height, pooled_width, - static_cast(spatial_scale), - sampling_ratio, static_cast(gamma)); - }; break; - case CNRT_FLOAT32: { - MLUUnion1DeformRoIPoolForward( - (float *)input, (float *)rois, (float *)offset, (float *)output, - channels, height, width, num_rois, pooled_height, pooled_width, - static_cast(spatial_scale), sampling_ratio, - static_cast(gamma)); - }; break; - default: { - break; - } - } -} - -void KernelDeformRoIPoolForward(cnrtDim3_t k_dim, cnrtFunctionType_t k_type, - cnrtQueue_t queue, cnrtDataType_t data_type, - const void 
*input, const void *rois, - const void *offset, void *output, - const int channels, const int height, - const int width, const int num_rois, - const int pooled_height, const int pooled_width, - const float spatial_scale, - const int sampling_ratio, const float gamma) { - MLUKernelDeformRoIPoolForward<<>>( - data_type, input, rois, offset, output, channels, height, width, num_rois, - pooled_height, pooled_width, spatial_scale, sampling_ratio, gamma); -} - -template -__mlu_func__ void MLUUnion1DeformRoIPoolBackward( - const T *grad_output, const T *input, const T *rois, const T *offset, - T *grad_input, T *grad_offset, const int channels, const int height, - const int width, const int num_rois, const int pooled_height, - const int pooled_width, const T spatial_scale, const int sampling_ratio, - const T gamma) { - for (int bin_index = taskId; - bin_index < num_rois * pooled_width * pooled_height; - bin_index += taskDim) { - int out_batch = bin_index / pooled_width / pooled_height; - int out_height = bin_index / pooled_width % pooled_height; - int out_width = bin_index % pooled_width; - const T *cur_roi = rois + out_batch * ROI_OFFSET; - T *nram_rois = (T *)nram_buffer; - __memcpy((void *)nram_rois, (void *)cur_roi, ROI_OFFSET * sizeof(T), - GDRAM2NRAM); - const int roi_batch = nram_rois[0]; - T roi_x_min = nram_rois[1] * spatial_scale - 0.5; - T roi_y_min = nram_rois[2] * spatial_scale - 0.5; - const T roi_x_max = nram_rois[3] * spatial_scale - 0.5; - const T roi_y_max = nram_rois[4] * spatial_scale - 0.5; - const T roi_width = roi_x_max - roi_x_min; - const T roi_height = roi_y_max - roi_y_min; - const T bin_width = roi_width / static_cast(pooled_width); - const T bin_height = roi_height / static_cast(pooled_height); - const T *offset_input = input + roi_batch * height * width * channels; - T *offset_grad_input = grad_input + roi_batch * height * width * channels; - int roi_bin_grid_height = - (sampling_ratio > 0) - ? 
sampling_ratio - : static_cast(ceilf(roi_height / pooled_height)); - int roi_bin_grid_width = - (sampling_ratio > 0) - ? sampling_ratio - : static_cast(ceilf(roi_width / pooled_width)); - if (offset != NULL) { - const T *offset_cur = offset + - out_batch * pooled_width * pooled_height * 2 + - out_height * pooled_width + out_width; - roi_x_min += gamma * roi_width * offset_cur[0]; - roi_y_min += - gamma * roi_height * offset_cur[pooled_width * pooled_height]; - } - - /* NRAM partition - * - * If offset != NULL, NRAM partition belows. - * | | - * ping | pong | - * |---------------------------------------------------------------------|-----------|-----------| - * |nram_tmp1|nram_tmp2|nram_tmp3|nram_tmp4|nram_grad_output|nram_sum_tmp|p1|p2|p3|p4|p1|p2|p3|p4| - * - * If offset == NULL, ping and pang will not be needed. - * | | - * |----------------------------------------------------------------------------------| - * | nram_tmp1 | nram_tmp2 | nram_tmp3 | nram_tmp4 | nram_grad_output | - * - */ - - int type_align = NFU_ALIGN_SIZE / sizeof(T); - int channels_max_num_nram = MAX_NRAM_SIZE / sizeof(T); - int channels_nram_split = - channels_max_num_nram / FIVESPLIT / type_align * type_align; - int channel_rem = channels % channels_nram_split; - int channel_loops = - channels / channels_nram_split + (channel_rem != 0 ? 1 : 0); - if (offset != NULL) { - channels_nram_split = - channels_max_num_nram / THIRTEENSPLIT / type_align * type_align; - channel_rem = channels % channels_nram_split; - channel_loops = - channels / channels_nram_split + (channel_rem != 0 ? 1 : 0); - } - - for (int channel_loop_index = 0; channel_loop_index < channel_loops; - ++channel_loop_index) { - int channels_num = - channels_nram_split >= channels ? 
channels : channels_nram_split; - const int channel_offset = channel_loop_index * channels_num; - if (channel_loop_index + 1 == channel_loops && channel_rem != 0) { - channels_num = channel_rem; - } - int channels_align = CEIL_ALIGN(channels_num, type_align); - const int32_t nram_sum_tmp_channel = NFU_ALIGN_SIZE / sizeof(T); - int nram_limit = (MAX_NRAM_SIZE / sizeof(T) - 5 * channels_align - - nram_sum_tmp_channel) >> - 1; - int c_slice = 0; - int c_slice_align = 0; - T *nram_tmp1 = (T *)nram_buffer; - T *nram_tmp2 = (T *)nram_buffer + channels_align; - T *nram_tmp3 = (T *)nram_buffer + 2 * channels_align; - T *nram_tmp4 = (T *)nram_buffer + 3 * channels_align; - T *nram_grad_output = nram_tmp4 + channels_align; - T *nram_sum_tmp = NULL; - T *nram_ping_input = NULL; - T *nram_pong_input = NULL; - __bang_write_value((T *)nram_grad_output, channels_align, (T)0); - __asm__ volatile("sync;"); - - if (offset != NULL) { - c_slice = nram_limit / FOURSPLIT / type_align * type_align; - nram_sum_tmp = nram_grad_output + channels_align; - nram_ping_input = nram_sum_tmp + nram_sum_tmp_channel; - nram_pong_input = nram_ping_input + FOURSPLIT * c_slice; - __bang_write_value((T *)nram_sum_tmp, nram_sum_tmp_channel, (T)0); - __bang_write_value((T *)nram_ping_input, FOURSPLIT * c_slice, (T)0); - __bang_write_value((T *)nram_pong_input, FOURSPLIT * c_slice, (T)0); - __asm__ volatile("sync;"); - } - const T num_bins = - static_cast(max(roi_bin_grid_height * roi_bin_grid_width, 1)); - const T value_div = 1.0f / num_bins; - bool is_ping_empty = true; - __memcpy(nram_grad_output, - grad_output + channels * bin_index + channel_offset, - channels_num * sizeof(T), GDRAM2NRAM); - __bang_mul_scalar(nram_grad_output, nram_grad_output, value_div, - channels_align); - for (int iy = 0; iy < roi_bin_grid_height; ++iy) { - T y = roi_y_min + out_height * bin_height + - static_cast(iy + .5f) * bin_height / - static_cast(roi_bin_grid_height); - T y_tmp = y; - if (y_tmp < -1.0 || y_tmp > height) { - 
is_ping_empty = true; - continue; - } - if (y_tmp <= 0) { - y_tmp = 0; - } - int y_low = 0, y_high = 0; - y_low = int(y_tmp); - if (y_low >= height - 1) { - y_high = y_low = height - 1; - y_tmp = T(y_low); - } else { - y_high = y_low + 1; - } - for (int ix = 0; ix < roi_bin_grid_width; ++ix) { - T x = roi_x_min + out_width * bin_width + - static_cast(ix + .5f) * bin_width / - static_cast(roi_bin_grid_width); - const int sample_index = iy * roi_bin_grid_width + ix; - int c_rem = channels_num; - bool is_empty = false; - T w1, w2, w3, w4; - int x_low = 0, x_high = 0; - bilinearInterpolate(width, y_tmp, x, &w1, &w2, &w3, &w4, &x_low, - &x_high, y_low, &is_empty); - if (is_empty) { - is_ping_empty = true; - continue; - } - __bang_mul_scalar((T *)nram_tmp1, (T *)nram_grad_output, w1, - channels_align); - __bang_mul_scalar((T *)nram_tmp2, (T *)nram_grad_output, w2, - channels_align); - __bang_mul_scalar((T *)nram_tmp3, (T *)nram_grad_output, w3, - channels_align); - __bang_mul_scalar((T *)nram_tmp4, (T *)nram_grad_output, w4, - channels_align); - __asm__ volatile("sync;"); - __bang_atomic_add( - (T *)nram_tmp1, - (T *)(offset_grad_input + (y_low * width + x_low) * channels + - channel_offset), - (T *)nram_tmp1, channels_num); - __bang_atomic_add( - (T *)nram_tmp2, - (T *)(offset_grad_input + (y_low * width + x_high) * channels + - channel_offset), - (T *)nram_tmp2, channels_num); - __bang_atomic_add( - (T *)nram_tmp3, - (T *)(offset_grad_input + (y_high * width + x_low) * channels + - channel_offset), - (T *)nram_tmp3, channels_num); - __bang_atomic_add( - (T *)nram_tmp4, - (T *)(offset_grad_input + (y_high * width + x_high) * channels + - channel_offset), - (T *)nram_tmp4, channels_num); - if (offset != NULL) { - c_slice = nram_limit / FOURSPLIT / type_align * type_align; - c_slice_align = 0; - if (is_ping_empty) { - c_slice = c_slice > c_rem ? 
c_rem : c_slice; - c_slice_align = CEIL_ALIGN(c_slice, type_align); - __bang_write_value(nram_ping_input, FOURSPLIT * c_slice_align, - (T)0); - __asm__ volatile("sync;"); - const T *src_offset1 = offset_input + y_low * width * channels + - x_low * channels + channel_offset; - const T *src_offset2 = offset_input + y_low * width * channels + - x_high * channels + channel_offset; - const T *src_offset3 = offset_input + y_high * width * channels + - x_low * channels + channel_offset; - const T *src_offset4 = offset_input + y_high * width * channels + - x_high * channels + channel_offset; - __memcpy(nram_ping_input, src_offset1, c_slice * sizeof(T), - GDRAM2NRAM); - __memcpy(nram_ping_input + c_slice_align, src_offset2, - c_slice * sizeof(T), GDRAM2NRAM); - __memcpy(nram_ping_input + 2 * c_slice_align, src_offset3, - c_slice * sizeof(T), GDRAM2NRAM); - __memcpy(nram_ping_input + 3 * c_slice_align, src_offset4, - c_slice * sizeof(T), GDRAM2NRAM); - is_ping_empty = false; - } - int c_offset = 0; - int pongc_slice = 0; - int pongc_slice_align = 0; - while (c_rem > 0) { - c_slice = c_slice > c_rem ? 
c_rem : c_slice; - c_slice_align = CEIL_ALIGN(c_slice, type_align); - if (sample_index + 1 < roi_bin_grid_height * roi_bin_grid_width) { - int iy_tmp = (sample_index + 1) / roi_bin_grid_width; - int ix_tmp = (sample_index + 1) % roi_bin_grid_width; - T y_tmp = roi_y_min + out_height * bin_height + - static_cast(iy_tmp + .5f) * bin_height / - static_cast(roi_bin_grid_height); - T x_tmp = roi_x_min + out_width * bin_width + - static_cast(ix_tmp + .5f) * bin_width / - static_cast(roi_bin_grid_width); - int x_low_tmp = 0, x_high_tmp = 0, y_low_tmp = 0, - y_high_tmp = 0; - if (y_tmp < -1.0 || y_tmp > height) { - is_empty = true; - } else { - T w1_tmp, w2_tmp, w3_tmp, w4_tmp; - if (y_tmp <= 0) { - y_tmp = 0; - } - y_low_tmp = int(y_tmp); - if (y_low_tmp >= height - 1) { - y_high_tmp = y_low_tmp = height - 1; - y_tmp = T(y_low_tmp); - } else { - y_high_tmp = y_low_tmp + 1; - } - bilinearInterpolate(width, y_tmp, x_tmp, &w1_tmp, &w2_tmp, - &w3_tmp, &w4_tmp, &x_low_tmp, &x_high_tmp, - y_low_tmp, &is_empty); - } - pongc_slice = nram_limit / FOURSPLIT / type_align * type_align; - pongc_slice = - pongc_slice > channels_num ? 
channels_num : pongc_slice; - pongc_slice_align = CEIL_ALIGN(pongc_slice, type_align); - __bang_write_value(nram_pong_input, - FOURSPLIT * pongc_slice_align, (T)0); - __asm__ volatile("sync;"); - if (!is_empty) { - const T *src_offset1 = offset_input + - y_low_tmp * width * channels + - x_low_tmp * channels + channel_offset; - const T *src_offset2 = offset_input + - y_low_tmp * width * channels + - x_high_tmp * channels + channel_offset; - const T *src_offset3 = offset_input + - y_high_tmp * width * channels + - x_low_tmp * channels + channel_offset; - const T *src_offset4 = offset_input + - y_high_tmp * width * channels + - x_high_tmp * channels + channel_offset; - __memcpy_async(nram_pong_input, src_offset1, - pongc_slice * sizeof(T), GDRAM2NRAM); - __memcpy_async(nram_pong_input + pongc_slice_align, - src_offset2, pongc_slice * sizeof(T), - GDRAM2NRAM); - __memcpy_async(nram_pong_input + 2 * pongc_slice_align, - src_offset3, pongc_slice * sizeof(T), - GDRAM2NRAM); - __memcpy_async(nram_pong_input + 3 * pongc_slice_align, - src_offset4, pongc_slice * sizeof(T), - GDRAM2NRAM); - } - } - - __bang_mul_scalar(nram_tmp1, nram_ping_input + 3 * c_slice_align, - y - y_low, c_slice_align); - __bang_mul_scalar(nram_tmp2, nram_ping_input + c_slice_align, - y_high - y, c_slice_align); - __bang_add(nram_tmp1, nram_tmp1, nram_tmp2, c_slice_align); - __bang_mul_scalar(nram_tmp2, nram_ping_input + 2 * c_slice_align, - y_low - y, c_slice_align); - __bang_add(nram_tmp1, nram_tmp1, nram_tmp2, c_slice_align); - __bang_mul_scalar(nram_tmp2, nram_ping_input, y - y_high, - c_slice_align); - __bang_add(nram_tmp1, nram_tmp1, nram_tmp2, c_slice_align); - __bang_mul_scalar(nram_tmp1, nram_tmp1, gamma * roi_width, - c_slice_align); - __bang_mul(nram_tmp1, nram_grad_output, nram_tmp1, c_slice_align); - const int32_t kernel_width = - c_slice_align / nram_sum_tmp_channel + - (int32_t)(c_slice_align % nram_sum_tmp_channel > 0); - __bang_sumpool(nram_sum_tmp, nram_tmp1, nram_sum_tmp_channel, 1, 
- kernel_width, 1, kernel_width, kernel_width, 1); - __bang_reduce_sum(nram_sum_tmp, nram_sum_tmp, - nram_sum_tmp_channel); - __bang_atomic_add( - (T *)nram_sum_tmp, - (T *)(grad_offset + - out_batch * pooled_width * pooled_height * 2 + - out_height * pooled_width + out_width), - (T *)nram_sum_tmp, 1); - __bang_write_value((T *)nram_sum_tmp, nram_sum_tmp_channel, (T)0); - __bang_mul_scalar(nram_tmp1, nram_ping_input + 3 * c_slice_align, - x - x_low, c_slice_align); - __bang_mul_scalar(nram_tmp2, nram_ping_input + 2 * c_slice_align, - x_high - x, c_slice_align); - __bang_add(nram_tmp1, nram_tmp1, nram_tmp2, c_slice_align); - __bang_mul_scalar(nram_tmp2, nram_ping_input + c_slice_align, - x_low - x, c_slice_align); - __bang_add(nram_tmp1, nram_tmp1, nram_tmp2, c_slice_align); - __bang_mul_scalar(nram_tmp2, nram_ping_input, x - x_high, - c_slice_align); - __bang_add(nram_tmp1, nram_tmp1, nram_tmp2, c_slice_align); - __bang_mul_scalar(nram_tmp1, nram_tmp1, gamma * roi_height, - c_slice_align); - __bang_mul(nram_tmp1, nram_grad_output, nram_tmp1, c_slice_align); - __bang_sumpool(nram_sum_tmp, nram_tmp1, nram_sum_tmp_channel, 1, - kernel_width, 1, kernel_width, kernel_width, 1); - __bang_reduce_sum(nram_sum_tmp, nram_sum_tmp, - NFU_ALIGN_SIZE / sizeof(T)); - __bang_atomic_add( - (T *)nram_sum_tmp, - (T *)(grad_offset + - out_batch * pooled_width * pooled_height * 2 + - pooled_width * pooled_height + - out_height * pooled_width + out_width), - (T *)nram_sum_tmp, 1); - - T *nram_tmp = nram_ping_input; - nram_ping_input = nram_pong_input; - nram_pong_input = nram_tmp; - c_rem -= c_slice; - c_offset += c_slice; - __asm__ volatile("sync;"); - } - } - } - } - } - } -} - -__mlu_global__ void MLUKernelDeformRoIPoolBackward( - cnrtDataType_t data_type, const void *grad_output, const void *input, - const void *rois, const void *offset, void *grad_input, void *grad_offset, - const int channels, const int height, const int width, const int num_rois, - const int pooled_height, const 
int pooled_width, const float spatial_scale, - const int sampling_ratio, const float gamma) { - switch (data_type) { - case CNRT_FLOAT16: { - MLUUnion1DeformRoIPoolBackward( - (half *)grad_output, (half *)input, (half *)rois, (half *)offset, - (half *)grad_input, (half *)grad_offset, channels, height, width, - num_rois, pooled_height, pooled_width, - static_cast(spatial_scale), sampling_ratio, - static_cast(gamma)); - }; break; - case CNRT_FLOAT32: { - MLUUnion1DeformRoIPoolBackward( - (float *)grad_output, (float *)input, (float *)rois, (float *)offset, - (float *)grad_input, (float *)grad_offset, channels, height, width, - num_rois, pooled_height, pooled_width, - static_cast(spatial_scale), sampling_ratio, - static_cast(gamma)); - }; break; - default: { - break; - } - } -} - -void KernelDeformRoIPoolBackward( - cnrtDim3_t k_dim, cnrtFunctionType_t k_type, cnrtQueue_t queue, - cnrtDataType_t data_type, const void *grad_output, const void *input, - const void *rois, const void *offset, void *grad_input, void *grad_offset, - const int channels, const int height, const int width, const int num_rois, - const int pooled_height, const int pooled_width, const float spatial_scale, - const int sampling_ratio, const float gamma) { - MLUKernelDeformRoIPoolBackward<<>>( - data_type, grad_output, input, rois, offset, grad_input, grad_offset, - channels, height, width, num_rois, pooled_height, pooled_width, - spatial_scale, sampling_ratio, gamma); -} diff --git a/mmcv/ops/csrc/common/mlu/focal_loss_sigmoid_mlu_kernel.mlu b/mmcv/ops/csrc/common/mlu/focal_loss_sigmoid_mlu_kernel.mlu deleted file mode 100644 index 7624379..0000000 --- a/mmcv/ops/csrc/common/mlu/focal_loss_sigmoid_mlu_kernel.mlu +++ /dev/null @@ -1,888 +0,0 @@ -/************************************************************************* - * Copyright (C) 2021 Cambricon. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - *************************************************************************/ -#include - -#include "common_mlu_helper.hpp" - -#define PING 0 -#define PONG 1 - -__nram__ char nram_buffer[MAX_NRAM_SIZE]; - -namespace forward { -template -__mlu_func__ void loadInput(char *nram_input, T *dram_input, const int32_t size, - const int32_t dst_stride = 0, - const int32_t src_stride = 0, - const int32_t count = 1) { - if (dst_stride == src_stride) { - __memcpy_async(nram_input, dram_input, size * count, GDRAM2NRAM); - } else { - __memcpy_async(nram_input, dram_input, size, GDRAM2NRAM, dst_stride, - src_stride, count - 1); - } -} - -template -__mlu_func__ void loadWeight(char *nram_input, T *dram_input, const int32_t t, - const int32_t c, const int32_t has_weight, - const int32_t partition_nc) { - if (has_weight && partition_nc && t >= 0 && t < c) { - __memcpy_async(nram_input, (T *)dram_input + t, sizeof(T), GDRAM2NRAM); - } -} - -template -__mlu_func__ void storeOutput(T *dram_output, char *nram_output, - const int32_t size, const int32_t dst_stride = 0, - const int32_t src_stride = 0, - const int32_t count = 1) { - if (dst_stride == src_stride) { - __memcpy_async(dram_output, nram_output, size * count, NRAM2GDRAM); - } else { - __memcpy_async(dram_output, nram_output, size, NRAM2GDRAM, dst_stride, - src_stride, count - 1); - } -} - -template -__mlu_func__ void compute(T *input, const int32_t *target, const T *weight, - const int32_t has_weight, const int32_t partition_nc, - const int32_t 
deal_num, const int32_t n_seg, - const int32_t c, const int32_t c_seg, - const int32_t c_start_index, const float alpha, - const float gamma, T *compute_a, T *compute_b, - T *output) { - // set params - const int32_t c_num = - has_weight ? PAD_UP(c_seg, NFU_ALIGN_SIZE / sizeof(T)) : c_seg; - const int32_t c_end_index = c_start_index + c_seg; - const int32_t half_epsilon = 0x0400; - const T epsilon_f = - sizeof(T) == sizeof(float) ? FLT_MIN : *((half *)&half_epsilon); - - // 0. alpha_t * p_t^r = alpha * (1 - p) ^ gamma if t == c_i - // = (1 - alpha) * p ^ gamma if t != c_i - __nramset((T *)output, deal_num, (T)(1 - alpha)); - __bang_active_sigmoid((T *)compute_b, (T *)input, deal_num); - for (int32_t i = 0; i < n_seg; ++i) { - const int32_t t = *((uint32_t *)target + i); - if (t >= c_start_index && t < c_end_index) { - const uint32_t index = i * c_num + t - c_start_index; - *((T *)input + index) = -1.0 * (*((T *)input + index)); - *((T *)compute_b + index) = 1.0 - (*((T *)compute_b + index)) + epsilon_f; - *((T *)output + index) = alpha; - } - } - if (sizeof(T) == sizeof(half)) { - __bang_half2float((float *)compute_a, (half *)compute_b, deal_num); - __bang_active_loghp((float *)compute_a, (float *)compute_a, deal_num); - __bang_mul_const((float *)compute_a, (float *)compute_a, (float)gamma, - deal_num); - __bang_active_exphp((float *)compute_a, (float *)compute_a, deal_num); - __bang_float2half_rd((half *)compute_a, (float *)compute_a, deal_num); - } else { - __bang_active_loghp((T *)compute_a, (T *)compute_b, deal_num); - __bang_mul_const((T *)compute_a, (T *)compute_a, (T)gamma, deal_num); - __bang_active_exphp((T *)compute_a, (T *)compute_a, deal_num); - } - __bang_mul((T *)output, (T *)compute_a, (T *)output, deal_num); - - // 1. max = max(0, -x) if t == c_i - // = max(0, x) if t != c_i - __nramset((T *)compute_b, deal_num, (T)0); - __bang_maxequal((T *)compute_b, (T *)compute_b, (T *)input, deal_num); - - // 2. 
-log(p_t) = ln(e^(-max)+ e^(-max-x) + max if t == c_i - // = ln(e^(-max)+ e^(-max+x) + max if t != c_i - __bang_mul_const((T *)compute_a, (T *)compute_b, (T)-1.0, deal_num); - __bang_add((T *)input, (T *)compute_a, (T *)input, deal_num); - - __bang_active_exphp((T *)compute_a, (T *)compute_a, deal_num); - __bang_active_exphp((T *)input, (T *)input, deal_num); - __bang_add((T *)compute_a, (T *)compute_a, (T *)input, deal_num); - __bang_active_loghp((T *)compute_a, (T *)compute_a, deal_num); - __bang_add((T *)input, (T *)compute_a, (T *)compute_b, deal_num); - - // 3. output = alpha_t * p_t^r * [-log(p_t)] - __bang_mul((T *)output, (T *)output, (T *)input, deal_num); - - // 4. with weight - if (has_weight) { - for (int32_t i = 0; i < n_seg; ++i) { - int32_t t = *((int32_t *)target + i); - if (t >= 0 && t < c) { - t = partition_nc ? 0 : t; - __bang_mul_const((T *)output + i * c_num, (T *)output + i * c_num, - *((T *)weight + t), c_num); - } - } - } -} - -template -__mlu_func__ void startPipeline( - const T *input, const int32_t *target, const T *weight, - char *nram_compute_a, char *nram_compute_b, char *nram_input, - char *nram_target, char *nram_weight, char *nram_output, - const int32_t has_weight, const int32_t partition_nc, - const int32_t pingpong_offset, const int32_t pingpong_weight_offset, - const int32_t c_offset_num, const int32_t n, const int32_t n_seg, - const int32_t c, const int32_t c_seg, const float alpha, const float gamma, - T *output) { - // with offset - input = (T *)((char *)input + c_offset_num * sizeof(T)); - output = (T *)((char *)output + c_offset_num * sizeof(T)); - - const int32_t c_seg_align_num = PAD_UP(c_seg, NFU_ALIGN_SIZE / sizeof(T)); - const int32_t c_num = has_weight ? 
c_seg_align_num : c_seg; - const int32_t deal_num = PAD_UP(n_seg * c_num, NFU_ALIGN_SIZE / sizeof(T)); - const int32_t load_size = c_seg * sizeof(T); - const int32_t dram_stride = c * sizeof(T); - const int32_t nram_stride = c_num * sizeof(T); - - if (has_weight && !partition_nc) { - loadInput(nram_weight, (T *)weight, load_size, nram_stride, dram_stride, - 1); - __asm__ volatile("sync;\n\t"); - } - const int32_t repeat = n / n_seg; - const int32_t remain = n % n_seg; - - /* - * Pipeline: The pipeline is processed in three stages: Load, Compute, Store. - * The allocated memory space of NRAM is divided into two parts: - * PING and Pong. In a single time slice, PING is used to process - * IO stream and PONG is used for computation. Both of them are - * processed synchronously until finished. - * - * diagram of PINGPONG: - * |------|-----------------------------------------------------------------| - * | | space | - * |------|-----------------------------------------------------------------| - * | time | Ping | Pong | Ping | Pong | Ping | Pong | - * |------|-----------------------------------------------------------------| - * | 0 | L0 | | | | | | - * | 1 | C0 | L1 | | | | | - * | 2 | S0 | C1 | L2 | | | | - * | 3 | | S1 | C2 | L3 | | | - * | 4 | | | S2 | C3 | L4 | | - * | 5 | | | | S3 | C4 | L5 | - * | 6 | | | | | S4 | C5 | - * | 7 | | | | | | S5 | - * |------|-----------------------------------------------------------------| - */ - - // diagram of PINGPONG: L0 - if (repeat > 0) { - loadInput(nram_input, (T *)input, load_size, nram_stride, dram_stride, - n_seg); - loadInput(nram_target, (int32_t *)target, n_seg * sizeof(int32_t)); - loadWeight(nram_weight, (T *)weight, *((int32_t *)target), c, has_weight, - partition_nc); - __asm__ volatile("sync;\n\t"); - } - - // diagram of PINGPONG: C0 and L1 - if (repeat > 1) { - compute((T *)nram_input, (int32_t *)nram_target, (T *)nram_weight, - has_weight, partition_nc, deal_num, n_seg, c, c_seg, c_offset_num, - alpha, gamma, 
(T *)nram_compute_a, (T *)nram_compute_b, - (T *)nram_output); - loadInput((char *)nram_input + pingpong_offset, (T *)input + c * n_seg, - load_size, nram_stride, dram_stride, n_seg); - loadInput((char *)nram_target + pingpong_offset, - (int32_t *)target + n_seg, n_seg * sizeof(int32_t)); - loadWeight((char *)nram_weight + pingpong_weight_offset, (T *)weight, - *((int32_t *)target + n_seg), c, has_weight, partition_nc); - __asm__ volatile("sync;\n\t"); - } - - for (int32_t i = 0; i < repeat - 2; ++i) { - storeOutput((T *)output + i * c * n_seg, - nram_output + (i % 2) * pingpong_offset, load_size, - dram_stride, nram_stride, n_seg); - loadInput((char *)nram_input + (i % 2) * pingpong_offset, - (T *)(input) + (i + 2) * c * n_seg, load_size, nram_stride, - dram_stride, n_seg); - loadInput((char *)nram_target + (i % 2) * pingpong_offset, - (int32_t *)target + (i + 2) * n_seg, - n_seg * sizeof(int32_t)); - loadWeight((char *)nram_weight + (i % 2) * pingpong_weight_offset, - (T *)weight, *((int32_t *)target + (i + 2) * n_seg), c, - has_weight, partition_nc); - compute((T *)(nram_input + ((i + 1) % 2) * pingpong_offset), - (int32_t *)(nram_target + ((i + 1) % 2) * pingpong_offset), - (T *)(nram_weight + - partition_nc * ((i + 1) % 2) * pingpong_weight_offset), - has_weight, partition_nc, deal_num, n_seg, c, c_seg, c_offset_num, - alpha, gamma, (T *)nram_compute_a, (T *)nram_compute_b, - (T *)(nram_output + ((i + 1) % 2) * pingpong_offset)); - __asm__ volatile("sync;\n\t"); - } - - if (repeat > 1) { - storeOutput((T *)output + (repeat - 2) * c * n_seg, - (char *)nram_output + (repeat % 2) * pingpong_offset, - load_size, dram_stride, nram_stride, n_seg); - } - - if (remain > 0) { - loadInput((char *)nram_input + (repeat % 2) * pingpong_offset, - (T *)input + repeat * c * n_seg, load_size, nram_stride, - dram_stride, remain); - loadInput((char *)nram_target + (repeat % 2) * pingpong_offset, - (int32_t *)target + repeat * n_seg, - remain * sizeof(int32_t)); - 
loadWeight((char *)nram_weight + (repeat % 2) * pingpong_weight_offset, - (T *)weight, *((int32_t *)target + repeat * n_seg), c, - has_weight, partition_nc); - } - - if (repeat > 0) { - compute((T *)(nram_input + ((repeat - 1) % 2) * pingpong_offset), - (int32_t *)(nram_target + ((repeat - 1) % 2) * pingpong_offset), - (T *)(nram_weight + - partition_nc * ((repeat - 1) % 2) * pingpong_weight_offset), - has_weight, partition_nc, deal_num, n_seg, c, c_seg, c_offset_num, - alpha, gamma, (T *)nram_compute_a, (T *)nram_compute_b, - (T *)(nram_output + ((repeat - 1) % 2) * pingpong_offset)); - } - __asm__ volatile("sync;\n\t"); - - if (repeat > 0) { - storeOutput((T *)output + (repeat - 1) * c * n_seg, - (char *)nram_output + ((repeat - 1) % 2) * pingpong_offset, - load_size, dram_stride, nram_stride, n_seg); - } - - if (remain > 0) { - int32_t rem_num = PAD_UP(remain * c_num, NFU_ALIGN_SIZE / sizeof(T)); - compute((T *)(nram_input + (repeat % 2) * pingpong_offset), - (int32_t *)(nram_target + (repeat % 2) * pingpong_offset), - (T *)(nram_weight + - partition_nc * (repeat % 2) * pingpong_weight_offset), - has_weight, partition_nc, rem_num, remain, c, c_seg, c_offset_num, - alpha, gamma, (T *)nram_compute_a, (T *)nram_compute_b, - (T *)(nram_output + (repeat % 2) * pingpong_offset)); - __asm__ volatile("sync;\n\t"); - - storeOutput((T *)output + repeat * c * n_seg, - (char *)nram_output + (repeat % 2) * pingpong_offset, - load_size, dram_stride, nram_stride, remain); - } - __asm__ volatile("sync;\n\t"); -} - -template -__mlu_func__ void focalLossSigmoidForwardBlock( - const T *input, const int32_t *target, const T *weight, const int32_t n, - const int32_t c, const float alpha, const float gamma, T *output) { - /* - * NRAM partition - * |-----------------------------------------------------------------------| - * | weight | - * |------------------------------- COMPUTE -------------------------------| - * | | | - * | computeA | computeB | - * | | | - * |------------- PING 
------------------------------- PONG ---------------| - * | | | - * | input | input | - * | | | - * |-----------------------------------|-----------------------------------| - * | | | - * | output | output | - * | | | - * |-----------------------------------|-----------------------------------| - * | target | target | - * |-----------------------------------|-----------------------------------| - * - * split_pipeline_num is 6: COMPUTE(computeA,computeB), PING(input,output), - * PONG(input,output). - * split_target_num is 2: PING(target), PONG(target). - * weight is not NULL: - * The nram-size of weight is equal to c_align_size when partition input-N. - * The nram-size of weight is equal to NFU_ALIGN_SIZE when partition - * input-NC. - */ - - // calculate threshold of c - const int32_t split_pipeline_num = 6; - const int32_t split_target_num = 2; - const int32_t has_weight = weight != NULL; - const int32_t threshold_c = - PAD_DOWN((MAX_NRAM_SIZE - split_target_num * sizeof(int32_t)) / - (split_pipeline_num + has_weight), - NFU_ALIGN_SIZE) / - sizeof(T); - const int32_t c_align = PAD_UP(c, NFU_ALIGN_SIZE / sizeof(T)); - const int32_t c_align_size = c_align * sizeof(T); - - if (c <= threshold_c) { - // partition inputN - int32_t c_num = c; - int32_t reservered_align_size = - (split_target_num + split_pipeline_num) * NFU_ALIGN_SIZE; - int32_t weight_size = 0; - if (has_weight) { - c_num = c_align; - reservered_align_size = split_target_num * NFU_ALIGN_SIZE; - weight_size = c_align_size; - } - - const int32_t remain_size = - MAX_NRAM_SIZE - weight_size - reservered_align_size; - const int32_t n_seg = - remain_size / (split_pipeline_num * c_num * sizeof(T) + - split_target_num * sizeof(int32_t)); - const int32_t split_pipeline_size = - PAD_UP(c_num * n_seg * sizeof(T), NFU_ALIGN_SIZE); - const int32_t compute_size = 2 * split_pipeline_size; - const int32_t pingpong_offset = (MAX_NRAM_SIZE - weight_size - compute_size) / 2; - - char *nram_weight = (char *)nram_buffer; - 
char *nram_compute_a = nram_weight + has_weight * c_align_size; - char *nram_compute_b = nram_compute_a + split_pipeline_size; - char *nram_input = nram_compute_b + split_pipeline_size; - char *nram_output = nram_input + split_pipeline_size; - char *nram_target = nram_output + split_pipeline_size; - - startPipeline(input, target, weight, nram_compute_a, nram_compute_b, - nram_input, nram_target, nram_weight, nram_output, - has_weight, 0, pingpong_offset, 0, 0, n, n_seg, c, c, - alpha, gamma, output); - } else { - // partition inputNC - const int32_t weight_size = has_weight * NFU_ALIGN_SIZE; - const int32_t remain_size = MAX_NRAM_SIZE - weight_size; - const int32_t split_pipeline_size = PAD_DOWN( - (remain_size - split_target_num * NFU_ALIGN_SIZE) / split_pipeline_num, - NFU_ALIGN_SIZE); - const int32_t c_seg = split_pipeline_size / sizeof(T); - const int32_t n_seg = 1; - const int32_t compute_size = 2 * split_pipeline_size; - const int32_t pingpong_offset = (MAX_NRAM_SIZE - weight_size - compute_size) / 2; - const int32_t pingpong_weight_offset = weight_size / 2; - - char *nram_weight = (char *)nram_buffer; - char *nram_compute_a = nram_weight + weight_size; - char *nram_compute_b = nram_compute_a + split_pipeline_size; - char *nram_input = nram_compute_b + split_pipeline_size; - char *nram_output = nram_input + split_pipeline_size; - char *nram_target = nram_output + split_pipeline_size; - - const int32_t loop_num = (c + c_seg - 1) / c_seg; - const int32_t partition_nc = 1; - for (int32_t i = 0; i < loop_num; ++i) { - const int32_t c_index = i * c_seg; - const int32_t c_seg_curr = i == (loop_num - 1) ? 
c - c_index : c_seg; - startPipeline(input, target, weight, nram_compute_a, nram_compute_b, - nram_input, nram_target, nram_weight, nram_output, - has_weight, partition_nc, pingpong_offset, - pingpong_weight_offset, c_index, n, n_seg, c, c_seg_curr, - alpha, gamma, output); - } - } -} - -template -__mlu_global__ void MLUUnion1KernelFocalLossSigmoidForward( - const void *input, const void *target, const void *weight, const int32_t N, - const int32_t C, const float alpha, const float gamma, void *output) { - const int32_t n_seg = N / taskDim + (taskId == taskDim - 1) * (N % taskDim); - const T *input_offset = (T *)input + N / taskDim * taskId * C; - const int32_t *target_offset = (int32_t *)target + N / taskDim * taskId; - T *output_offset = (T *)output + N / taskDim * taskId * C; - - focalLossSigmoidForwardBlock((T *)input_offset, (int32_t *)target_offset, - (T *)weight, n_seg, C, alpha, gamma, - (T *)output_offset); -} -} // namespace forward - -namespace backward { -template -__mlu_func__ void loadInput(char *nram_input, char *nram_target, - const T *gdram_input, const int32_t *gdram_target, - const int32_t deal_n, const int32_t total_c, - const bool pingping_flag, const bool has_weight, - const int32_t nram_offset, - const int32_t gdram_offset) { - if (pingping_flag == PONG) { - nram_input += nram_offset; - nram_target += nram_offset; - } - - __memcpy_async(nram_target, gdram_target + gdram_offset / total_c, - deal_n * sizeof(int32_t), GDRAM2NRAM); - - char *nram_input_load = nram_input; - int32_t compute_align_size = 2 * NFU_ALIGN_SIZE; - if (has_weight) { - if (sizeof(T) == sizeof(half)) { - int32_t compute_align_num = compute_align_size / sizeof(float); - int32_t align_c = PAD_UP(total_c, compute_align_num); - int32_t compute_size = deal_n * align_c * sizeof(float); - nram_input_load += compute_size / 2; - } - int32_t align_c = PAD_UP(total_c, NFU_ALIGN_SIZE / sizeof(T)); - int32_t total_c_size = total_c * sizeof(T); - int32_t align_c_size = align_c * 
sizeof(T); - __memcpy_async(nram_input_load, gdram_input + gdram_offset, total_c_size, - GDRAM2NRAM, align_c_size, total_c_size, deal_n - 1); - } else { - if (sizeof(T) == sizeof(half)) { - int32_t compute_size = - PAD_UP(deal_n * total_c * sizeof(float), compute_align_size); - nram_input_load += compute_size / 2; - } - int32_t load_size = deal_n * total_c * sizeof(T); - __memcpy_async(nram_input_load, gdram_input + gdram_offset, load_size, - GDRAM2NRAM); - } -} - -template -__mlu_func__ void sigmoid(T *dst_data, const T *src_data, - const int32_t elem_count) { - __bang_mul_const(dst_data, (T *)src_data, T(-1), elem_count); - __bang_active_exphp(dst_data, dst_data, elem_count); - __bang_add_const(dst_data, dst_data, T(1), elem_count); - __bang_active_reciphp(dst_data, dst_data, elem_count); -} - -template -__mlu_func__ void coreCompute(char *nram_input, const T *nram_weight, - const float *nram_flt_min, char *nram_pt, - char *nram_alpha_t, char *nram_temp, - char *nram_target, const float *nram_gamma, - char *nram_output, const float alpha, - const int32_t compute_num, const int32_t deal_n, - const int32_t total_c, const bool pingpong_flag, - const int32_t nram_offset, - const bool has_weight) { - if (pingpong_flag == PONG) { - nram_input += nram_offset; - nram_pt += nram_offset; - nram_alpha_t += nram_offset; - nram_temp += nram_offset; - nram_output += nram_offset; - nram_target += nram_offset; - } - - if (sizeof(T) == sizeof(half)) { - const int32_t compute_size = compute_num * sizeof(float); - char *nram_input_load = nram_input + compute_size / 2; - __bang_half2float((float *)nram_input, (half *)nram_input_load, - compute_num); - } - - // 0. alpha_t = alpha - 1 - __nramset((float *)nram_alpha_t, compute_num, (float)(alpha - 1.0)); - - // 1. 
pt = 1 - sigmoid(x) - sigmoid((float *)nram_pt, (float *)nram_input, compute_num); - __bang_mul_const((float *)nram_pt, (float *)nram_pt, (float)(-1), - compute_num); - __bang_add_const((float *)nram_pt, (float *)nram_pt, (float)1, compute_num); - - // 2. pt = target[n] == c ? sigmoid(x) : 1 - sigmoid(x) - // alpha_t = target[n] == c ? alpha : alpha - 1 - const int32_t nfu_align_num = NFU_ALIGN_SIZE / sizeof(float); - for (int n = 0; n < deal_n; n++) { - const int32_t target_value = ((int32_t *)nram_target)[n]; - if (target_value >= total_c || target_value < 0) continue; - int32_t c_offset = 0; - if (has_weight) { - int32_t c_align_num = nfu_align_num; - if (sizeof(T) == sizeof(half)) { - c_align_num += nfu_align_num; - } - c_offset = PAD_UP(total_c, c_align_num); - } else { - c_offset = total_c; - } - int32_t idx = n * c_offset + target_value; - *((float *)nram_pt + idx) = 1.0 - *((float *)nram_pt + idx); - *((float *)nram_alpha_t + idx) = alpha; - } - - // 3. temp = -alpha_t * e^(gamma * log(max(1 - pt, FLT_MIN)) - __bang_mul_const((float *)nram_temp, (float *)nram_pt, (float)(-1), - compute_num); - __bang_add_const((float *)nram_temp, (float *)nram_temp, (float)(1), - compute_num); - __bang_cycle_maxequal((float *)nram_temp, (float *)nram_temp, - (float *)nram_flt_min, compute_num, nfu_align_num); - __bang_active_loghp((float *)nram_temp, (float *)nram_temp, compute_num); - __bang_cycle_mul((float *)nram_temp, (float *)nram_temp, (float *)nram_gamma, - compute_num, nfu_align_num); - __bang_active_exphp((float *)nram_temp, (float *)nram_temp, compute_num); - __bang_mul((float *)nram_temp, (float *)nram_temp, (float *)nram_alpha_t, - compute_num); - __bang_mul_const((float *)nram_temp, (float *)nram_temp, (float)(-1), - compute_num); - - // 4. 
output = 1 - pt - gamma * pt * log(max(pt, FLT_MIN)) - __bang_cycle_maxequal((float *)nram_output, (float *)nram_pt, - (float *)nram_flt_min, compute_num, nfu_align_num); - __bang_active_loghp((float *)nram_output, (float *)nram_output, compute_num); - __bang_mul((float *)nram_output, (float *)nram_output, (float *)nram_pt, - compute_num); - __bang_cycle_mul((float *)nram_output, (float *)nram_output, - (float *)nram_gamma, compute_num, nfu_align_num); - __bang_add((float *)nram_output, (float *)nram_output, (float *)nram_pt, - compute_num); - __bang_mul_const((float *)nram_output, (float *)nram_output, (float)(-1), - compute_num); - __bang_add_const((float *)nram_output, (float *)nram_output, (float)(1), - compute_num); - - // 5. output = output * temp - __bang_mul((float *)nram_output, (float *)nram_output, (float *)nram_temp, - compute_num); - - if (sizeof(T) == sizeof(half)) { - __bang_float2half_rd((half *)nram_output, (float *)nram_output, - compute_num); - } - - if (has_weight) { - // with weight - for (int n = 0; n < deal_n; n++) { - int32_t c_align_num = nfu_align_num; - if (sizeof(T) == sizeof(half)) { - c_align_num += nfu_align_num; - } - int32_t align_c = PAD_UP(total_c, c_align_num); - int32_t target_value = ((int32_t *)nram_target)[n]; - T weight_value = nram_weight[target_value]; - __bang_mul_const((T *)nram_output + n * align_c, - (T *)nram_output + n * align_c, weight_value, align_c); - } - } -} - -template -__mlu_func__ void storeOutput(T *gdram_output, const char *nram_output, - const int32_t deal_n, const int32_t total_c, - const bool pingpong_flag, const bool has_weight, - const int32_t nram_offset, - const int32_t gdram_offset) { - if (pingpong_flag == PONG) { - nram_output += nram_offset; - } - const int32_t store_size = deal_n * total_c * sizeof(T); - if (has_weight) { - int32_t align_c = PAD_UP(total_c, NFU_ALIGN_SIZE / sizeof(T)); - int32_t total_c_size = total_c * sizeof(T); - int32_t align_c_size = align_c * sizeof(T); - 
__memcpy_async(gdram_output + gdram_offset, nram_output, total_c_size, - NRAM2GDRAM, total_c_size, align_c_size, deal_n - 1); - } else { - __memcpy_async(gdram_output + gdram_offset, nram_output, store_size, - NRAM2GDRAM); - } -} - -template -__mlu_func__ void focalLossSigmoidBackwardBlock( - const T *input, const int32_t *target, const T *weight, const float gamma, - const float alpha, const int32_t total_n, const int32_t deal_n, - const int32_t total_c, T *output) { - // params per time slice - int32_t deal_num = deal_n * total_c; - int32_t deal_size = deal_num * sizeof(float); - int32_t compute_num = 0; - int32_t compute_size = 0; - int32_t compute_align_size = NFU_ALIGN_SIZE; - const int32_t nfu_align_num = NFU_ALIGN_SIZE / sizeof(T); - if (sizeof(T) == sizeof(half)) { - compute_align_size += NFU_ALIGN_SIZE; - } - const int32_t compute_align_num = compute_align_size / sizeof(float); - bool has_weight = false; - if (weight != NULL) { - has_weight = true; - int32_t align_c = PAD_UP(total_c, compute_align_num); - compute_num = deal_n * align_c; - compute_size = compute_num * sizeof(float); - } else { - compute_size = PAD_UP(deal_size, compute_align_size); - compute_num = compute_size / sizeof(float); - } - - // params per core - int32_t total_num = total_n * total_c; - int32_t num_per_core = PAD_DOWN(total_num / taskDim, deal_num); - int32_t loop_per_core = num_per_core / deal_num; - - /* NRAM partition: - * - * |-----------------ping pong--------------------| - * |input | pt | alpha_t | temp | output | target | flt_min | gamma | weight| - * - * split_pipeline_num is 5: input, pt, alpha_t, temp, output. - * nram_reserved_line_num is 2: flt_min, gamma. 
- */ - const int32_t split_pipeline_num = 5; - const int32_t nram_reserved_line_num = 2; - int32_t target_deal_size = deal_n * sizeof(int32_t); - int32_t target_deal_size_align = PAD_UP(target_deal_size, NFU_ALIGN_SIZE); - // nram PING/PONG offset - int32_t ping_pong_offset = - compute_size * split_pipeline_num + target_deal_size_align; - - // gdram addr - int32_t *base_addr_target = - (int32_t *)target + taskId * loop_per_core * deal_n; - T *base_addr_input = (T *)input + taskId * num_per_core; - T *base_addr_output = output + taskId * num_per_core; - - // nram addr - char *nram_input = (char *)nram_buffer; - char *nram_pt = nram_input + compute_size; - char *nram_alpha_t = nram_pt + compute_size; - char *nram_temp = nram_alpha_t + compute_size; - char *nram_output = nram_temp + compute_size; - char *nram_target = nram_output + compute_size; - float *nram_flt_min = NULL; - float *nram_gamma = NULL; - T *nram_weight = NULL; - - if (!has_weight) { - nram_flt_min = (float *)(nram_buffer + MAX_NRAM_SIZE - - nram_reserved_line_num * NFU_ALIGN_SIZE); - nram_gamma = nram_flt_min + nfu_align_num; - } else { - int32_t weight_space = PAD_UP(total_c * sizeof(T), NFU_ALIGN_SIZE); - nram_flt_min = - (float *)(nram_buffer + MAX_NRAM_SIZE - - nram_reserved_line_num * NFU_ALIGN_SIZE - weight_space); - nram_gamma = nram_flt_min + nfu_align_num; - nram_weight = (T *)(nram_gamma + nfu_align_num); - __memcpy_async(nram_weight, weight, total_c * sizeof(T), GDRAM2NRAM); - } - - // nram set gamma and FLT_MIN - __nramset(nram_gamma, nfu_align_num, gamma); - __nramset(nram_flt_min, nfu_align_num, FLT_MIN); - - /* - * Pipeline: The pipeline is processed in three stages: Load, Compute, Store. - * The allocated memory space of NRAM is divided into two parts: - * PING and Pong. In a single time slice, PING is used to process - * IO stream and PONG is used for computation. Both of them are - * processed synchronously until finished. 
- * - * diagram of PINGPONG: - * |------|-----------------------------------------------------------------| - * | | space | - * |------|-----------------------------------------------------------------| - * | time | Ping | Pong | Ping | Pong | Ping | Pong | - * |------|-----------------------------------------------------------------| - * | 0 | L0 | | | | | | - * | 1 | C0 | L1 | | | | | - * | 2 | S0 | C1 | L2 | | | | - * | 3 | | S1 | C2 | L3 | | | - * | 4 | | | S2 | C3 | L4 | | - * | 5 | | | | S3 | C4 | L5 | - * | 6 | | | | | S4 | C5 | - * | 7 | | | | | | S5 | - * |------|-----------------------------------------------------------------| - */ - - // diagram of PINGPONG: L0 - if (loop_per_core > 0) { - loadInput(nram_input, nram_target, base_addr_input, base_addr_target, - deal_n, total_c, PING, has_weight, ping_pong_offset, 0); - __asm__ volatile("sync;"); - } - - // diagram of PINGPONG: C0 and L1 - if (loop_per_core > 1) { - coreCompute(nram_input, nram_weight, nram_flt_min, nram_pt, nram_alpha_t, - nram_temp, nram_target, nram_gamma, nram_output, alpha, - compute_num, deal_n, total_c, PING, ping_pong_offset, - has_weight); - loadInput(nram_input, nram_target, base_addr_input, base_addr_target, - deal_n, total_c, PONG, has_weight, ping_pong_offset, deal_num); - __asm__ volatile("sync;"); - } - - for (int i = 0; i < loop_per_core - 2; ++i) { - if (i % 2 == PING) { - storeOutput(base_addr_output, nram_output, deal_n, total_c, PING, - has_weight, ping_pong_offset, i * deal_num); - coreCompute(nram_input, nram_weight, nram_flt_min, nram_pt, nram_alpha_t, - nram_temp, nram_target, nram_gamma, nram_output, alpha, - compute_num, deal_n, total_c, PONG, ping_pong_offset, - has_weight); - loadInput(nram_input, nram_target, base_addr_input, base_addr_target, - deal_n, total_c, PING, has_weight, ping_pong_offset, - (i + 2) * deal_num); - } else { - storeOutput(base_addr_output, nram_output, deal_n, total_c, PONG, - has_weight, ping_pong_offset, i * deal_num); - 
coreCompute(nram_input, nram_weight, nram_flt_min, nram_pt, nram_alpha_t, - nram_temp, nram_target, nram_gamma, nram_output, alpha, - compute_num, deal_n, total_c, PING, ping_pong_offset, - has_weight); - loadInput(nram_input, nram_target, base_addr_input, base_addr_target, - deal_n, total_c, PONG, has_weight, ping_pong_offset, - (i + 2) * deal_num); - } - __asm__ volatile("sync;"); - } - - if (loop_per_core > 1) { - if ((loop_per_core - 2) % 2 == PING) { - storeOutput(base_addr_output, nram_output, deal_n, total_c, PING, - has_weight, ping_pong_offset, (loop_per_core - 2) * deal_num); - coreCompute(nram_input, nram_weight, nram_flt_min, nram_pt, nram_alpha_t, - nram_temp, nram_target, nram_gamma, nram_output, alpha, - compute_num, deal_n, total_c, PONG, ping_pong_offset, - has_weight); - } else { - storeOutput(base_addr_output, nram_output, deal_n, total_c, PONG, - has_weight, ping_pong_offset, (loop_per_core - 2) * deal_num); - coreCompute(nram_input, nram_weight, nram_flt_min, nram_pt, nram_alpha_t, - nram_temp, nram_target, nram_gamma, nram_output, alpha, - compute_num, deal_n, total_c, PING, ping_pong_offset, - has_weight); - } - __asm__ volatile("sync;"); - } - - if (loop_per_core > 0) { - if (loop_per_core == 1) { - coreCompute(nram_input, nram_weight, nram_flt_min, nram_pt, nram_alpha_t, - nram_temp, nram_target, nram_gamma, nram_output, alpha, - compute_num, deal_n, total_c, PING, ping_pong_offset, - has_weight); - __asm__ volatile("sync;"); - } - if ((loop_per_core - 1) % 2 == PING) { - storeOutput(base_addr_output, nram_output, deal_n, total_c, PING, - has_weight, ping_pong_offset, (loop_per_core - 1) * deal_num); - } else { - storeOutput(base_addr_output, nram_output, deal_n, total_c, PONG, - has_weight, ping_pong_offset, (loop_per_core - 1) * deal_num); - } - } - - // process the remaining data which N remainder per core is less than deal_n - int32_t rem_for_all = total_num - num_per_core * taskDim; - if (rem_for_all == 0) return; - int32_t 
rem_n_for_all = rem_for_all / total_c; - int32_t rem_n_per_core = (rem_n_for_all + taskDim - 1) / taskDim; - int32_t rem_num_per_core = rem_n_per_core * total_c; - int32_t rem_num_per_core_align = 0; - int32_t rem_core_num = rem_for_all / rem_num_per_core; - - int32_t rem_n_for_last = rem_n_for_all % rem_n_per_core; - int32_t rem_num_for_last = rem_n_for_last * total_c; - int32_t rem_num_for_last_align = 0; - - if (has_weight) { - int32_t align_c = PAD_UP(total_c, compute_align_num); - rem_num_per_core_align = rem_n_per_core * align_c; - rem_num_for_last_align = rem_n_for_last * align_c; - } else { - rem_num_per_core_align = PAD_UP(rem_num_per_core, compute_align_num); - rem_num_for_last_align = PAD_UP(rem_num_for_last, compute_align_num); - } - - int32_t rem_addr_base = num_per_core * taskDim; - int32_t rem_target_addr_base = loop_per_core * deal_n * taskDim; - base_addr_target = (int32_t *)target + rem_target_addr_base; - base_addr_input = (T *)input + rem_addr_base; - base_addr_output = output + rem_addr_base; - - if (taskId < rem_core_num) { - loadInput(nram_input, nram_target, base_addr_input, base_addr_target, - rem_n_per_core, total_c, PING, has_weight, ping_pong_offset, - taskId * rem_num_per_core); - __asm__ volatile("sync;"); - coreCompute(nram_input, nram_weight, nram_flt_min, nram_pt, nram_alpha_t, - nram_temp, nram_target, nram_gamma, nram_output, alpha, - rem_num_per_core_align, rem_n_per_core, total_c, PING, - ping_pong_offset, has_weight); - __asm__ volatile("sync;"); - storeOutput(base_addr_output, nram_output, rem_n_per_core, total_c, PING, - has_weight, ping_pong_offset, taskId * rem_num_per_core); - } else if (taskId == rem_core_num) { - if (rem_num_for_last == 0) return; - loadInput(nram_input, nram_target, base_addr_input, base_addr_target, - rem_n_for_last, total_c, PING, has_weight, ping_pong_offset, - taskId * rem_num_per_core); - __asm__ volatile("sync;"); - coreCompute(nram_input, nram_weight, nram_flt_min, nram_pt, nram_alpha_t, - 
nram_temp, nram_target, nram_gamma, nram_output, alpha, - rem_num_for_last_align, rem_n_for_last, total_c, PING, - ping_pong_offset, has_weight); - __asm__ volatile("sync;"); - storeOutput(base_addr_output, nram_output, rem_n_for_last, total_c, PING, - has_weight, ping_pong_offset, taskId * rem_num_per_core); - } else { - return; - } -} - -template -__mlu_global__ void MLUUnion1KernelFocalLossSigmoidBackward( - const void *input, const void *target, const void *weight, - const float gamma, const float alpha, const int32_t total_n, - const int32_t deal_n, const int32_t total_c, void *output) { - focalLossSigmoidBackwardBlock((T *)input, (int32_t *)target, (T *)weight, - gamma, alpha, total_n, deal_n, total_c, - (T *)output); -} -} // namespace backward - -void KernelFocalLossSigmoidForward(cnrtDim3_t k_dim, cnrtFunctionType_t k_type, - cnrtQueue_t queue, - const cnrtDataType_t d_type, - const void *input, const void *target, - const void *weight, const int32_t N, - const int32_t C, const float alpha, - const float gamma, void *output) { - if (d_type == CNRT_FLOAT16) { - forward::MLUUnion1KernelFocalLossSigmoidForward< - half><<>>(input, target, weight, N, C, alpha, - gamma, output); - } else { - forward::MLUUnion1KernelFocalLossSigmoidForward< - float><<>>(input, target, weight, N, C, alpha, - gamma, output); - } -} - -void KernelFocalLossSigmoidBackward(cnrtDim3_t k_dim, cnrtFunctionType_t k_type, - cnrtQueue_t queue, - const cnrtDataType_t d_type, - const void *input, const void *target, - const void *weight, const float gamma, - const float alpha, const int32_t dim_n, - const int32_t deal_n, const int32_t dim_c, - void *output) { - if (d_type == CNRT_FLOAT16) { - backward::MLUUnion1KernelFocalLossSigmoidBackward< - half><<>>(input, target, weight, gamma, alpha, - dim_n, deal_n, dim_c, output); - } else { - backward::MLUUnion1KernelFocalLossSigmoidBackward< - float><<>>(input, target, weight, gamma, alpha, - dim_n, deal_n, dim_c, output); - } -} diff --git 
a/mmcv/ops/csrc/common/mlu/iou3d_mlu_kernel.mlu b/mmcv/ops/csrc/common/mlu/iou3d_mlu_kernel.mlu deleted file mode 100644 index 84e53aa..0000000 --- a/mmcv/ops/csrc/common/mlu/iou3d_mlu_kernel.mlu +++ /dev/null @@ -1,431 +0,0 @@ -/************************************************************************* - * Copyright (C) 2022 Cambricon. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - *************************************************************************/ - -#include "common_mlu_helper.hpp" -#include "iou3d_utils.hpp" - -#define SIZE_SRAM_BUF (MAX_SRAM_SIZE) - -/* NRAM buffer - * Suppose deal N boxes once time. 
----------------------------------------------------------------- -| Basic |score (1N)+ |intersect_pts(48N)| | -| |valid_box(1N) |+ ordered_pts(48N)| temp_long(72N) | -| |+ temp_buffer(10N)| | | -|--------------------------|------------------|----------------| -| Reuse | null | null |rotated_pts(16N)| -|-------|------------------|------------------|----------------| - ---------------------------------------------------------------------------- -| Basic | dist_ram(24N) | valid_pts(24N) |box1(5N) |box1_buffer(5KB) | -| | |+ nums_in_ram(1N)|+ box2(5N)|+nram_save(5KB) | -|--------------------------|-----------------|----------|-----------------| -| Reuse | vec_buffer(5N) | null | null | null | -|-------|------------------|-----------------|----------|-----------------| -Total Basic Memory Size = 239N * sizeof(float) + 10KB -*/ - -__nram__ char nram_buffer[MAX_NRAM_SIZE]; -__mlu_shared__ char sram_buffer[SIZE_SRAM_BUF]; - -template -__mlu_func__ void iou3D_detection(int32_t &result_box_num, int32_t *output_data, - const T *boxes_data, float *scores_data, - const int core_limit, const int input_box_num, - const float iou_threshold, - mluMemcpyDirection_t scores_load_dir, - mluMemcpyDirection_t scores_store_dir, - mluMemcpyDirection_t boxes_load_dir) { - // NRAM divide by (2+4*COMPUTE_COUNT_ALIGN) copies of NRAM, counted by bytes - const int nram_save_limit_count = 256; - int box_read_limit_count = 256; - float div_thresh_iou = 1.0 / iou_threshold; - // every box require 239 * sizeof(float) space in nram; - const int32_t copies_of_nram = 239 * sizeof(float); - const int32_t limit = (MAX_NRAM_SIZE - 5 * box_read_limit_count * sizeof(T) - - nram_save_limit_count * sizeof(int32_t)) / - copies_of_nram; - - // x,y,z,dx,dy,dz,angle - const T *input_x_ptr = boxes_data; - const T *input_y_ptr = input_x_ptr + input_box_num; - const T *input_dx_ptr = input_y_ptr + 2 * input_box_num; - const T *input_dy_ptr = input_dx_ptr + input_box_num; - const T *input_angle_ptr = input_dy_ptr + 
2 * input_box_num; - float *input_score_ptr = scores_data; - - // data split - int avg_cluster = 0; - int rem_cluster = 0; - int len_cluster = 0; - int cluster_offset = 0; - if (clusterDim > 0) { - // union - avg_cluster = input_box_num / clusterDim; - rem_cluster = input_box_num % clusterDim; - len_cluster = avg_cluster + (clusterId < rem_cluster ? 1 : 0); - cluster_offset = avg_cluster * clusterId + - (clusterId <= rem_cluster ? clusterId : rem_cluster); - } else { - // block - len_cluster = input_box_num; - cluster_offset = 0; - } - int len_core = input_box_num; - int input_offset = 0; - if (core_limit > 1) { - int avg_core = len_cluster / coreDim; - int rem_core = len_cluster % coreDim; - len_core = avg_core + (coreId < rem_core ? 1 : 0); - int core_offset = - avg_core * coreId + (coreId <= rem_core ? coreId : rem_core); - input_offset = cluster_offset + core_offset; - } - - int32_t max_seg_pad = IOU3D_DOWN(limit, IOU3D_SIZE); - int repeat_iou_compute = len_core / max_seg_pad; - int remain_iou_compute = len_core % max_seg_pad; - - // basic consistent memory layout - void *score = ((char *)nram_buffer); - void *valid_box = ((char *)score) + 1 * max_seg_pad * sizeof(float); - void *temp_buffer = ((char *)valid_box) + 1 * max_seg_pad * sizeof(float); - void *intersect_pts_x = - ((char *)temp_buffer) + 10 * max_seg_pad * sizeof(float); - void *intersect_pts_y = - ((char *)intersect_pts_x) + 24 * max_seg_pad * sizeof(float); - void *ordered_pts_x = - ((char *)intersect_pts_y) + 24 * max_seg_pad * sizeof(float); - void *ordered_pts_y = - ((char *)ordered_pts_x) + 24 * max_seg_pad * sizeof(float); - void *temp_long_1 = - ((char *)ordered_pts_y) + 24 * max_seg_pad * sizeof(float); - void *temp_long_2 = ((char *)temp_long_1) + 24 * max_seg_pad * sizeof(float); - void *temp_long_3 = ((char *)temp_long_2) + 24 * max_seg_pad * sizeof(float); - void *dist_ram = ((char *)temp_long_3) + 24 * max_seg_pad * sizeof(float); - void *valid_pts = ((char *)dist_ram) + 24 * 
max_seg_pad * sizeof(float); - void *nums_in_ram = ((char *)valid_pts) + 24 * max_seg_pad * sizeof(float); - T *box1 = (T *)(((char *)nums_in_ram) + 1 * max_seg_pad * sizeof(float)); - T *box2 = (T *)(((char *)box1) + 5 * max_seg_pad * sizeof(float)); - void *box1_buffer = ((char *)box2) + 5 * max_seg_pad * sizeof(float); - int32_t *nram_save = - (int32_t *)(((char *)box1_buffer) + 5 * box_read_limit_count * sizeof(T)); - // nram_save ~ nram_save_limit_count * sizeof(int32_t) - int nram_save_count = 0; - - // reuse memory - void *rotated_pts1_x = ((char *)dist_ram); - void *rotated_pts1_y = - ((char *)rotated_pts1_x) + 4 * max_seg_pad * sizeof(float); - void *rotated_pts2_x = - ((char *)rotated_pts1_y) + 4 * max_seg_pad * sizeof(float); - void *rotated_pts2_y = - ((char *)rotated_pts2_x) + 4 * max_seg_pad * sizeof(float); - void *vec_buffer = ((char *)temp_long_1) + 5 * max_seg_pad * sizeof(float); - // vec_buffer ~ 16 * max_seg_pad * sizeof(float) - - // First, initialize ram with all 0, or could cause nan/inf unexcepted results - __bang_write_zero((unsigned char *)nram_buffer, copies_of_nram * max_seg_pad); - // number 8 and 0xff relay on box_read_limit_count initial as 256 - const int max_box_seg_id = (input_box_num - 1) >> 8; - const int last_rem_box_number = ((input_box_num - 1) & 0xff) + 1; - for (int32_t cur_box = 0; cur_box < input_box_num; ++cur_box) { - __sync_all(); - int box_seg_id = cur_box >> 8, box_id = cur_box & 0xff; - box_read_limit_count = box_seg_id == max_box_seg_id ? 
last_rem_box_number - : box_read_limit_count; - if (box_id == 0) { - // x,y,z,dx,dy,dz,angle - int offset_num = box_seg_id << 8; - // x - __memcpy((char *)box1_buffer, input_x_ptr + offset_num, - box_read_limit_count * 1 * sizeof(T), boxes_load_dir, - box_read_limit_count * 1 * sizeof(T), - box_read_limit_count * 1 * sizeof(T), 0); - // y - __memcpy((char *)box1_buffer + box_read_limit_count * 1 * sizeof(T), - input_y_ptr + offset_num, box_read_limit_count * 1 * sizeof(T), - boxes_load_dir, box_read_limit_count * 1 * sizeof(T), - box_read_limit_count * 1 * sizeof(T), 0); - // dx - __memcpy((char *)box1_buffer + box_read_limit_count * 2 * sizeof(T), - input_dx_ptr + offset_num, box_read_limit_count * 1 * sizeof(T), - boxes_load_dir, box_read_limit_count * 1 * sizeof(T), - box_read_limit_count * 1 * sizeof(T), 0); - // dy - __memcpy((char *)box1_buffer + box_read_limit_count * 3 * sizeof(T), - input_dy_ptr + offset_num, box_read_limit_count * 1 * sizeof(T), - boxes_load_dir, box_read_limit_count * 1 * sizeof(T), - box_read_limit_count * 1 * sizeof(T), 0); - // angle - __memcpy((char *)box1_buffer + box_read_limit_count * 4 * sizeof(T), - input_angle_ptr + offset_num, - box_read_limit_count * 1 * sizeof(T), boxes_load_dir, - box_read_limit_count * 1 * sizeof(T), - box_read_limit_count * 1 * sizeof(T), 0); - } - if (((float *)input_score_ptr)[cur_box] == 0) { - continue; - } - // save result - nram_save[nram_save_count] = cur_box; - result_box_num++; - nram_save_count++; - if (clusterId == 0 && coreId == 0 && - nram_save_count == nram_save_limit_count) { - pvLock(); - __memcpy(output_data, nram_save, nram_save_count * sizeof(int32_t), - NRAM2GDRAM); - pvUnlock(); - output_data += nram_save_count; - nram_save_count = 0; - } - // prepare box1 - // x - __bang_write_value((float *)box1, max_seg_pad, - float(((T *)box1_buffer)[box_id])); - // y - __bang_write_value( - (float *)box1 + max_seg_pad, max_seg_pad, - float(((T *)box1_buffer)[box_id + 1 * box_read_limit_count])); 
- // dx - __bang_write_value( - (float *)box1 + max_seg_pad * 2, max_seg_pad, - float(((T *)box1_buffer)[box_id + 2 * box_read_limit_count])); - // dy - __bang_write_value( - (float *)box1 + max_seg_pad * 3, max_seg_pad, - float(((T *)box1_buffer)[box_id + 3 * box_read_limit_count])); - // angle - __bang_write_value( - (float *)box1 + max_seg_pad * 4, max_seg_pad, - float(((T *)box1_buffer)[box_id + 4 * box_read_limit_count])); - - float max_area = 1.0f * - ((T *)box1_buffer)[box_id + 2 * box_read_limit_count] * - ((T *)box1_buffer)[box_id + 3 * box_read_limit_count]; - // update score - - for (int i = 0; i <= repeat_iou_compute; i++) { - if (i == repeat_iou_compute && remain_iou_compute == 0) { - break; - } - int seg_len = max_seg_pad; - int cpy_len = - (i == repeat_iou_compute) ? remain_iou_compute : max_seg_pad; - // int half_offset = std::is_same::value ? max_seg_pad * 5 : 0; - int half_offset = (sizeof(T) == sizeof(half)) ? max_seg_pad * 5 : 0; - // score - __memcpy(score, input_score_ptr + input_offset + i * max_seg_pad, - cpy_len * sizeof(float), scores_load_dir, - cpy_len * sizeof(float), cpy_len * sizeof(float), 0); - // x - __memcpy(box2 + half_offset, input_x_ptr + input_offset + i * max_seg_pad, - cpy_len * 1 * sizeof(T), boxes_load_dir, cpy_len * 1 * sizeof(T), - cpy_len * 1 * sizeof(T), 0); - // y - __memcpy(box2 + half_offset + seg_len * 1, - input_y_ptr + input_offset + i * max_seg_pad, - cpy_len * 1 * sizeof(T), boxes_load_dir, cpy_len * 1 * sizeof(T), - cpy_len * 1 * sizeof(T), 0); - // dx - __memcpy(box2 + half_offset + seg_len * 2, - input_dx_ptr + input_offset + i * max_seg_pad, - cpy_len * 1 * sizeof(T), boxes_load_dir, cpy_len * 1 * sizeof(T), - cpy_len * 1 * sizeof(T), 0); - // dy - __memcpy(box2 + half_offset + seg_len * 3, - input_dy_ptr + input_offset + i * max_seg_pad, - cpy_len * 1 * sizeof(T), boxes_load_dir, cpy_len * 1 * sizeof(T), - cpy_len * 1 * sizeof(T), 0); - // angle - __memcpy(box2 + half_offset + seg_len * 4, - 
input_angle_ptr + input_offset + i * max_seg_pad, - cpy_len * 1 * sizeof(T), boxes_load_dir, cpy_len * 1 * sizeof(T), - cpy_len * 1 * sizeof(T), 0); - // if (std::is_same::value) { - if (sizeof(T) == sizeof(half)) { - __bang_half2float((float *)box2, (half *)(box2 + half_offset), - seg_len * 5); - } - - // Calculate rotated vertices - void *temp1_ram = ((char *)temp_buffer); - void *temp2_ram = ((char *)temp_buffer) + seg_len * sizeof(float); - void *temp3_ram = ((char *)temp_buffer) + 2 * seg_len * sizeof(float); - void *temp4_ram = ((char *)temp_buffer) + 3 * seg_len * sizeof(float); - getRotatedVertices((float *)rotated_pts1_x, (float *)rotated_pts1_y, - (float *)box1, (float *)temp1_ram, (float *)temp2_ram, - (float *)temp3_ram, (float *)temp4_ram, seg_len); - getRotatedVertices((float *)rotated_pts2_x, (float *)rotated_pts2_y, - (float *)box2, (float *)temp1_ram, (float *)temp2_ram, - (float *)temp3_ram, (float *)temp4_ram, seg_len); - - __bang_write_zero((float *)valid_pts, 24 * seg_len); - __bang_write_zero((float *)nums_in_ram, seg_len); - __bang_write_value(((float *)valid_box), seg_len, 1.0f); - void *vec1_x = ((char *)vec_buffer); - void *vec1_y = ((char *)vec1_x) + 4 * seg_len * sizeof(float); - void *vec2_x = ((char *)vec1_y) + 4 * seg_len * sizeof(float); - void *vec2_y = ((char *)vec2_x) + 4 * seg_len * sizeof(float); - void *temp5_ram = ((char *)temp_buffer) + 4 * seg_len * sizeof(float); - void *temp6_ram = ((char *)temp_buffer) + 5 * seg_len * sizeof(float); - void *temp7_ram = ((char *)temp_buffer) + 6 * seg_len * sizeof(float); - void *temp8_ram = ((char *)temp_buffer) + 7 * seg_len * sizeof(float); - void *temp9_ram = ((char *)temp_buffer) + 8 * seg_len * sizeof(float); - void *temp10_ram = ((char *)temp_buffer) + 9 * seg_len * sizeof(float); - - // Get all intersection points - getIntersectPts( - (float *)rotated_pts1_x, (float *)rotated_pts1_y, - (float *)rotated_pts2_x, (float *)rotated_pts2_y, (float *)vec1_x, - (float *)vec1_y, (float 
*)vec2_x, (float *)vec2_y, - (float *)intersect_pts_x, (float *)intersect_pts_y, - (float *)valid_pts, (float *)nums_in_ram, (float *)temp1_ram, - (float *)temp2_ram, (float *)temp3_ram, (float *)temp4_ram, - (float *)temp5_ram, (float *)temp6_ram, (float *)temp7_ram, - (float *)temp8_ram, (float *)temp9_ram, (float *)temp10_ram, seg_len); - - // Where nums_in <= 2, set valid_box to false - __bang_write_value((float *)temp9_ram, COMPUTE_COUNT_ALIGN, (float)2); - __bang_cycle_gt((float *)temp1_ram, (float *)nums_in_ram, - (float *)temp9_ram, seg_len, COMPUTE_COUNT_ALIGN); - __bang_and((float *)valid_box, (float *)valid_box, (float *)temp1_ram, - seg_len); - __bang_cycle_and((float *)valid_pts, (float *)valid_pts, - (float *)valid_box, 24 * seg_len, seg_len); - - // Convex-hull-graham to order the intersection points in clockwise order - // and find the contour area - - convexHullGraham( - (float *)intersect_pts_x, (float *)intersect_pts_y, - (float *)ordered_pts_x, (float *)ordered_pts_y, (float *)dist_ram, - (float *)valid_box, (float *)valid_pts, (float *)nums_in_ram, - (float *)temp7_ram, (float *)temp8_ram, (float *)temp9_ram, - (float *)temp_long_1, (float *)temp_long_2, (float *)temp_long_3, - seg_len, seg_len); - // Calculate polygon area - // set temp1 = intersection part area - polygonArea((float *)ordered_pts_x, (float *)ordered_pts_y, - (float *)valid_box, (float *)valid_pts, (float *)nums_in_ram, - (float *)temp1_ram, (float *)temp2_ram, (float *)temp3_ram, - (float *)temp4_ram, (float *)temp5_ram, (float *)temp6_ram, - (float *)temp7_ram, (float *)temp8_ram, (float *)temp9_ram, - seg_len); - // area - __bang_mul((float *)temp2_ram, (float *)box2 + seg_len * 2, - (float *)box2 + seg_len * 3, seg_len); - // get the area_U: area + max_area - area_I - __bang_add_scalar((float *)temp2_ram, (float *)temp2_ram, float(max_area), - seg_len); - __bang_sub((float *)temp2_ram, (float *)temp2_ram, (float *)temp1_ram, - seg_len); // area_U - if (iou_threshold > 0.0) 
{ - __bang_mul_scalar((float *)temp1_ram, (float *)temp1_ram, - div_thresh_iou, seg_len); - } else { - __bang_mul_scalar((float *)temp2_ram, (float *)temp2_ram, iou_threshold, - seg_len); - } - __bang_ge((float *)temp1_ram, (float *)temp2_ram, (float *)temp1_ram, - seg_len); - __bang_mul((float *)score, (float *)score, (float *)temp1_ram, seg_len); - - pvLock(); - __memcpy(input_score_ptr + input_offset + i * max_seg_pad, score, - cpy_len * sizeof(float), scores_store_dir, - cpy_len * sizeof(float), cpy_len * sizeof(float), 0); - pvUnlock(); - } - } - if (clusterId == 0 && coreId == 0 && nram_save_count) { - pvLock(); - __memcpy(output_data, nram_save, nram_save_count * sizeof(int32_t), - NRAM2GDRAM); - pvUnlock(); - } -} -__mlu_global__ void MLUBlockorUnionIKernelOU3D( - const void *input_boxes, const int input_box_num, const float iou_threshold, - const cnrtDataType_t data_type_input, void *workspace, void *result_num, - void *output) { - int input_dwidth = (data_type_input == CNRT_FLOAT32) ? 
4 : 2; - mluMemcpyDirection_t scores_load_dir = GDRAM2NRAM; - mluMemcpyDirection_t scores_store_dir = NRAM2GDRAM; - mluMemcpyDirection_t boxes_load_dir = GDRAM2NRAM; - float *scores_data = (float *)workspace; - float *boxes_data = (float *)input_boxes; - const int cluster_score_size = input_box_num * sizeof(float); - const int cluster_boxes_size = input_box_num * 7 * input_dwidth; - char *sram_score = (char *)sram_buffer; - char *sram_boxes = (char *)sram_buffer + cluster_score_size; - if (clusterDim == 1 && SIZE_SRAM_BUF > cluster_score_size) { - scores_data = (float *)sram_score; - scores_load_dir = SRAM2NRAM; - scores_store_dir = NRAM2SRAM; - if (coreId == 0x80) { - __sramset((void *)sram_buffer, input_box_num, 1.0f); - } - } else { - if (coreId == 0) { - __gdramset(scores_data, input_box_num, 1.0f); - } - } - if (clusterDim == 1 && - SIZE_SRAM_BUF - cluster_score_size >= cluster_boxes_size) { - boxes_load_dir = SRAM2NRAM; - boxes_data = (float *)sram_boxes; - if (coreId == 0x80) { - __memcpy((char *)boxes_data, (char *)input_boxes, cluster_boxes_size, - GDRAM2SRAM); - } - } - __sync_cluster(); - - int32_t result_box_num = 0; - int32_t *out_data = (int32_t *)output; - - switch (data_type_input) { - default: { return; } - case CNRT_FLOAT16: { - iou3D_detection(result_box_num, out_data, (half *)boxes_data, scores_data, - taskDim, input_box_num, iou_threshold, scores_load_dir, - scores_store_dir, boxes_load_dir); - }; break; - case CNRT_FLOAT32: { - iou3D_detection(result_box_num, out_data, boxes_data, scores_data, - taskDim, input_box_num, iou_threshold, scores_load_dir, - scores_store_dir, boxes_load_dir); - }; break; - } - ((int32_t *)result_num)[0] = result_box_num; -} - -void KernelIou3d(cnrtDim3_t k_dim, cnrtFunctionType_t k_type, cnrtQueue_t queue, - const cnrtDataType_t data_type_input, const void *boxes_dram, - const int input_box_num, const float iou_threshold, - void *workspace, void *output_size, void *output) { - switch (k_type) { - default: { return; 
} - case CNRT_FUNC_TYPE_BLOCK: - case CNRT_FUNC_TYPE_UNION1: - case CNRT_FUNC_TYPE_UNION2: - case CNRT_FUNC_TYPE_UNION4: - case CNRT_FUNC_TYPE_UNION8: - case CNRT_FUNC_TYPE_UNION16: { - MLUBlockorUnionIKernelOU3D<<>>( - (void *)boxes_dram, input_box_num, iou_threshold, data_type_input, - workspace, output_size, output); - }; break; - } -} diff --git a/mmcv/ops/csrc/common/mlu/iou3d_utils.hpp b/mmcv/ops/csrc/common/mlu/iou3d_utils.hpp deleted file mode 100644 index b98ffe2..0000000 --- a/mmcv/ops/csrc/common/mlu/iou3d_utils.hpp +++ /dev/null @@ -1,695 +0,0 @@ -/************************************************************************* - * Copyright (C) 2022 Cambricon. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- *************************************************************************/ - -#ifndef IOU3D_UTILS_HPP_ -#define IOU3D_UTILS_HPP_ -#include "common_mlu_helper.hpp" - -#define IOU3D_SIZE 64 -#define IOU3D_UP(x, y) (x / y + (int)(x % y > 0)) * y -#define IOU3D_DOWN(x, y) (x / y) * y -#define SIZE_NRAM_BUF (MAX_NRAM_SIZE) -#define SIZE_SRAM_BUF (MAX_SRAM_SIZE) -#define COMPUTE_COUNT_ALIGN 64 -#define INFO_NUM (5) // score, x1, y1, x2, y2 -#define REDUCE_NUM \ - (7) // score, x1, y1, x2, y2, max_index (reserve 2 num for half-type input) -#define SINGLE_BOX_DIM 5 -#define MEMORY_CORE (0x80) -__mlu_func__ void pvLock() { -#if __BANG_ARCH__ == 270 - if (coreId != MEMORY_CORE) { - __bang_lock(0, 0); - } -#endif -} - -__mlu_func__ void pvUnlock() { -#if __BANG_ARCH__ == 270 - if (coreId != MEMORY_CORE) { - __bang_unlock(0, 0); - } -#endif -} - -// cross2d(A, B) = A.x * B.y - A.y * B.x; -template -inline __mlu_func__ void cross2d(T *result, const T *p1_x, const T *p1_y, - const T *p2_x, const T *p2_y, - const int &length, T *temp_ram) { - __bang_mul((T *)temp_ram, (T *)p1_x, (T *)p2_y, length); - __bang_mul((T *)result, (T *)p1_y, (T *)p2_x, length); - __bang_sub((T *)result, (T *)temp_ram, (T *)result, length); -} - -// dot2d(A, B) = A.x * B.x + A.y * B.y -template -inline __mlu_func__ void dot2d(T *result, const T *p1_x, const T *p1_y, - const T *p2_x, const T *p2_y, const int &length, - T *temp_ram) { - __bang_mul((T *)temp_ram, (T *)p1_x, (T *)p2_x, length); - __bang_mul((T *)result, (T *)p1_y, (T *)p2_y, length); - __bang_add((T *)result, (T *)temp_ram, (T *)result, length); -} - -template -__mlu_func__ void getRotatedVertices(T *pts_x, T *pts_y, T *box, T *temp1, - T *temp2, T *temp3, T *temp4, - const uint32_t &actual_compute_box_num) { -// T cosTheta2 = (T)cos(theta) * 0.5f; -- temp1 -// T sinTheta2 = (T)sin(theta) * 0.5f; -- temp2 -// theta is the box's 5th data: a, rotated radian; -#if __BANG_ARCH__ >= 300 - __bang_cos((float *)temp1, ((float *)box) + 4 * 
actual_compute_box_num, - actual_compute_box_num); - __bang_sin((float *)temp2, ((float *)box) + 4 * actual_compute_box_num, - actual_compute_box_num); -#else - __bang_taylor4_cos((T *)temp1, ((T *)box) + 4 * actual_compute_box_num, - (T *)temp3, (T *)temp4, actual_compute_box_num); - __bang_taylor4_sin((T *)temp2, ((T *)box) + 4 * actual_compute_box_num, - (T *)temp3, (T *)temp4, actual_compute_box_num); -#endif - __bang_mul_scalar((T *)temp1, (T *)temp1, (T)0.5, actual_compute_box_num); - __bang_mul_scalar((T *)temp2, (T *)temp2, (T)0.5, actual_compute_box_num); - - // Temp3 = sinTheta2 * box.h; - // Temp4 = cosTheta2 * box.w; - __bang_mul((T *)temp3, (T *)temp2, ((T *)box) + 3 * actual_compute_box_num, - actual_compute_box_num); - __bang_mul((T *)temp4, (T *)temp1, ((T *)box) + 2 * actual_compute_box_num, - actual_compute_box_num); - // pts[0].x = box.x_ctr - sinTheta2 * box.h - cosTheta2 * box.w; - // pts[1].x = box.x_ctr + sinTheta2 * box.h - cosTheta2 * box.w; - __bang_sub((T *)pts_x, (T *)box, (T *)temp3, actual_compute_box_num); - __bang_sub((T *)pts_x, (T *)pts_x, (T *)temp4, actual_compute_box_num); - __bang_add((T *)pts_x + 1 * actual_compute_box_num, (T *)box, (T *)temp3, - actual_compute_box_num); - __bang_sub((T *)pts_x + 1 * actual_compute_box_num, - (T *)pts_x + 1 * actual_compute_box_num, (T *)temp4, - actual_compute_box_num); - // Temp3 = cosTheta2 * box.h; - // Temp4 = sinTheta2 * box.w; - __bang_mul((T *)temp3, (T *)temp1, box + 3 * actual_compute_box_num, - actual_compute_box_num); - __bang_mul((T *)temp4, (T *)temp2, box + 2 * actual_compute_box_num, - actual_compute_box_num); - // pts[0].y = box.y_ctr + cosTheta2 * box.h - sinTheta2 * box.w; - // pts[1].y = box.y_ctr - cosTheta2 * box.h - sinTheta2 * box.w; - __bang_add((T *)pts_y, (T *)box + 1 * actual_compute_box_num, (T *)temp3, - actual_compute_box_num); - __bang_sub((T *)pts_y, (T *)pts_y, (T *)temp4, actual_compute_box_num); - __bang_sub((T *)pts_y + 1 * actual_compute_box_num, - (T 
*)box + 1 * actual_compute_box_num, (T *)temp3, - actual_compute_box_num); - __bang_sub((T *)pts_y + 1 * actual_compute_box_num, - (T *)pts_y + 1 * actual_compute_box_num, (T *)temp4, - actual_compute_box_num); - // pts[2].x = 2 * box.x_ctr - pts[0].x; - // pts[3].x = 2 * box.x_ctr - pts[1].x; - __bang_add((T *)pts_x + 2 * actual_compute_box_num, (T *)box, (T *)box, - actual_compute_box_num); - __bang_sub((T *)pts_x + 2 * actual_compute_box_num, - (T *)pts_x + 2 * actual_compute_box_num, (T *)pts_x, - actual_compute_box_num); - __bang_add((T *)pts_x + 3 * actual_compute_box_num, (T *)box, (T *)box, - actual_compute_box_num); - __bang_sub((T *)pts_x + 3 * actual_compute_box_num, - (T *)pts_x + 3 * actual_compute_box_num, - (T *)pts_x + 1 * actual_compute_box_num, actual_compute_box_num); - // pts[2].y = 2 * box.y_ctr - pts[0].y; - // pts[3].y = 2 * box.y_ctr - pts[1].y; - __bang_add((T *)pts_y + 2 * actual_compute_box_num, - (T *)box + 1 * actual_compute_box_num, - (T *)box + 1 * actual_compute_box_num, actual_compute_box_num); - __bang_sub((T *)pts_y + 2 * actual_compute_box_num, - (T *)pts_y + 2 * actual_compute_box_num, (T *)pts_y, - actual_compute_box_num); - __bang_add((T *)pts_y + 3 * actual_compute_box_num, - (T *)box + 1 * actual_compute_box_num, - (T *)box + 1 * actual_compute_box_num, actual_compute_box_num); - __bang_sub((T *)pts_y + 3 * actual_compute_box_num, - (T *)pts_y + 3 * actual_compute_box_num, - (T *)pts_y + 1 * actual_compute_box_num, actual_compute_box_num); -} - -template -__mlu_func__ void getIntersectPts(T *rotated_pts1_x, T *rotated_pts1_y, - T *rotated_pts2_x, T *rotated_pts2_y, - T *vec1_x, T *vec1_y, T *vec2_x, T *vec2_y, - T *intersect_pts_x, T *intersect_pts_y, - T *valid_pts, T *nums_in_ram, T *temp1_ram, - T *temp2_ram, T *temp3_ram, T *temp4_ram, - T *temp5_ram, T *temp6_ram, T *temp7_ram, - T *temp8_ram, T *temp9_ram, T *temp10_ram, - const uint32_t &actual_compute_box_num) { -// Initialize const data to ram -// temp3 = const 
1e-14(@float), length = COMPUTE_COUNT_ALIGN -#if __BANG_ARCH__ >= 300 - __bang_write_value((T *)temp3_ram, COMPUTE_COUNT_ALIGN, (T)1e-14); -#else - // NOTE: Since active_reciphp function has strict value range, - // [2.2205e-16, 2e6]@float, [0.00391, 65504]@half - __bang_write_value((T *)temp3_ram, COMPUTE_COUNT_ALIGN, (float)1e-14); -#endif - // temp4 = const T(0), length = COMPUTE_COUNT_ALIGN - __bang_write_value((T *)temp4_ram, COMPUTE_COUNT_ALIGN, (T)0); - // temp5 = const T(1), length = COMPUTE_COUNT_ALIGN - __bang_write_value((T *)temp5_ram, COMPUTE_COUNT_ALIGN, (T)1); - - // Line vector, from p1 to p2 is: p1+(p2-p1)*t, t=[0,1] - // for i = 0~3, vec[i] = pts[(i+1)%4] - pts[i] - __bang_sub((T *)vec1_x, (T *)rotated_pts1_x + actual_compute_box_num, - (T *)rotated_pts1_x, 3 * actual_compute_box_num); - __bang_sub((T *)vec1_x + 3 * actual_compute_box_num, (T *)rotated_pts1_x, - (T *)rotated_pts1_x + 3 * actual_compute_box_num, - actual_compute_box_num); - __bang_sub((T *)vec1_y, (T *)rotated_pts1_y + actual_compute_box_num, - (T *)rotated_pts1_y, 3 * actual_compute_box_num); - __bang_sub((T *)vec1_y + 3 * actual_compute_box_num, (T *)rotated_pts1_y, - (T *)rotated_pts1_y + 3 * actual_compute_box_num, - actual_compute_box_num); - - __bang_sub((T *)vec2_x, (T *)rotated_pts2_x + actual_compute_box_num, - (T *)rotated_pts2_x, 3 * actual_compute_box_num); - __bang_sub((T *)vec2_x + 3 * actual_compute_box_num, (T *)rotated_pts2_x, - (T *)rotated_pts2_x + 3 * actual_compute_box_num, - actual_compute_box_num); - __bang_sub((T *)vec2_y, (T *)rotated_pts2_y + actual_compute_box_num, - (T *)rotated_pts2_y, 3 * actual_compute_box_num); - __bang_sub((T *)vec2_y + 3 * actual_compute_box_num, (T *)rotated_pts2_y, - (T *)rotated_pts2_y + 3 * actual_compute_box_num, - actual_compute_box_num); - - // First, line test - test all line combos for intersection, 4x4 possible - for (int i = 0; i < 4; i++) { - for (int j = 0; j < 4; j++) { - // T det = cross2d(vec2[j], vec1[i]) -- temp2 
- cross2d((T *)temp2_ram, (T *)vec2_x + j * actual_compute_box_num, - (T *)vec2_y + j * actual_compute_box_num, - (T *)vec1_x + i * actual_compute_box_num, - (T *)vec1_y + i * actual_compute_box_num, - actual_compute_box_num, (T *)temp1_ram); - // temp8 = sign(det), since active_reciphp only receive positive values - __bang_active_sign((T *)temp8_ram, (T *)temp2_ram, - actual_compute_box_num); - // deal with parallel lines, temp2 = fabs(det), temp1 = temp2 > 1e-14 - __bang_active_abs((T *)temp2_ram, (T *)temp2_ram, actual_compute_box_num); - __bang_cycle_gt((T *)temp1_ram, (T *)temp2_ram, (T *)temp3_ram, - actual_compute_box_num, COMPUTE_COUNT_ALIGN); - // Where temp1 = false, set recip input to 1, avoiding recip(0), cause inf - __bang_not((T *)temp9_ram, (T *)temp1_ram, actual_compute_box_num); - __bang_mul((T *)temp2_ram, (T *)temp2_ram, (T *)temp1_ram, - actual_compute_box_num); - __bang_add((T *)temp2_ram, (T *)temp2_ram, (T *)temp9_ram, - actual_compute_box_num); -// temp2 = 1/temp2, use mult (1/temp2) instead of div temp2 -#if __BANG_ARCH__ >= 300 - __bang_recip((float *)temp2_ram, (float *)temp2_ram, - actual_compute_box_num); -#else - // NOTE: active_reciphp function has strict value range: - // [2.2205e-16, 2e6]@float, [0.00391, 65504]@half - __bang_active_reciphp((T *)temp2_ram, (T *)temp2_ram, - actual_compute_box_num); -#endif - // Restore temp2 invalid box value 1 and sign-bit - __bang_mul((T *)temp2_ram, (T *)temp2_ram, (T *)temp1_ram, - actual_compute_box_num); - __bang_mul((T *)temp2_ram, (T *)temp2_ram, (T *)temp8_ram, - actual_compute_box_num); - - // auto vec12 = pts2[j] - pts1[i], (temp6, temp7) = (x, y) - __bang_sub((T *)temp6_ram, - (T *)rotated_pts2_x + j * actual_compute_box_num, - (T *)rotated_pts1_x + i * actual_compute_box_num, - actual_compute_box_num); - __bang_sub((T *)temp7_ram, - (T *)rotated_pts2_y + j * actual_compute_box_num, - (T *)rotated_pts1_y + i * actual_compute_box_num, - actual_compute_box_num); - - // T t1 = 
cross2d(vec2[j], vec12) mult (1/det) -- temp8 - cross2d((T *)temp8_ram, (T *)vec2_x + j * actual_compute_box_num, - (T *)vec2_y + j * actual_compute_box_num, (T *)temp6_ram, - (T *)temp7_ram, actual_compute_box_num, (T *)temp9_ram); - __bang_mul((T *)temp8_ram, (T *)temp8_ram, (T *)temp2_ram, - actual_compute_box_num); - - // temp1 &= (t1 >= 0.0f && t1 <= 1.0f) -- temp9 - __bang_cycle_ge((T *)temp9_ram, (T *)temp8_ram, (T *)temp4_ram, - actual_compute_box_num, COMPUTE_COUNT_ALIGN); - __bang_and((T *)temp1_ram, (T *)temp1_ram, (T *)temp9_ram, - actual_compute_box_num); - __bang_cycle_le((T *)temp9_ram, (T *)temp8_ram, (T *)temp5_ram, - actual_compute_box_num, COMPUTE_COUNT_ALIGN); - __bang_and((T *)temp1_ram, (T *)temp1_ram, (T *)temp9_ram, - actual_compute_box_num); - - // T t2 = cross2d(vec1[i], vec12) mult temp2 -- temp9 - // NOTE: temp8(t1) is used after, reuse temp7(p2_y) as cross2d temp ram - cross2d((T *)temp9_ram, (T *)vec1_x + i * actual_compute_box_num, - (T *)vec1_y + i * actual_compute_box_num, (T *)temp6_ram, - (T *)temp7_ram, actual_compute_box_num, (T *)temp7_ram); - __bang_mul((T *)temp9_ram, (T *)temp9_ram, (T *)temp2_ram, - actual_compute_box_num); - - // temp1 &= (t2 >= 0.0f && t2 <= 1.0f) -- temp9 - __bang_cycle_ge((T *)temp7_ram, (T *)temp9_ram, (T *)temp4_ram, - actual_compute_box_num, COMPUTE_COUNT_ALIGN); - __bang_and((T *)temp1_ram, (T *)temp1_ram, (T *)temp7_ram, - actual_compute_box_num); - __bang_cycle_le((T *)temp7_ram, (T *)temp9_ram, (T *)temp5_ram, - actual_compute_box_num, COMPUTE_COUNT_ALIGN); - __bang_and((T *)temp1_ram, (T *)temp1_ram, (T *)temp7_ram, - actual_compute_box_num); - - // intersections = (pts1[i] + vec1[i] * t1) * temp1 - __bang_mul((T *)temp9_ram, (T *)vec1_x + i * actual_compute_box_num, - (T *)temp8_ram, actual_compute_box_num); - __bang_add((T *)temp9_ram, - (T *)rotated_pts1_x + i * actual_compute_box_num, - (T *)temp9_ram, actual_compute_box_num); - __bang_mul((T *)intersect_pts_x + (4 * i + j) * 
actual_compute_box_num, - (T *)temp9_ram, (T *)temp1_ram, actual_compute_box_num); - __bang_mul((T *)temp9_ram, (T *)vec1_y + i * actual_compute_box_num, - (T *)temp8_ram, actual_compute_box_num); - __bang_add((T *)temp9_ram, - (T *)rotated_pts1_y + i * actual_compute_box_num, - (T *)temp9_ram, actual_compute_box_num); - __bang_mul((T *)intersect_pts_y + (4 * i + j) * actual_compute_box_num, - (T *)temp9_ram, (T *)temp1_ram, actual_compute_box_num); - - // Assign `valid_pts` bit and accumulate `nums_in` of valid points of each - // box pair - __bang_or((T *)valid_pts + (4 * i + j) * actual_compute_box_num, - (T *)valid_pts + (4 * i + j) * actual_compute_box_num, - (T *)temp1_ram, actual_compute_box_num); - __bang_add((T *)nums_in_ram, (T *)nums_in_ram, (T *)temp1_ram, - actual_compute_box_num); - } - } - - // Check for vertices of rect1 inside rect2 - // temp5 = ABdotAB - dot2d((T *)temp5_ram, (T *)vec2_x, (T *)vec2_y, (T *)vec2_x, (T *)vec2_y, - actual_compute_box_num, (T *)temp9_ram); - // temp6 = ADdotAD - dot2d((T *)temp6_ram, (T *)vec2_x + 3 * actual_compute_box_num, - (T *)vec2_y + 3 * actual_compute_box_num, - (T *)vec2_x + 3 * actual_compute_box_num, - (T *)vec2_y + 3 * actual_compute_box_num, actual_compute_box_num, - (T *)temp9_ram); - // assume ABCD is the rectangle, and P is the point to be judged - // P is inside ABCD iff. 
P's projection on AB lines within AB - // and P's projection on AD lies within AD - for (int i = 0; i < 4; i++) { - // AP = pts1[i] - pts2[0] = (temp7, temp8) - __bang_sub((T *)temp7_ram, (T *)rotated_pts1_x + i * actual_compute_box_num, - (T *)rotated_pts2_x, actual_compute_box_num); - __bang_sub((T *)temp8_ram, (T *)rotated_pts1_y + i * actual_compute_box_num, - (T *)rotated_pts2_y, actual_compute_box_num); - - // temp9 = APdotAB = dot2d(AP, AB) - dot2d((T *)temp9_ram, (T *)temp7_ram, (T *)temp8_ram, (T *)vec2_x, - (T *)vec2_y, actual_compute_box_num, (T *)temp2_ram); - // temp10 = APdotAD = -dot2d(AP, DA) - dot2d((T *)temp10_ram, (T *)temp7_ram, (T *)temp8_ram, - (T *)vec2_x + 3 * actual_compute_box_num, - (T *)vec2_y + 3 * actual_compute_box_num, actual_compute_box_num, - (T *)temp2_ram); - __bang_mul_scalar((T *)temp10_ram, (T *)temp10_ram, (T)-1, - actual_compute_box_num); - - // ((APdotAB >= 0) && (APdotAD >= 0) && (APdotAB <= ABdotAB) && (APdotAD <= - // ADdotAD)) - __bang_cycle_ge((T *)temp1_ram, (T *)temp9_ram, (T *)temp4_ram, - actual_compute_box_num, COMPUTE_COUNT_ALIGN); - __bang_cycle_ge((T *)temp2_ram, (T *)temp10_ram, (T *)temp4_ram, - actual_compute_box_num, COMPUTE_COUNT_ALIGN); - __bang_and((T *)temp1_ram, (T *)temp1_ram, (T *)temp2_ram, - actual_compute_box_num); - __bang_le((T *)temp2_ram, (T *)temp9_ram, (T *)temp5_ram, - actual_compute_box_num); - __bang_and((T *)temp1_ram, (T *)temp1_ram, (T *)temp2_ram, - actual_compute_box_num); - __bang_le((T *)temp2_ram, (T *)temp10_ram, (T *)temp6_ram, - actual_compute_box_num); - __bang_and((T *)temp1_ram, (T *)temp1_ram, (T *)temp2_ram, - actual_compute_box_num); - - // 16 means the 4x4 possible intersection points above - __bang_mul((T *)intersect_pts_x + (16 + i) * actual_compute_box_num, - (T *)temp1_ram, (T *)rotated_pts1_x + i * actual_compute_box_num, - actual_compute_box_num); - __bang_mul((T *)intersect_pts_y + (16 + i) * actual_compute_box_num, - (T *)temp1_ram, (T *)rotated_pts1_y + i * 
actual_compute_box_num, - actual_compute_box_num); - - // assign valid_pts bit and accumulate nums of valid points of each box pair - __bang_or((T *)valid_pts + (16 + i) * actual_compute_box_num, - (T *)valid_pts + (16 + i) * actual_compute_box_num, - (T *)temp1_ram, actual_compute_box_num); - __bang_add((T *)nums_in_ram, (T *)nums_in_ram, (T *)temp1_ram, - actual_compute_box_num); - } - - // Reverse the check - check for vertices of rect2 inside rect1 - // temp5 = ABdotAB - dot2d((T *)temp5_ram, (T *)vec1_x, (T *)vec1_y, (T *)vec1_x, (T *)vec1_y, - actual_compute_box_num, (T *)temp9_ram); - // temp6 = ADdotAD - dot2d((T *)temp6_ram, (T *)vec1_x + 3 * actual_compute_box_num, - (T *)vec1_y + 3 * actual_compute_box_num, - (T *)vec1_x + 3 * actual_compute_box_num, - (T *)vec1_y + 3 * actual_compute_box_num, actual_compute_box_num, - (T *)temp9_ram); - for (int i = 0; i < 4; i++) { - // AP = pts2[i] - pts1[0] = (temp7, temp8) - __bang_sub((T *)temp7_ram, (T *)rotated_pts2_x + i * actual_compute_box_num, - (T *)rotated_pts1_x, actual_compute_box_num); - __bang_sub((T *)temp8_ram, (T *)rotated_pts2_y + i * actual_compute_box_num, - (T *)rotated_pts1_y, actual_compute_box_num); - - // temp9 = APdotAB = dot2d(AP, AB) - dot2d((T *)temp9_ram, (T *)temp7_ram, (T *)temp8_ram, (T *)vec1_x, - (T *)vec1_y, actual_compute_box_num, (T *)temp2_ram); - // temp10 = APdotAD = -dot2d(AP, DA) - dot2d((T *)temp10_ram, (T *)temp7_ram, (T *)temp8_ram, - (T *)vec1_x + 3 * actual_compute_box_num, - (T *)vec1_y + 3 * actual_compute_box_num, actual_compute_box_num, - (T *)temp2_ram); - __bang_mul_scalar((T *)temp10_ram, (T *)temp10_ram, (T)-1, - actual_compute_box_num); - - // ((APdotAB >= 0) && (APdotAD >= 0) && (APdotAB <= ABdotAB) && (APdotAD <= - // ADdotAD)) - __bang_cycle_ge((T *)temp1_ram, (T *)temp9_ram, (T *)temp4_ram, - actual_compute_box_num, COMPUTE_COUNT_ALIGN); - __bang_cycle_ge((T *)temp2_ram, (T *)temp10_ram, (T *)temp4_ram, - actual_compute_box_num, COMPUTE_COUNT_ALIGN); - 
__bang_and((T *)temp1_ram, (T *)temp1_ram, (T *)temp2_ram, - actual_compute_box_num); - __bang_le((T *)temp2_ram, (T *)temp9_ram, (T *)temp5_ram, - actual_compute_box_num); - __bang_and((T *)temp1_ram, (T *)temp1_ram, (T *)temp2_ram, - actual_compute_box_num); - __bang_le((T *)temp2_ram, (T *)temp10_ram, (T *)temp6_ram, - actual_compute_box_num); - __bang_and((T *)temp1_ram, (T *)temp1_ram, (T *)temp2_ram, - actual_compute_box_num); - - // 20 means the (4x4+4) possible intersection points above - __bang_mul((T *)intersect_pts_x + (20 + i) * actual_compute_box_num, - (T *)temp1_ram, (T *)rotated_pts2_x + i * actual_compute_box_num, - actual_compute_box_num); - __bang_mul((T *)intersect_pts_y + (20 + i) * actual_compute_box_num, - (T *)temp1_ram, (T *)rotated_pts2_y + i * actual_compute_box_num, - actual_compute_box_num); - - // assign valid_pts bit and accumulate nums of valid points of each box pair - __bang_or((T *)valid_pts + (20 + i) * actual_compute_box_num, - (T *)valid_pts + (20 + i) * actual_compute_box_num, - (T *)temp1_ram, actual_compute_box_num); - __bang_add((T *)nums_in_ram, (T *)nums_in_ram, (T *)temp1_ram, - actual_compute_box_num); - } -} - -template -__mlu_func__ void convexHullGraham( - T *intersect_pts_x, T *intersect_pts_y, T *ordered_pts_x, T *ordered_pts_y, - T *dist_ram, T *valid_box, T *valid_pts, T *nums_in_ram, T *temp1_ram, - T *temp2_ram, T *temp3_ram, T *temp_long_1, T *temp_long_2, T *temp_long_3, - const uint32_t &actual_box_num, const uint32_t &actual_compute_box_num) { - // Step1. Find the point with minimum y, if more than 1 points have the same - // minimum y, - // pick the one with the minimum x. 
- // set p[i].y to max_y_value if not valid_pts, to avoid invalid result - // 24 means all possible intersection points - __bang_max((T *)temp2_ram, (T *)intersect_pts_y, 24 * actual_compute_box_num); - __bang_write_value((T *)temp3_ram, COMPUTE_COUNT_ALIGN, ((T *)temp2_ram)[0]); - __bang_not((T *)temp_long_1, (T *)valid_pts, 24 * actual_compute_box_num); - __bang_cycle_mul((T *)temp_long_1, (T *)temp_long_1, (T *)temp3_ram, - 24 * actual_compute_box_num, COMPUTE_COUNT_ALIGN); - __bang_mul((T *)temp_long_2, (T *)intersect_pts_y, (T *)valid_pts, - 24 * actual_compute_box_num); - __bang_add((T *)temp_long_2, (T *)temp_long_2, (T *)temp_long_1, - 24 * actual_compute_box_num); - // temp2 = min_y_value(temp_long_2), use min_pool, channel=box_num, h=1, w=24 - __bang_minpool((T *)temp2_ram, (T *)temp_long_2, actual_compute_box_num, 1, - 24, 1, 24, 1, 24); - __bang_mul((T *)temp2_ram, (T *)temp2_ram, (T *)valid_box, - actual_compute_box_num); - - // set p[i].x to max_x_value if not min_y point - __bang_max((T *)temp1_ram, (T *)intersect_pts_x, 24 * actual_compute_box_num); - __bang_write_value((T *)temp3_ram, COMPUTE_COUNT_ALIGN, ((T *)temp1_ram)[0]); - __bang_cycle_eq((T *)temp_long_1, (T *)temp_long_2, (T *)temp2_ram, - 24 * actual_compute_box_num, actual_compute_box_num); - __bang_and((T *)temp_long_1, (T *)temp_long_1, (T *)valid_pts, - 24 * actual_compute_box_num); - __bang_not((T *)temp_long_3, (T *)temp_long_1, 24 * actual_compute_box_num); - __bang_cycle_mul((T *)temp_long_3, (T *)temp_long_3, (T *)temp3_ram, - 24 * actual_compute_box_num, COMPUTE_COUNT_ALIGN); - __bang_mul((T *)temp_long_1, (T *)intersect_pts_x, (T *)temp_long_1, - 24 * actual_compute_box_num); - __bang_add((T *)temp_long_1, (T *)temp_long_1, (T *)temp_long_3, - 24 * actual_compute_box_num); - // temp3 = min_x_value(temp_long_1), use min_pool, channel=box_num, h=1, w=24 - __bang_minpool((T *)temp3_ram, (T *)temp_long_1, actual_compute_box_num, 1, - 24, 1, 24, 1, 24); - __bang_mul((T *)temp3_ram, 
(T *)temp3_ram, (T *)valid_box, - actual_compute_box_num); - - // Step2. All points subtract starting-point (for sorting in the next step) - __bang_cycle_sub((T *)ordered_pts_x, (T *)intersect_pts_x, (T *)temp3_ram, - 24 * actual_compute_box_num, actual_compute_box_num); - __bang_cycle_sub((T *)ordered_pts_y, (T *)intersect_pts_y, (T *)temp2_ram, - 24 * actual_compute_box_num, actual_compute_box_num); - __bang_mul((T *)ordered_pts_x, (T *)ordered_pts_x, (T *)valid_pts, - 24 * actual_compute_box_num); - __bang_mul((T *)ordered_pts_y, (T *)ordered_pts_y, (T *)valid_pts, - 24 * actual_compute_box_num); - - // Step3. Sort every intersection point according to their relative - // cross-product values (essentially sorting according to angles) - // If the angles are the same, sort according to distance to origin - dot2d((T *)dist_ram, (T *)ordered_pts_x, (T *)ordered_pts_y, - (T *)ordered_pts_x, (T *)ordered_pts_y, 24 * actual_compute_box_num, - (T *)temp_long_3); - - T temp, temp_nums_in, temp_dist_1, temp_dist_2; - T temp1_x, temp1_y; - T temp2_x, temp2_y; - for (int i = 0; i < actual_box_num; i++) { - if (((T *)valid_box)[i]) { - // make sure all nums_in[i] points are at the front - for (int ii = 0; ii < 23; ii++) { - for (int jj = ii + 1; jj < 24; jj++) { - int ii_index = ii * actual_compute_box_num + i; - int jj_index = jj * actual_compute_box_num + i; - // ii point is not valid and jj point is valid, swap jj for ii - if ((!((T *)valid_pts)[ii_index]) && ((T *)valid_pts)[jj_index]) { - ((T *)ordered_pts_x)[ii_index] = ((T *)ordered_pts_x)[jj_index]; - ((T *)ordered_pts_y)[ii_index] = ((T *)ordered_pts_y)[jj_index]; - ((T *)dist_ram)[ii_index] = ((T *)dist_ram)[jj_index]; - ((T *)valid_pts)[ii_index] = true; - ((T *)ordered_pts_x)[jj_index] = 0; - ((T *)ordered_pts_y)[jj_index] = 0; - ((T *)dist_ram)[jj_index] = 0; - ((T *)valid_pts)[jj_index] = false; - break; - } - } - } - temp_nums_in = ((T *)nums_in_ram)[i]; - // make original q[0] = min_x, min_y before sort - for 
(int ii = 1; ii < temp_nums_in; ii++) { - int ii_index = ii * actual_compute_box_num + i; - if (((T *)dist_ram)[ii_index] == 0) { - // swap q[ii_index] and q[0] - ((T *)ordered_pts_x)[ii_index] = ((T *)ordered_pts_x)[i]; - ((T *)ordered_pts_y)[ii_index] = ((T *)ordered_pts_y)[i]; - ((T *)dist_ram)[ii_index] = ((T *)dist_ram)[i]; - ((T *)ordered_pts_x)[i] = 0; - ((T *)ordered_pts_y)[i] = 0; - ((T *)dist_ram)[i] = 0; - break; - } - } - for (int ii = 1; ii < temp_nums_in - 1; ii++) { - for (int jj = ii + 1; jj < temp_nums_in; jj++) { - int ii_index = ii * actual_compute_box_num + i; - int jj_index = jj * actual_compute_box_num + i; - temp1_x = ((T *)ordered_pts_x)[ii_index]; - temp1_y = ((T *)ordered_pts_y)[ii_index]; - temp2_x = ((T *)ordered_pts_x)[jj_index]; - temp2_y = ((T *)ordered_pts_y)[jj_index]; - // calculate cross product and sort q (ordered_pts) - temp = (temp1_x * temp2_y) - (temp1_y * temp2_x); - temp_dist_1 = ((T *)dist_ram)[ii_index]; - temp_dist_2 = ((T *)dist_ram)[jj_index]; - if ((temp < (T)-1e-6) || - ((fabs(temp) < (T)1e-6) && (temp_dist_1 > temp_dist_2))) { - ((T *)ordered_pts_x)[ii_index] = temp2_x; - ((T *)ordered_pts_y)[ii_index] = temp2_y; - ((T *)ordered_pts_x)[jj_index] = temp1_x; - ((T *)ordered_pts_y)[jj_index] = temp1_y; - ((T *)dist_ram)[ii_index] = temp_dist_2; - ((T *)dist_ram)[jj_index] = temp_dist_1; - } - } - } - - // Step4: - // Make sure there are at least 2 points(that don't overlap with each - // other) in the stack - int k; // index of the non-overlapped second point - for (k = 1; k < temp_nums_in; k++) { - if (((T *)dist_ram)[k * actual_compute_box_num + i] > (T)1e-8) { - break; - } - } - if (k == temp_nums_in) { - // We reach the end, which means the convex hull is just one point - // set valid_box = 0, to get ious = 0 - ((T *)valid_box)[i] = 0; - continue; - } - // q[1] = q[k]; - ((T *)ordered_pts_x)[actual_compute_box_num + i] = - ((T *)ordered_pts_x)[k * actual_compute_box_num + i]; - ((T 
*)ordered_pts_y)[actual_compute_box_num + i] = - ((T *)ordered_pts_y)[k * actual_compute_box_num + i]; - - // Step 5: - // Finally we can start the scanning process. - // When a non-convex relationship between the 3 points is found - // (either concave shape or duplicated points), - // we pop the previous point from the stack - // until the 3-point relationship is convex again, or - // until the stack only contains two points - int m = 2; // 2 points in the stack - for (int j = k + 1; j < temp_nums_in; j++) { - // while (m > 1 && cross2d(q[j] - q[m - 2], q[m - 1] - q[m - 2]) >= - // 0) { - // m--; - // } - temp1_x = ((T *)ordered_pts_x)[j * actual_compute_box_num + i] - - ((T *)ordered_pts_x)[(m - 2) * actual_compute_box_num + i]; - temp1_y = ((T *)ordered_pts_y)[j * actual_compute_box_num + i] - - ((T *)ordered_pts_y)[(m - 2) * actual_compute_box_num + i]; - temp2_x = ((T *)ordered_pts_x)[(m - 1) * actual_compute_box_num + i] - - ((T *)ordered_pts_x)[(m - 2) * actual_compute_box_num + i]; - temp2_y = ((T *)ordered_pts_y)[(m - 1) * actual_compute_box_num + i] - - ((T *)ordered_pts_y)[(m - 2) * actual_compute_box_num + i]; - temp = (temp1_x * temp2_y) - (temp1_y * temp2_x); - while ((m > 1) && (temp >= 0)) { - m--; - if (m > 1) { - temp1_x = - ((T *)ordered_pts_x)[j * actual_compute_box_num + i] - - ((T *)ordered_pts_x)[(m - 2) * actual_compute_box_num + i]; - temp1_y = - ((T *)ordered_pts_y)[j * actual_compute_box_num + i] - - ((T *)ordered_pts_y)[(m - 2) * actual_compute_box_num + i]; - temp2_x = - ((T *)ordered_pts_x)[(m - 1) * actual_compute_box_num + i] - - ((T *)ordered_pts_x)[(m - 2) * actual_compute_box_num + i]; - temp2_y = - ((T *)ordered_pts_y)[(m - 1) * actual_compute_box_num + i] - - ((T *)ordered_pts_y)[(m - 2) * actual_compute_box_num + i]; - temp = (temp1_x * temp2_y) - (temp1_y * temp2_x); - } - } - // q[m++] = q[j]; - ((T *)ordered_pts_x)[m * actual_compute_box_num + i] = - ((T *)ordered_pts_x)[j * actual_compute_box_num + i]; - ((T 
*)ordered_pts_y)[m * actual_compute_box_num + i] = - ((T *)ordered_pts_y)[j * actual_compute_box_num + i]; - m++; - } - // set last(24-m) valid_pts to false, to erase invalid q in polygon area - for (int j = m; j < temp_nums_in; j++) { - ((T *)valid_pts)[j * actual_compute_box_num + i] = 0; - } - ((T *)nums_in_ram)[i] = m; - } - } -} - -template -__mlu_func__ void polygonArea(T *ordered_pts_x, T *ordered_pts_y, T *valid_box, - T *valid_pts, T *nums_in_ram, T *temp1_ram, - T *temp2_ram, T *temp3_ram, T *temp4_ram, - T *temp5_ram, T *temp6_ram, T *temp7_ram, - T *temp8_ram, T *temp9_ram, - const uint32_t &actual_compute_box_num) { - // Set where nums_in <= 2, valid_box = false - __bang_write_value((T *)temp9_ram, COMPUTE_COUNT_ALIGN, (T)2); - __bang_cycle_gt((T *)temp1_ram, (T *)nums_in_ram, (T *)temp9_ram, - actual_compute_box_num, COMPUTE_COUNT_ALIGN); - __bang_and((T *)valid_box, (T *)valid_box, (T *)temp1_ram, - actual_compute_box_num); - - // temp1 = area, initialize with all 0 - __bang_write_zero((T *)temp1_ram, actual_compute_box_num); - __bang_max((T *)temp7_ram, (T *)nums_in_ram, actual_compute_box_num); - - // temp_nums_in = max(nums_in) - T temp_nums_in = ((T *)temp7_ram)[0]; - for (int i = 1; i < temp_nums_in - 1; i++) { - // q[i] - q[0]: (temp6, temp7) - __bang_sub((T *)temp6_ram, (T *)ordered_pts_x + i * actual_compute_box_num, - (T *)ordered_pts_x, actual_compute_box_num); - __bang_sub((T *)temp7_ram, (T *)ordered_pts_y + i * actual_compute_box_num, - (T *)ordered_pts_y, actual_compute_box_num); - __bang_mul((T *)temp6_ram, (T *)temp6_ram, - (T *)valid_pts + (i + 1) * actual_compute_box_num, - actual_compute_box_num); - __bang_mul((T *)temp7_ram, (T *)temp7_ram, - (T *)valid_pts + (i + 1) * actual_compute_box_num, - actual_compute_box_num); - // q[i + 1] - q[0]: (temp8, temp9) - __bang_sub((T *)temp8_ram, - (T *)ordered_pts_x + (i + 1) * actual_compute_box_num, - (T *)ordered_pts_x, actual_compute_box_num); - __bang_sub((T *)temp9_ram, - (T 
*)ordered_pts_y + (i + 1) * actual_compute_box_num, - (T *)ordered_pts_y, actual_compute_box_num); - __bang_mul((T *)temp8_ram, (T *)temp8_ram, - (T *)valid_pts + (i + 1) * actual_compute_box_num, - actual_compute_box_num); - __bang_mul((T *)temp9_ram, (T *)temp9_ram, - (T *)valid_pts + (i + 1) * actual_compute_box_num, - actual_compute_box_num); - // area += fabs(cross2d(q[i] - q[0], q[i + 1] - q[0])); - __bang_mul((T *)temp4_ram, (T *)temp6_ram, (T *)temp9_ram, - actual_compute_box_num); - __bang_mul((T *)temp5_ram, (T *)temp7_ram, (T *)temp8_ram, - actual_compute_box_num); - __bang_sub((T *)temp3_ram, (T *)temp4_ram, (T *)temp5_ram, - actual_compute_box_num); - __bang_active_abs((T *)temp3_ram, (T *)temp3_ram, actual_compute_box_num); - __bang_add((T *)temp1_ram, (T *)temp1_ram, (T *)temp3_ram, - actual_compute_box_num); - } - // Set where valid_box = false, intersection = 0 - __bang_mul((T *)temp1_ram, (T *)temp1_ram, (T *)valid_box, - actual_compute_box_num); - // area = area / 2.0 - __bang_mul_scalar((T *)temp1_ram, (T *)temp1_ram, (T)0.5, - actual_compute_box_num); -} - -#endif // IOU3D_UTILS_HPP_ diff --git a/mmcv/ops/csrc/common/mlu/masked_conv2d_mlu_kernel.mlu b/mmcv/ops/csrc/common/mlu/masked_conv2d_mlu_kernel.mlu deleted file mode 100755 index 1356a79..0000000 --- a/mmcv/ops/csrc/common/mlu/masked_conv2d_mlu_kernel.mlu +++ /dev/null @@ -1,181 +0,0 @@ -/************************************************************************* - * Copyright (C) 2022 Cambricon. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- *************************************************************************/ -#include "common_mlu_helper.hpp" - -__nram__ char nram_buffer[MAX_NRAM_SIZE]; - -template -__mlu_func__ void MLUUnion1MaskedIm2colForward( - const T *feature, const int height, const int width, const int channels, - const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, - const int32_t *mask_h_idx, const int32_t *mask_w_idx, const int mask_cnt, - T *data_col) { - for (int index = taskId; index < mask_cnt; index += taskDim) { - const int h_col = mask_h_idx[index]; - const int w_col = mask_w_idx[index]; - const int h_offset = h_col - pad_h; - const int w_offset = w_col - pad_w; - int h_start = h_offset; - int h_end = h_offset + kernel_h - 1; - int w_start = w_offset; - int w_end = w_start + kernel_w - 1; - if (h_start >= height || w_start >= width || h_end < 0 || w_end < 0) { - continue; - } else { - int h_start_valid = max(0, h_start); - int h_end_valid = min(height - 1, h_end); - int w_start_valid = max(0, w_start); - int w_end_valid = min(width - 1, w_end); - __memcpy( - data_col + index * kernel_h * kernel_w * channels + - ((h_start_valid - h_start) * kernel_w + - (w_start_valid - w_start)) * - channels, - feature + h_start_valid * width * channels + w_start_valid * channels, - (w_end_valid - w_start_valid + 1) * channels * sizeof(T), GDRAM2GDRAM, - kernel_w * channels * sizeof(T), width * channels * sizeof(T), - h_end_valid - h_start_valid); - } - } -} - -template -__mlu_func__ void MLUUnion1MaskedCol2imForward(const T *col, const int height, - const int width, - const int channels, - const int32_t *mask_h_idx, - const int32_t *mask_w_idx, - const int mask_cnt, T *im) { - const int channels_max_num_nram = MAX_NRAM_SIZE / sizeof(T); - if (channels <= channels_max_num_nram) { - const int deal_num = channels_max_num_nram / channels; - int mask_per_core = mask_cnt / taskDim; - const int mask_remain = mask_cnt % taskDim; - mask_per_core += taskId < mask_remain ? 
1 : 0; - int index_start = taskId < mask_remain - ? taskId * mask_per_core - : taskId * mask_per_core + mask_remain; - int loop = mask_per_core / deal_num; - int remain_num = mask_per_core % deal_num; - T *nram_col = (T *)nram_buffer; - for (int index = 0; index < loop; ++index) { - int cur_index = index_start + index * deal_num; - __memcpy(nram_col, col + cur_index * channels, - deal_num * channels * sizeof(T), GDRAM2NRAM); - for (int i = 0; i < deal_num; ++i) { - int mask_index = cur_index + i; - const int h_im = mask_h_idx[mask_index]; - const int w_im = mask_w_idx[mask_index]; - // if(h_im>=height || w_im>=width) continue; - __memcpy(im + (h_im * width + w_im) * channels, nram_col + i * channels, - channels * sizeof(T), NRAM2GDRAM); - } - } - if (remain_num > 0) { - int cur_index = index_start + loop * deal_num; - __memcpy(nram_col, col + cur_index * channels, - remain_num * channels * sizeof(T), GDRAM2NRAM); - for (int i = 0; i < remain_num; ++i) { - int mask_index = cur_index + i; - const int h_im = mask_h_idx[mask_index]; - const int w_im = mask_w_idx[mask_index]; - // if(h_im>=height || w_im>=width) continue; - __memcpy(im + (h_im * width + w_im) * channels, nram_col + i * channels, - channels * sizeof(T), NRAM2GDRAM); - } - } - } else { - for (int index = taskId; index < mask_cnt; index += taskDim) { - const int m_index = index % mask_cnt; - const int h_im = mask_h_idx[m_index]; - const int w_im = mask_w_idx[m_index]; - // if(h_im>=height || w_im>=width) continue; - __memcpy(im + (h_im * width + w_im) * channels, col + index * channels, - channels * sizeof(T), GDRAM2GDRAM); - } - } -} - -__mlu_global__ void MLUKernelMaskedIm2colForward( - const void *feature, const int height, const int width, const int channels, - const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, - const void *mask_h_idx, const void *mask_w_idx, const int mask_cnt, - void *data_col, const cnrtDataType_t data_dtype) { - if (coreId == 0x80) { - return; - } - - switch 
(data_dtype) { - case CNRT_FLOAT16: { - MLUUnion1MaskedIm2colForward((half *)feature, height, width, channels, - kernel_h, kernel_w, pad_h, pad_w, - (int32_t *)mask_h_idx, (int32_t *)mask_w_idx, - mask_cnt, (half *)data_col); - }; break; - case CNRT_FLOAT32: { - MLUUnion1MaskedIm2colForward((float *)feature, height, width, channels, - kernel_h, kernel_w, pad_h, pad_w, - (int32_t *)mask_h_idx, (int32_t *)mask_w_idx, - mask_cnt, (float *)data_col); - }; break; - default: { - break; - } - } -} - -__mlu_global__ void MLUKernelMaskedCol2imForward( - const void *col, const int height, const int width, const int channels, - const void *mask_h_idx, const void *mask_w_idx, const int mask_cnt, - void *im, const cnrtDataType_t data_dtype) { - if (coreId == 0x80) { - return; - } - switch (data_dtype) { - case CNRT_FLOAT16: { - MLUUnion1MaskedCol2imForward((half *)col, height, width, channels, - (int32_t *)mask_h_idx, (int32_t *)mask_w_idx, - mask_cnt, (half *)im); - }; break; - case CNRT_FLOAT32: { - MLUUnion1MaskedCol2imForward((float *)col, height, width, channels, - (int32_t *)mask_h_idx, (int32_t *)mask_w_idx, - mask_cnt, (float *)im); - }; break; - default: { - break; - } - } -} - -void KernelMaskedIm2colForward( - cnrtDim3_t k_dim, cnrtFunctionType_t k_type, cnrtQueue_t queue, - cnrtDataType_t k_dtype, const void *im_ptr, const int height, - const int width, const int channels, const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w, const void *mask_h_idx_ptr, - const void *mask_w_idx_ptr, const int mask_cnt, void *col_ptr) { - MLUKernelMaskedIm2colForward<<>>( - im_ptr, height, width, channels, kernel_h, kernel_w, pad_h, pad_w, - mask_h_idx_ptr, mask_w_idx_ptr, mask_cnt, col_ptr, k_dtype); -} - -void KernelMaskedCol2imForward(cnrtDim3_t k_dim, cnrtFunctionType_t k_type, - cnrtQueue_t queue, cnrtDataType_t k_dtype, - const void *col_ptr, const int height, - const int width, const int channels, - const void *mask_h_idx_ptr, - const void 
*mask_w_idx_ptr, const int mask_cnt, - void *im_ptr) { - MLUKernelMaskedCol2imForward<<>>( - col_ptr, height, width, channels, mask_h_idx_ptr, mask_w_idx_ptr, - mask_cnt, im_ptr, k_dtype); -} diff --git a/mmcv/ops/csrc/common/mlu/ms_deform_attn_mlu_kernel.mlu b/mmcv/ops/csrc/common/mlu/ms_deform_attn_mlu_kernel.mlu deleted file mode 100644 index 7899e52..0000000 --- a/mmcv/ops/csrc/common/mlu/ms_deform_attn_mlu_kernel.mlu +++ /dev/null @@ -1,853 +0,0 @@ -/************************************************************************* - * Copyright (C) 2022 by Cambricon. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- *************************************************************************/ - -#include "common_mlu_helper.hpp" -#include - -/**************************************************************************************** - * - * NRAM partition forward: - * | spatial_shapes | data_value_p1_ping | data_value_p2_ping | - * | data_value_p3_ping | data_value_p4_ping | data_col_ping | - * | data_value_p1_pong | data_value_p2_pong | data_value_p3_pong | - * | data_value_p4_pong | data_col_pong | auxiliary_a | - * | auxiliary_b | - * | 128bytes | deal_size | deal_size | - * | deal_size | deal_size | deal_size | - * | deal_size | deal_size | deal_size | - * | deal_size | deal_size | deal_size | - * | deal_size | - * - ****************************************************************************************/ - -/**************************************************************************************** - * - * NRAM partition backward: - * | grad_output_nram | grad_output_nram_temp | grad_weight | - * | grad_h_weight | grad_w_weight | top_grad | - * | top_grad_temp | spatial_shapes_nram | sampling_loc_nram | - * | deal_size | deal_size | deal_size | - * | deal_size | deal_size | deal_size | - * | deal_size | deal_size | 64bytes | - * - ****************************************************************************************/ - -#define TWELVE_SPLIT 12 -#define ALIGN_NUM 64 -#define ALIGN_NUM_FOR_REDUCE 32 - -__nram__ char nram_buffer[MAX_NRAM_SIZE]; - -template -__mlu_func__ void loadNeighborPointsData( - const T *data_value_gdram, T *data_value_p1_nram, T *data_value_p2_nram, - T *data_value_p3_nram, T *data_value_p4_nram, const size_t deal_num, - const int32_t &width, const int32_t &height, const int32_t &num_heads, - const int32_t &channels, const T &x, const T &y, const int32_t &head_idx) { - const int32_t w_low = floorf(x); - const int32_t h_low = floorf(y); - const int32_t w_high = w_low + 1; - const int32_t h_high = h_low + 1; - - const int32_t w_stride = num_heads * channels; - 
const int32_t h_stride = width * w_stride; - const int32_t h_low_ptr_offset = h_low * h_stride; - const int32_t h_high_ptr_offset = h_low_ptr_offset + h_stride; - const int32_t w_low_ptr_offset = w_low * w_stride; - const int32_t w_high_ptr_offset = w_low_ptr_offset + w_stride; - const int32_t base_ptr_offset = head_idx * channels; - - // top-left point - if (h_low >= 0 && w_low >= 0) { - const int32_t v1_offset = - h_low_ptr_offset + w_low_ptr_offset + base_ptr_offset; - __memcpy_async(data_value_p1_nram, data_value_gdram + v1_offset, - deal_num * sizeof(T), GDRAM2NRAM); - } - - // top-right point - if (h_low >= 0 && w_high <= width - 1) { - const int32_t v2_offset = - h_low_ptr_offset + w_high_ptr_offset + base_ptr_offset; - __memcpy_async(data_value_p2_nram, data_value_gdram + v2_offset, - deal_num * sizeof(T), GDRAM2NRAM); - } - - // bottom-left point - if (h_high <= height - 1 && w_low >= 0) { - const int32_t v3_offset = - h_high_ptr_offset + w_low_ptr_offset + base_ptr_offset; - __memcpy_async(data_value_p3_nram, data_value_gdram + v3_offset, - deal_num * sizeof(T), GDRAM2NRAM); - } - - // bottom-right point - if (h_high <= height - 1 && w_high <= width - 1) { - const int32_t v4_offset = - h_high_ptr_offset + w_high_ptr_offset + base_ptr_offset; - __memcpy_async(data_value_p4_nram, data_value_gdram + v4_offset, - deal_num * sizeof(T), GDRAM2NRAM); - } -} - -template -__mlu_func__ void bilinearInterpolation( - T *data_value_p1_nram, T *data_value_p2_nram, T *data_value_p3_nram, - T *data_value_p4_nram, T *sample_point_value, T *auxiliary_b, - const size_t deal_num, const int32_t &width, const int32_t &height, - const T &x, const T &y) { - const int32_t w_low = floorf(x); - const int32_t h_low = floorf(y); - const int32_t w_high = w_low + 1; - const int32_t h_high = h_low + 1; - - const T lw = x - w_low; - const T lh = y - h_low; - const T hw = 1 - lw; - const T hh = 1 - lh; - const T w1 = hh * hw; - const T w2 = hh * lw; - const T w3 = lh * hw; - const T w4 = 
lh * lw; - - __bang_write_value((T *)sample_point_value, deal_num, (T)0); - - // top-left point - if (h_low >= 0 && w_low >= 0) { - // sample_point_value += v1 * w1 - __bang_mul_scalar((T *)auxiliary_b, (T *)data_value_p1_nram, (T)w1, - deal_num); - __bang_add((T *)sample_point_value, (T *)sample_point_value, - (T *)auxiliary_b, deal_num); - } - - // top-right point - if (h_low >= 0 && w_high <= width - 1) { - // sample_point_value += v2 * w2 - __bang_mul_scalar((T *)auxiliary_b, (T *)data_value_p2_nram, (T)w2, - deal_num); - __bang_add((T *)sample_point_value, (T *)sample_point_value, - (T *)auxiliary_b, deal_num); - } - - // bottom-left point - if (h_high <= height - 1 && w_low >= 0) { - // sample_point_value += v3 * w3 - __bang_mul_scalar((T *)auxiliary_b, (T *)data_value_p3_nram, (T)w3, - deal_num); - __bang_add((T *)sample_point_value, (T *)sample_point_value, - (T *)auxiliary_b, deal_num); - } - - // bottom-right point - if (h_high <= height - 1 && w_high <= width - 1) { - // sample_point_value += v4 * w4 - __bang_mul_scalar((T *)auxiliary_b, (T *)data_value_p4_nram, (T)w4, - deal_num); - __bang_add((T *)sample_point_value, (T *)sample_point_value, - (T *)auxiliary_b, deal_num); - } -} - -template -__mlu_global__ void MLUKernelMsDeformAttnForward( - const char *data_value_gdram, const char *data_spatial_shapes_gdram, - const char *data_level_start_index_gdram, - const char *data_sampling_loc_gdram, const char *data_attn_weight_gdram, - const int32_t batch_size, const int32_t num_keys, const int32_t num_heads, - const int32_t channels, const int32_t num_levels, const int32_t num_queries, - const int32_t num_points, char *data_col_gdram) { - if (coreId == 0x80) { - return; - } - - const size_t spatial_size = PAD_UP(2 * sizeof(int32_t), NFU_ALIGN_SIZE); - const size_t span_num_deal = - PAD_DOWN((MAX_NRAM_SIZE - spatial_size) / TWELVE_SPLIT / sizeof(T), - NFU_ALIGN_SIZE); - const size_t align_num = NFU_ALIGN_SIZE; - const int32_t channels_seg_num = channels / 
span_num_deal; - const size_t channels_rem = channels % span_num_deal; - const size_t channels_align_rem = CEIL_ALIGN(channels_rem, align_num); - char *data_spatial_shapes_nram = nram_buffer; - char *ping_data_value_p1_nram = data_spatial_shapes_nram + spatial_size; - char *ping_data_value_p2_nram = - ping_data_value_p1_nram + span_num_deal * sizeof(T); - char *ping_data_value_p3_nram = - ping_data_value_p2_nram + span_num_deal * sizeof(T); - char *ping_data_value_p4_nram = - ping_data_value_p3_nram + span_num_deal * sizeof(T); - char *ping_data_col_nram = - ping_data_value_p4_nram + span_num_deal * sizeof(T); - char *pong_data_value_p1_nram = - ping_data_col_nram + span_num_deal * sizeof(T); - char *pong_data_value_p2_nram = - pong_data_value_p1_nram + span_num_deal * sizeof(T); - char *pong_data_value_p3_nram = - pong_data_value_p2_nram + span_num_deal * sizeof(T); - char *pong_data_value_p4_nram = - pong_data_value_p3_nram + span_num_deal * sizeof(T); - char *pong_data_col_nram = - pong_data_value_p4_nram + span_num_deal * sizeof(T); - char *auxiliary_a = pong_data_col_nram + span_num_deal * sizeof(T); - char *auxiliary_b = auxiliary_a + span_num_deal * sizeof(T); - const size_t ping_pong_gap = 5 * span_num_deal * sizeof(T); - size_t data_col_ping_pong_idx = 0; - - int32_t block_num_per_core = (batch_size * num_queries * num_heads) / taskDim; - const int32_t block_num_rem = - (batch_size * num_queries * num_heads) % taskDim; - const int32_t idx_start = taskId < (block_num_rem + 1) - ? taskId * (block_num_per_core + 1) - : taskId * block_num_per_core + block_num_rem; - block_num_per_core = - taskId < block_num_rem - ? 
(batch_size * num_queries * num_heads) / taskDim + 1 - : (batch_size * num_queries * num_heads) / taskDim; - - for (int32_t cur_idx = idx_start; cur_idx < idx_start + block_num_per_core; - ++cur_idx) { - // cur_idx = batch_idx * num_queries * num_heads + query_idx * num_heads + - // head_idx - const int32_t head_idx = cur_idx % num_heads; - const int32_t batch_idx = (cur_idx / num_heads) / num_queries; - - const char *data_value_gdram_start = - data_value_gdram + - batch_idx * num_keys * num_heads * channels * sizeof(T); - const char *data_sampling_loc_gdram_start = - data_sampling_loc_gdram + - cur_idx * num_levels * num_points * 2 * sizeof(T); - const char *data_attn_weight_gdram_start = - data_attn_weight_gdram + cur_idx * num_levels * num_points * sizeof(T); - char *data_col_gdram_start = - data_col_gdram + cur_idx * channels * sizeof(T); - - for (int32_t c_seg_idx = 0; c_seg_idx < channels_seg_num; ++c_seg_idx) { - __bang_write_value( - (T *)(ping_data_col_nram + data_col_ping_pong_idx * ping_pong_gap), - span_num_deal, (T)0); - // load data - // level_idx = 0, point_idx = 0 - __memcpy(data_spatial_shapes_nram, data_spatial_shapes_gdram, - 2 * sizeof(int32_t), GDRAM2NRAM); - int32_t spatial_h = ((int32_t *)data_spatial_shapes_nram)[0]; - int32_t spatial_w = ((int32_t *)data_spatial_shapes_nram)[1]; - const char *data_value_ptr = - data_value_gdram_start + c_seg_idx * span_num_deal * sizeof(T); - T loc_w = ((T *)data_sampling_loc_gdram_start)[0]; - T loc_h = ((T *)data_sampling_loc_gdram_start)[1]; - T weight = ((T *)data_attn_weight_gdram_start)[0]; - T x = loc_w * spatial_w - 0.5; - T y = loc_h * spatial_h - 0.5; - if (y > -1 && x > -1 && y < spatial_h && x < spatial_w) { - loadNeighborPointsData( - (T *)data_value_ptr, (T *)ping_data_value_p1_nram, - (T *)ping_data_value_p2_nram, (T *)ping_data_value_p3_nram, - (T *)ping_data_value_p4_nram, span_num_deal, spatial_w, spatial_h, - num_heads, channels, x, y, head_idx); - } - T spatial_h_next_point = 0; - T 
spatial_w_next_point = 0; - T weight_next_point = 0; - T x_next_point = 0; - T y_next_point = 0; - __asm__ volatile("sync;"); - - for (int32_t level_idx = 0; level_idx < num_levels; ++level_idx) { - for (int32_t point_idx = 0; point_idx < num_points; ++point_idx) { - // load data - if (point_idx == num_points - 1 && level_idx == num_levels - 1) { - // last point no need to load data, continue to compute - } else if (point_idx == num_points - 1) { - const int32_t level_start_id = - ((int32_t *)data_level_start_index_gdram)[level_idx + 1]; - const int32_t spatial_h_ptr = (level_idx + 1) << 1; - __memcpy( - data_spatial_shapes_nram, - data_spatial_shapes_gdram + spatial_h_ptr * sizeof(int32_t), - 2 * sizeof(int32_t), GDRAM2NRAM); - spatial_h_next_point = ((int32_t *)data_spatial_shapes_nram)[0]; - spatial_w_next_point = ((int32_t *)data_spatial_shapes_nram)[1]; - data_value_ptr = data_value_gdram_start + - (level_start_id * num_heads * channels + - c_seg_idx * span_num_deal) * - sizeof(T); - loc_w = ((T *)data_sampling_loc_gdram_start) - [(level_idx * num_points + point_idx + 1) * 2]; - loc_h = ((T *)data_sampling_loc_gdram_start) - [(level_idx * num_points + point_idx + 1) * 2 + 1]; - weight_next_point = - ((T *)data_attn_weight_gdram_start)[level_idx * num_points + - point_idx + 1]; - x_next_point = loc_w * spatial_w_next_point - 0.5; - y_next_point = loc_h * spatial_h_next_point - 0.5; - if (y_next_point > -1 && x_next_point > -1 && - y_next_point < spatial_h_next_point && - x_next_point < spatial_w_next_point) { - loadNeighborPointsData( - (T *)data_value_ptr, - (T *)(ping_data_value_p1_nram + - ((level_idx * num_points + point_idx + 1) % 2) * - ping_pong_gap), - (T *)(ping_data_value_p2_nram + - ((level_idx * num_points + point_idx + 1) % 2) * - ping_pong_gap), - (T *)(ping_data_value_p3_nram + - ((level_idx * num_points + point_idx + 1) % 2) * - ping_pong_gap), - (T *)(ping_data_value_p4_nram + - ((level_idx * num_points + point_idx + 1) % 2) * - ping_pong_gap), 
- span_num_deal, spatial_w_next_point, spatial_h_next_point, - num_heads, channels, x_next_point, y_next_point, head_idx); - } - } else { - spatial_h_next_point = spatial_h; - spatial_w_next_point = spatial_w; - loc_w = ((T *)data_sampling_loc_gdram_start) - [(level_idx * num_points + point_idx + 1) * 2]; - loc_h = ((T *)data_sampling_loc_gdram_start) - [(level_idx * num_points + point_idx + 1) * 2 + 1]; - weight_next_point = - ((T *)data_attn_weight_gdram_start)[level_idx * num_points + - point_idx + 1]; - x_next_point = loc_w * spatial_w - 0.5; - y_next_point = loc_h * spatial_h - 0.5; - if (y_next_point > -1 && x_next_point > -1 && - y_next_point < spatial_h && x_next_point < spatial_w) { - loadNeighborPointsData( - (T *)data_value_ptr, - (T *)(ping_data_value_p1_nram + - ((level_idx * num_points + point_idx + 1) % 2) * - ping_pong_gap), - (T *)(ping_data_value_p2_nram + - ((level_idx * num_points + point_idx + 1) % 2) * - ping_pong_gap), - (T *)(ping_data_value_p3_nram + - ((level_idx * num_points + point_idx + 1) % 2) * - ping_pong_gap), - (T *)(ping_data_value_p4_nram + - ((level_idx * num_points + point_idx + 1) % 2) * - ping_pong_gap), - span_num_deal, spatial_w, spatial_h, num_heads, channels, - x_next_point, y_next_point, head_idx); - } - } - - // compute - if (y > -1 && x > -1 && y < spatial_h && x < spatial_w) { - bilinearInterpolation( - (T *)(ping_data_value_p1_nram + - ((level_idx * num_points + point_idx) % 2) * - ping_pong_gap), - (T *)(ping_data_value_p2_nram + - ((level_idx * num_points + point_idx) % 2) * - ping_pong_gap), - (T *)(ping_data_value_p3_nram + - ((level_idx * num_points + point_idx) % 2) * - ping_pong_gap), - (T *)(ping_data_value_p4_nram + - ((level_idx * num_points + point_idx) % 2) * - ping_pong_gap), - (T *)auxiliary_a, (T *)auxiliary_b, span_num_deal, spatial_w, - spatial_h, x, y); - __bang_mul_scalar((T *)auxiliary_a, (T *)auxiliary_a, (T)weight, - span_num_deal); - __bang_add((T *)(ping_data_col_nram + - 
data_col_ping_pong_idx * ping_pong_gap), - (T *)(ping_data_col_nram + - data_col_ping_pong_idx * ping_pong_gap), - (T *)auxiliary_a, span_num_deal); - } - - spatial_w = spatial_w_next_point; - spatial_h = spatial_h_next_point; - weight = weight_next_point; - x = x_next_point; - y = y_next_point; - __asm__ volatile("sync;"); - } - } - // store - __memcpy_async( - data_col_gdram_start + c_seg_idx * span_num_deal * sizeof(T), - ping_data_col_nram + data_col_ping_pong_idx * ping_pong_gap, - span_num_deal * sizeof(T), NRAM2GDRAM); - data_col_ping_pong_idx = (data_col_ping_pong_idx + 1) % 2; - } - - if (channels_rem > 0) { - __bang_write_value( - (T *)(ping_data_col_nram + data_col_ping_pong_idx * ping_pong_gap), - channels_align_rem, (T)0); - // load data - // level_idx = 0, point_idx = 0 - __memcpy(data_spatial_shapes_nram, data_spatial_shapes_gdram, - 2 * sizeof(int32_t), GDRAM2NRAM); - int32_t spatial_h = ((int32_t *)data_spatial_shapes_nram)[0]; - int32_t spatial_w = ((int32_t *)data_spatial_shapes_nram)[1]; - const char *data_value_ptr = - data_value_gdram_start + channels_seg_num * span_num_deal * sizeof(T); - T loc_w = ((T *)data_sampling_loc_gdram_start)[0]; - T loc_h = ((T *)data_sampling_loc_gdram_start)[1]; - T weight = ((T *)data_attn_weight_gdram_start)[0]; - T x = loc_w * spatial_w - 0.5; - T y = loc_h * spatial_h - 0.5; - if (y > -1 && x > -1 && y < spatial_h && x < spatial_w) { - loadNeighborPointsData( - (T *)data_value_ptr, (T *)ping_data_value_p1_nram, - (T *)ping_data_value_p2_nram, (T *)ping_data_value_p3_nram, - (T *)ping_data_value_p4_nram, channels_rem, spatial_w, spatial_h, - num_heads, channels, x, y, head_idx); - } - T spatial_h_next_point = 0; - T spatial_w_next_point = 0; - T weight_next_point = 0; - T x_next_point = 0; - T y_next_point = 0; - __asm__ volatile("sync;"); - - for (int32_t level_idx = 0; level_idx < num_levels; ++level_idx) { - for (int32_t point_idx = 0; point_idx < num_points; ++point_idx) { - // load data - if (point_idx == 
num_points - 1 && level_idx == num_levels - 1) { - // last point no need to load data, continue to compute - } else if (point_idx == num_points - 1) { - const int32_t level_start_id = - ((int32_t *)data_level_start_index_gdram)[level_idx + 1]; - const int32_t spatial_h_ptr = (level_idx + 1) << 1; - __memcpy( - data_spatial_shapes_nram, - data_spatial_shapes_gdram + spatial_h_ptr * sizeof(int32_t), - 2 * sizeof(int32_t), GDRAM2NRAM); - spatial_h_next_point = ((int32_t *)data_spatial_shapes_nram)[0]; - spatial_w_next_point = ((int32_t *)data_spatial_shapes_nram)[1]; - data_value_ptr = data_value_gdram_start + - (level_start_id * num_heads * channels + - channels_seg_num * span_num_deal) * - sizeof(T); - loc_w = ((T *)data_sampling_loc_gdram_start) - [(level_idx * num_points + point_idx + 1) * 2]; - loc_h = ((T *)data_sampling_loc_gdram_start) - [(level_idx * num_points + point_idx + 1) * 2 + 1]; - weight_next_point = - ((T *)data_attn_weight_gdram_start)[level_idx * num_points + - point_idx + 1]; - x_next_point = loc_w * spatial_w_next_point - 0.5; - y_next_point = loc_h * spatial_h_next_point - 0.5; - if (y_next_point > -1 && x_next_point > -1 && - y_next_point < spatial_h_next_point && - x_next_point < spatial_w_next_point) { - loadNeighborPointsData( - (T *)data_value_ptr, - (T *)(ping_data_value_p1_nram + - ((level_idx * num_points + point_idx + 1) % 2) * - ping_pong_gap), - (T *)(ping_data_value_p2_nram + - ((level_idx * num_points + point_idx + 1) % 2) * - ping_pong_gap), - (T *)(ping_data_value_p3_nram + - ((level_idx * num_points + point_idx + 1) % 2) * - ping_pong_gap), - (T *)(ping_data_value_p4_nram + - ((level_idx * num_points + point_idx + 1) % 2) * - ping_pong_gap), - channels_rem, spatial_w_next_point, spatial_h_next_point, - num_heads, channels, x_next_point, y_next_point, head_idx); - } - } else { - spatial_w_next_point = spatial_w; - spatial_h_next_point = spatial_h; - loc_w = ((T *)data_sampling_loc_gdram_start) - [(level_idx * num_points + 
point_idx + 1) * 2]; - loc_h = ((T *)data_sampling_loc_gdram_start) - [(level_idx * num_points + point_idx + 1) * 2 + 1]; - weight_next_point = - ((T *)data_attn_weight_gdram_start)[level_idx * num_points + - point_idx + 1]; - x_next_point = loc_w * spatial_w - 0.5; - y_next_point = loc_h * spatial_h - 0.5; - if (y_next_point > -1 && x_next_point > -1 && - y_next_point < spatial_h && x_next_point < spatial_w) { - loadNeighborPointsData( - (T *)data_value_ptr, - (T *)(ping_data_value_p1_nram + - ((level_idx * num_points + point_idx + 1) % 2) * - ping_pong_gap), - (T *)(ping_data_value_p2_nram + - ((level_idx * num_points + point_idx + 1) % 2) * - ping_pong_gap), - (T *)(ping_data_value_p3_nram + - ((level_idx * num_points + point_idx + 1) % 2) * - ping_pong_gap), - (T *)(ping_data_value_p4_nram + - ((level_idx * num_points + point_idx + 1) % 2) * - ping_pong_gap), - channels_rem, spatial_w, spatial_h, num_heads, channels, - x_next_point, y_next_point, head_idx); - } - } - - // compute - if (y > -1 && x > -1 && y < spatial_h && x < spatial_w) { - bilinearInterpolation( - (T *)(ping_data_value_p1_nram + - ((level_idx * num_points + point_idx) % 2) * - ping_pong_gap), - (T *)(ping_data_value_p2_nram + - ((level_idx * num_points + point_idx) % 2) * - ping_pong_gap), - (T *)(ping_data_value_p3_nram + - ((level_idx * num_points + point_idx) % 2) * - ping_pong_gap), - (T *)(ping_data_value_p4_nram + - ((level_idx * num_points + point_idx) % 2) * - ping_pong_gap), - (T *)auxiliary_a, (T *)auxiliary_b, channels_align_rem, - spatial_w, spatial_h, x, y); - __bang_mul_scalar((T *)auxiliary_a, (T *)auxiliary_a, (T)weight, - channels_align_rem); - __bang_add((T *)(ping_data_col_nram + - data_col_ping_pong_idx * ping_pong_gap), - (T *)(ping_data_col_nram + - data_col_ping_pong_idx * ping_pong_gap), - (T *)auxiliary_a, channels_align_rem); - } - - spatial_w = spatial_w_next_point; - spatial_h = spatial_h_next_point; - weight = weight_next_point; - x = x_next_point; - y = 
y_next_point; - __asm__ volatile("sync;"); - } - } - // store - __memcpy_async( - data_col_gdram_start + channels_seg_num * span_num_deal * sizeof(T), - ping_data_col_nram + data_col_ping_pong_idx * ping_pong_gap, - channels_rem * sizeof(T), NRAM2GDRAM); - data_col_ping_pong_idx = (data_col_ping_pong_idx + 1) % 2; - } - } - __asm__ volatile("sync;"); - return; -} - -template __mlu_global__ void MLUKernelMsDeformAttnForward( - const char *data_value_gdram, const char *data_spatial_shapes_gdram, - const char *data_level_start_index_gdram, - const char *data_sampling_loc_gdram, const char *data_attn_weight_gdram, - const int32_t batch_size, const int32_t num_keys, const int32_t num_heads, - const int32_t channels, const int32_t num_levels, const int32_t num_queries, - const int32_t num_points, char *data_col_gdram); - -void KernelMsDeformAttnForward( - cnrtDim3_t k_dim, cnrtFunctionType_t k_type, cnrtQueue_t queue, - const cnrtDataType_t d_type, const char *data_value_gdram, - const char *data_spatial_shapes_gdram, - const char *data_level_start_index_gdram, - const char *data_sampling_loc_gdram, const char *data_attn_weight_gdram, - const int32_t batch_size, const int32_t num_keys, const int32_t num_heads, - const int32_t channels, const int32_t num_levels, const int32_t num_queries, - const int32_t num_points, char *data_col_gdram) { - MLUKernelMsDeformAttnForward<<>>( - data_value_gdram, data_spatial_shapes_gdram, data_level_start_index_gdram, - data_sampling_loc_gdram, data_attn_weight_gdram, batch_size, num_keys, - num_heads, channels, num_levels, num_queries, num_points, data_col_gdram); -} - -template -void __mlu_func__ msDeformAttnCol2imBilinear( - T *top_grad_temp, const int32_t &height, const int32_t &width, const T &w1, - const T &w2, const T &w3, const T &w4, const int32_t &h_low, - const int32_t &w_low, const int32_t &h_high, const int32_t &w_high, - const int32_t &base_ptr, const int32_t &h_low_ptr_offset, - const int32_t &w_low_ptr_offset, const int32_t 
&h_high_ptr_offset, - const int32_t &w_high_ptr_offset, const T &hh, const T &hw, const T &lh, - const T &lw, T *top_grad, const T &data_attn_weight, T *grad_h_weight, - T *grad_w_weight, T *grad_value, T *grad_output_nram, T *grad_weight, - T *grad_sampling_loc, T *grad_attn_weight, T *grad_output_nram_temp, - const int32_t &deal_num, const int32_t &deal_num_real, - const T *data_value_ptr) { - if (h_low >= 0 && w_low >= 0) { - int32_t offset1 = h_low_ptr_offset + w_low_ptr_offset + base_ptr; - __memcpy(grad_output_nram, data_value_ptr + offset1, - deal_num_real * sizeof(T), GDRAM2NRAM); - __bang_mul_scalar(grad_weight, grad_output_nram, hw, deal_num); - __bang_sub(grad_h_weight, grad_h_weight, grad_weight, deal_num); - __bang_mul_scalar(grad_weight, grad_output_nram, hh, deal_num); - __bang_sub(grad_w_weight, grad_w_weight, grad_weight, deal_num); - - __bang_mul_scalar(top_grad_temp, top_grad, data_attn_weight, deal_num); - __bang_mul_scalar(top_grad_temp, top_grad_temp, w1, deal_num); - // for calc grad_attn_weight - __bang_mul_scalar(grad_output_nram, grad_output_nram, w1, deal_num); - __bang_atomic_add((T *)top_grad_temp, (T *)(grad_value + offset1), - (T *)top_grad_temp, deal_num_real); - } - if (h_low >= 0 && w_high <= width - 1) { - int32_t offset2 = h_low_ptr_offset + w_high_ptr_offset + base_ptr; - __memcpy(grad_output_nram_temp, data_value_ptr + offset2, - deal_num_real * sizeof(T), GDRAM2NRAM); - __bang_mul_scalar(grad_weight, grad_output_nram_temp, lw, deal_num); - __bang_sub(grad_h_weight, grad_h_weight, grad_weight, deal_num); - __bang_mul_scalar(grad_weight, grad_output_nram_temp, hh, deal_num); - __bang_add(grad_w_weight, grad_w_weight, grad_weight, deal_num); - - __bang_mul_scalar(top_grad_temp, top_grad, data_attn_weight, deal_num); - __bang_mul_scalar(top_grad_temp, top_grad_temp, w2, deal_num); - - __bang_mul_scalar(grad_output_nram_temp, grad_output_nram_temp, w2, - deal_num); - __bang_add(grad_output_nram, grad_output_nram, 
grad_output_nram_temp, - deal_num); - __bang_atomic_add((T *)top_grad_temp, (T *)(grad_value + offset2), - (T *)top_grad_temp, deal_num_real); - } - if (h_high <= height - 1 && w_low >= 0) { - int32_t offset3 = h_high_ptr_offset + w_low_ptr_offset + base_ptr; - __memcpy(grad_output_nram_temp, data_value_ptr + offset3, - deal_num_real * sizeof(T), GDRAM2NRAM); - __bang_mul_scalar(grad_weight, grad_output_nram_temp, hw, deal_num); - __bang_add(grad_h_weight, grad_h_weight, grad_weight, deal_num); - __bang_mul_scalar(grad_weight, grad_output_nram_temp, lh, deal_num); - __bang_sub(grad_w_weight, grad_w_weight, grad_weight, deal_num); - - __bang_mul_scalar(top_grad_temp, top_grad, data_attn_weight, deal_num); - __bang_mul_scalar(top_grad_temp, top_grad_temp, w3, deal_num); - // for calc grad_attn_weight - __bang_mul_scalar(grad_output_nram_temp, grad_output_nram_temp, w3, - deal_num); - __bang_add(grad_output_nram, grad_output_nram, grad_output_nram_temp, - deal_num); - __bang_atomic_add((T *)top_grad_temp, (T *)(grad_value + offset3), - (T *)top_grad_temp, deal_num_real); - } - if (h_high <= height - 1 && w_high <= width - 1) { - int32_t offset4 = h_high_ptr_offset + w_high_ptr_offset + base_ptr; - __memcpy(grad_output_nram_temp, data_value_ptr + offset4, - deal_num_real * sizeof(T), GDRAM2NRAM); - __bang_mul_scalar(grad_weight, grad_output_nram_temp, lw, deal_num); - __bang_add(grad_h_weight, grad_h_weight, grad_weight, deal_num); - __bang_mul_scalar(grad_weight, grad_output_nram_temp, lh, deal_num); - __bang_add(grad_w_weight, grad_w_weight, grad_weight, deal_num); - - __bang_mul_scalar(top_grad_temp, top_grad, data_attn_weight, deal_num); - __bang_mul_scalar(top_grad_temp, top_grad_temp, w4, deal_num); - // for calc grad_attn_weight - __bang_mul_scalar(grad_output_nram_temp, grad_output_nram_temp, w4, - deal_num); - __bang_add(grad_output_nram, grad_output_nram, grad_output_nram_temp, - deal_num); - - __bang_atomic_add((T *)top_grad_temp, (T *)(grad_value + 
offset4), - (T *)top_grad_temp, deal_num_real); - } - __bang_mul(grad_output_nram, grad_output_nram, top_grad, deal_num); -#if __BANG_ARCH__ >= 322 - recursiveSumPool(grad_output_nram, 1, deal_num_real, ALIGN_NUM_FOR_REDUCE); -#else - const int32_t align_num_on_200 = NFU_ALIGN_SIZE / sizeof(float); - recursiveSumPool(grad_output_nram, align_num_on_200, - deal_num / align_num_on_200, ALIGN_NUM_FOR_REDUCE); - __bang_reduce_sum(grad_output_nram, grad_output_nram, - NFU_ALIGN_SIZE / sizeof(float)); -#endif - __bang_atomic_add((T *)grad_output_nram, (T *)grad_attn_weight, - (T *)grad_output_nram, 1); - __bang_mul_scalar(grad_w_weight, grad_w_weight, width, deal_num); - __bang_mul_scalar(top_grad_temp, top_grad, data_attn_weight, deal_num); - __bang_mul(grad_w_weight, grad_w_weight, top_grad_temp, deal_num); -#if __BANG_ARCH__ >= 322 - recursiveSumPool(grad_w_weight, 1, deal_num_real, ALIGN_NUM_FOR_REDUCE); -#else - recursiveSumPool(grad_w_weight, align_num_on_200, deal_num / align_num_on_200, - ALIGN_NUM_FOR_REDUCE); - __bang_reduce_sum(grad_w_weight, grad_w_weight, - NFU_ALIGN_SIZE / sizeof(float)); -#endif - __bang_atomic_add((T *)grad_w_weight, (T *)(grad_sampling_loc), - (T *)grad_w_weight, 1); - - __bang_mul_scalar(grad_h_weight, grad_h_weight, height, deal_num); - __bang_mul(grad_h_weight, grad_h_weight, top_grad_temp, deal_num); -#if __BANG_ARCH__ >= 322 - recursiveSumPool(grad_h_weight, 1, deal_num_real, ALIGN_NUM_FOR_REDUCE); -#else - recursiveSumPool(grad_h_weight, align_num_on_200, deal_num / align_num_on_200, - ALIGN_NUM_FOR_REDUCE); - __bang_reduce_sum(grad_h_weight, grad_h_weight, - NFU_ALIGN_SIZE / sizeof(float)); -#endif - __bang_atomic_add((T *)grad_h_weight, (T *)(grad_sampling_loc + 1), - (T *)grad_h_weight, 1); -} - -__mlu_global__ void MLUUnion1KernelMsDeformAttnBackward( - const float *data_value, const int32_t *spatial_shapes, - const int32_t *data_level_start_index, const float *data_sampling_loc, - const float *data_attn_weight, const float 
*grad_output, - const int32_t batch, const int32_t spatial_size, const int32_t num_heads, - const int32_t channels, const int32_t num_levels, const int32_t num_query, - const int32_t num_points, float *grad_value, float *grad_sampling_loc, - float *grad_attn_weight) { - if (coreId == 0x80) { - return; - } - const int32_t split_num = 8; - const int32_t spatial_shapes_size = 64; - int32_t deal_num = PAD_DOWN( - (MAX_NRAM_SIZE - spatial_shapes_size) / split_num / sizeof(float), - ALIGN_NUM); - float *grad_output_nram = (float *)nram_buffer; - float *grad_output_nram_temp = (float *)nram_buffer + deal_num; - float *grad_weight = (float *)nram_buffer + 2 * deal_num; - float *grad_h_weight = (float *)nram_buffer + 3 * deal_num; - float *grad_w_weight = (float *)nram_buffer + 4 * deal_num; - float *top_grad = (float *)nram_buffer + 5 * deal_num; - float *top_grad_temp = (float *)nram_buffer + 6 * deal_num; - int32_t *spatial_shapes_nram = - (int32_t *)((float *)nram_buffer + 7 * deal_num); - float *sampling_loc_nram = - (float *)nram_buffer + 7 * deal_num + 2 * sizeof(int32_t); - const int32_t total_num = batch * num_query * num_heads * num_levels; - int32_t num_per_core = total_num / taskDim; - int32_t num_rem = total_num % taskDim; - num_per_core = num_per_core + int32_t(taskId < num_rem); - int32_t start_per_core = - num_rem > taskId - ? 
(taskId * num_per_core) - : ((num_per_core + 1) * num_rem + (taskId - num_rem) * num_per_core); - int32_t end_per_core = start_per_core + num_per_core; - const int32_t C_repeat = channels / deal_num; - const int32_t C_tail = channels % deal_num; - const int32_t qid_stride = num_heads * channels; - int32_t base_ptr = 0; - for (int32_t num_loop = start_per_core; num_loop < end_per_core; ++num_loop) { - const int32_t l_col = num_loop % num_levels; - const int32_t m_col = num_loop / num_levels % num_heads; - const int32_t q_col = num_loop / num_levels / num_heads % num_query; - const int32_t b_col = num_loop / num_query / num_heads / num_levels; - int32_t data_weight_ptr = num_loop * num_points; - int32_t data_loc_w_ptr = data_weight_ptr << 1; - const int32_t value_offset = b_col * spatial_size * num_heads * channels; - const int32_t level_start_id = data_level_start_index[l_col]; - int32_t spatial_h_ptr = l_col << 1; - int32_t grad_output_offset = b_col * num_query * num_heads * channels + - q_col * num_heads * channels + - m_col * channels; - __memcpy(spatial_shapes_nram, spatial_shapes + spatial_h_ptr, - 2 * sizeof(int32_t), GDRAM2NRAM); - const int32_t spatial_h = spatial_shapes_nram[0]; - const int32_t spatial_w = spatial_shapes_nram[1]; - const int32_t value_ptr_offset = value_offset + level_start_id * qid_stride; - const float *data_value_ptr = data_value + value_ptr_offset; - float *grad_value_ptr = grad_value + value_ptr_offset; - const int32_t grad_attn_weight_out = num_loop * num_points; - const int32_t grad_sampling_loc_out = num_loop * num_points * 2; - for (int32_t p_col = 0; p_col < num_points; ++p_col) { - __memcpy(sampling_loc_nram, data_sampling_loc + data_loc_w_ptr, - 2 * sizeof(float), GDRAM2NRAM); - const float loc_w = sampling_loc_nram[0]; - const float loc_h = sampling_loc_nram[1]; - const float weight = data_attn_weight[data_weight_ptr]; - const float h_im = loc_h * spatial_h - 0.5; - const float w_im = loc_w * spatial_w - 0.5; - if (h_im > -1 
&& w_im > -1 && h_im < spatial_h && w_im < spatial_w) { - const int32_t h_low = floorf(h_im); - const int32_t w_low = floorf(w_im); - const int32_t h_high = h_low + 1; - const int32_t w_high = w_low + 1; - - const float lh = h_im - h_low; - const float lw = w_im - w_low; - const float hh = 1.0 - lh; - const float hw = 1.0 - lw; - - const int32_t w_stride = num_heads * channels; - const int32_t h_stride = spatial_w * w_stride; - const int32_t h_low_ptr_offset = h_low * h_stride; - const int32_t h_high_ptr_offset = h_low_ptr_offset + h_stride; - const int32_t w_low_ptr_offset = w_low * w_stride; - const int32_t w_high_ptr_offset = w_low_ptr_offset + w_stride; - - float w1 = hh * hw; - float w2 = hh * lw; - float w3 = lh * hw; - float w4 = lh * lw; - - for (int32_t C_loop = 0; C_loop < C_repeat; ++C_loop) { - base_ptr = m_col * channels + C_loop * deal_num; - __bang_write_zero(grad_weight, 3 * deal_num); - __bang_write_zero(grad_output_nram, deal_num); - __memcpy(top_grad, - grad_output + grad_output_offset + C_loop * deal_num, - deal_num * sizeof(float), GDRAM2NRAM); - msDeformAttnCol2imBilinear( - top_grad_temp, spatial_h, spatial_w, w1, w2, w3, w4, h_low, w_low, - h_high, w_high, base_ptr, h_low_ptr_offset, w_low_ptr_offset, - h_high_ptr_offset, w_high_ptr_offset, hh, hw, lh, lw, top_grad, - weight, grad_h_weight, grad_w_weight, grad_value_ptr, - grad_output_nram, grad_weight, - grad_sampling_loc + grad_sampling_loc_out + p_col * 2, - grad_attn_weight + grad_attn_weight_out + p_col, - grad_output_nram_temp, deal_num, deal_num, data_value_ptr); - } - if (C_tail != 0) { - base_ptr = m_col * channels + C_repeat * deal_num; - __bang_write_zero(grad_output_nram, 8 * deal_num); - __memcpy(top_grad, - grad_output + grad_output_offset + C_repeat * deal_num, - C_tail * sizeof(float), GDRAM2NRAM); - msDeformAttnCol2imBilinear( - top_grad_temp, spatial_h, spatial_w, w1, w2, w3, w4, h_low, w_low, - h_high, w_high, base_ptr, h_low_ptr_offset, w_low_ptr_offset, - 
h_high_ptr_offset, w_high_ptr_offset, hh, hw, lh, lw, top_grad, - weight, grad_h_weight, grad_w_weight, grad_value_ptr, - grad_output_nram, grad_weight, - grad_sampling_loc + grad_sampling_loc_out + p_col * 2, - grad_attn_weight + grad_attn_weight_out + p_col, - grad_output_nram_temp, deal_num, C_tail, data_value_ptr); - } - } - data_weight_ptr += 1; - data_loc_w_ptr += 2; - } - } -} - -__mlu_global__ void MLUUnion1KernelMsDeformAttnBackward( - const float *data_value, const int32_t *spatial_shapes, - const int32_t *data_level_start_index, const float *data_sampling_loc, - const float *data_attn_weight, const float *grad_output, - const int32_t batch, const int32_t spatial_size, const int32_t num_heads, - const int32_t channels, const int32_t num_levels, const int32_t num_query, - const int32_t num_points, float *grad_value, float *grad_sampling_loc, - float *grad_attn_weight); - -void KernelMsDeformAttnBackward( - cnrtDim3_t k_dim, cnrtFunctionType_t k_type, cnrtQueue_t queue, - const cnrtDataType_t d_type, const float *data_value, - const int32_t *spatial_shapes, const int32_t *data_level_start_index, - const float *data_sampling_loc, const float *data_attn_weight, - const float *grad_output, const int32_t batch, const int32_t spatial_size, - const int32_t num_heads, const int32_t channels, const int32_t num_levels, - const int32_t num_query, const int32_t num_points, float *grad_value, - float *grad_sampling_loc, float *grad_attn_weight) { - MLUUnion1KernelMsDeformAttnBackward<<>>( - data_value, spatial_shapes, data_level_start_index, data_sampling_loc, - data_attn_weight, grad_output, batch, spatial_size, num_heads, channels, - num_levels, num_query, num_points, grad_value, grad_sampling_loc, - grad_attn_weight); -} diff --git a/mmcv/ops/csrc/common/mlu/nms_mlu_kernel.mlu b/mmcv/ops/csrc/common/mlu/nms_mlu_kernel.mlu deleted file mode 100644 index dcc722d..0000000 --- a/mmcv/ops/csrc/common/mlu/nms_mlu_kernel.mlu +++ /dev/null @@ -1,483 +0,0 @@ 
-/************************************************************************* - * Copyright (C) 2021 Cambricon. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - *************************************************************************/ -#include "nms_utils.hpp" - -#define COORD_DIM (4) - -#define SIZE_NRAM_BUF (MAX_NRAM_SIZE + REM_FOR_STACK - 62 * 1024) -#define SIZE_SRAM_BUF (MAX_SRAM_SIZE) - -__nram__ int8_t nram_buffer[SIZE_NRAM_BUF]; -__mlu_shared__ int8_t sram_buffer[SIZE_SRAM_BUF]; - -enum Addr { SRAM, GDRAM }; - -template -__mlu_func__ void nms_detection( - uint32_t &output_box_num, const int output_mode, OUT_DT *output_dram, - IN_DT *input_data_score, const IN_DT *input_data_box, const Addr input_ram, - IN_DT *sram, const int core_limit, const int input_num_boxes, - const int max_output_size, const float thresh_iou, const float thresh_score, - const float offset, const int algo) { - // global value - int32_t *exit_flag = (int32_t *)(sram + 28); - exit_flag[0] = 0; - // score, x1, y1, x2, y2, inter_x1, inter_y1, inter_x2, inter_y2 - int nms_buffer_count1 = 9; - // temp nram buffer to store selected target. 
- int nram_save_limit_count = 256; - float div_thresh_iou = 1.0 / thresh_iou; - - // input data ptr - const IN_DT *input_x1_ptr = input_data_box; - const IN_DT *input_y1_ptr = input_x1_ptr + input_num_boxes; - const IN_DT *input_x2_ptr = input_y1_ptr + input_num_boxes; - const IN_DT *input_y2_ptr = input_x2_ptr + input_num_boxes; - - int limit = 0; // find limit when GDRAM or SRAM - int max_seg_pad = 0; // the max length every repeat - int repeat = 0; - int remain = 0; - int remain_pad = 0; - int input_offset = 0; // offset of input_data for current core - int nram_save_count = 0; - - if (output_mode == 0) { - limit = (SIZE_NRAM_BUF - NFU_ALIGN_SIZE /*for max_box*/ * sizeof(IN_DT) - - nram_save_limit_count * sizeof(OUT_DT)) / - (nms_buffer_count1 * sizeof(IN_DT)); - } else { - // 5 maens: score, x1, y1, x2, y2 - limit = (SIZE_NRAM_BUF - NFU_ALIGN_SIZE /*for max_box*/ * sizeof(IN_DT) - - nram_save_limit_count * 5 * sizeof(OUT_DT)) / - (nms_buffer_count1 * sizeof(IN_DT)); - } - - int max_seg_iou_compute = 0; - int repeat_iou_compute = 0; - int remain_iou_compute = 0; - int remain_pad_iou_compute = 0; - - getComputeParamsBlockOrU1(sizeof(IN_DT), input_num_boxes, limit, core_limit, - input_offset, max_seg_pad, repeat, remain, - remain_pad, max_seg_iou_compute, repeat_iou_compute, - remain_iou_compute, remain_pad_iou_compute); - - // init the data ptr - IN_DT *score = (IN_DT *)nram_buffer; - IN_DT *x1 = score + max_seg_pad; - IN_DT *y1 = x1 + max_seg_pad; - IN_DT *x2 = y1 + max_seg_pad; - IN_DT *y2 = x2 + max_seg_pad; - IN_DT *inter_x1 = y2 + max_seg_pad; - IN_DT *inter_y1 = inter_x1 + max_seg_pad; - IN_DT *inter_x2 = inter_y1 + max_seg_pad; - IN_DT *inter_y2 = inter_x2 + max_seg_pad; - IN_DT *max_box = inter_y2 + max_seg_pad; // the max score, x1, y1, x2, y2 - OUT_DT *nram_save = - (OUT_DT *)((char *)max_box + - NFU_ALIGN_SIZE); // offset two line from max_box - -#if __BANG_ARCH__ >= 300 - float max_box_x1 = 0; - float max_box_y1 = 0; - float max_box_x2 = 0; - float 
max_box_y2 = 0; -#endif - mluMemcpyDirection_t load_dir = SRAM2NRAM; - mluMemcpyDirection_t store_dir = NRAM2SRAM; - load_dir = (input_ram == SRAM) ? SRAM2NRAM : GDRAM2NRAM; - store_dir = (input_ram == SRAM) ? NRAM2SRAM : NRAM2GDRAM; - - for (int keep = 0; keep < max_output_size; - keep++) { // loop until the max_score <= 0 - if (core_limit != 1) { - __sync_cluster(); // sync before current loop - } - - /******FIND MAX START******/ - int max_index = 0; // the max score index - int global_max_index = 0; // for U1 - float max_area = 0; // the max socre area - max_box[0] = 0; // init 0 - findCoreMaxBox(input_data_score, score, inter_x1, max_box, input_x1_ptr, - input_y1_ptr, input_x2_ptr, input_y2_ptr, load_dir, - input_offset, repeat, remain, remain_pad, max_seg_pad, - max_index); - - if (core_limit == 1) { -#if __BANG_ARCH__ >= 300 - calMaxArea(max_box, algo, offset, max_area, max_box_x1, max_box_y1, - max_box_x2, max_box_y2); -#else - calMaxArea(max_box, algo, offset, max_area); -#endif - input_data_score[max_index] = 0; - global_max_index = max_index; - } else if (core_limit == 4) { - __sync_cluster(); - findClusterMaxBox(sram, max_box, inter_x1, input_data_score, core_limit); - -#if __BANG_ARCH__ >= 300 - calMaxArea(max_box, algo, offset, max_area, max_box_x1, max_box_y1, - max_box_x2, max_box_y2); -#else - calMaxArea(max_box, algo, offset, max_area); -#endif - global_max_index = ((uint32_t *)(max_box + 5))[0]; - input_data_score[global_max_index] = 0; - } - // by now, we get: max_score|max_index|max_box|max_area - /******FIND MAX END******/ - - storeResult(max_box, nram_save, output_dram, keep, nram_save_limit_count, - max_output_size, thresh_score, output_mode, nram_save_count, - output_box_num); - - // if the max score <= 0, end - if (core_limit == 1) { - if (float(max_box[0]) <= thresh_score) { - break; - } - } else { - if (float(max_box[0]) <= thresh_score) { - if (coreId == 0) { - exit_flag[0] = 1; - } - } - __sync_cluster(); - if (exit_flag[0] == 1) { - 
break; - } - } -/******NMS STORE END******/ -#if __BANG_ARCH__ >= 300 - scoreUpdate(input_data_score, load_dir, store_dir, input_x1_ptr, - input_y1_ptr, input_x2_ptr, input_y2_ptr, x1, y1, x2, y2, score, - inter_x1, inter_y1, inter_x2, inter_y2, max_box, max_box_x1, - max_box_y1, max_box_x2, max_box_y2, nram_save, - repeat_iou_compute, remain_iou_compute, remain_pad_iou_compute, - max_seg_iou_compute, max_seg_pad, thresh_iou, div_thresh_iou, - input_offset, offset, max_area, input_num_boxes, algo); -#else - scoreUpdate(input_data_score, load_dir, store_dir, input_x1_ptr, - input_y1_ptr, input_x2_ptr, input_y2_ptr, x1, y1, x2, y2, score, - inter_x1, inter_y1, inter_x2, inter_y2, max_box, max_box[1], - max_box[2], max_box[3], max_box[4], nram_save, - repeat_iou_compute, remain_iou_compute, remain_pad_iou_compute, - max_seg_iou_compute, max_seg_pad, thresh_iou, div_thresh_iou, - input_offset, offset, max_area, input_num_boxes, algo); -#endif - } // for max_output_size -} - -__mlu_global__ void MLUUnion1KernelNMS( - const void *input_boxes, const void *input_confidence, - const int input_num_boxes, const int max_output_size, - const float iou_threshold, const float confidence_threshold, - const int output_mode, void *workspace, void *result_num, void *output, - const cnrtDataType_t data_type_input, const float offset, const int algo) { - if (data_type_input == CNRT_FLOAT16) { - __memcpy(workspace, input_confidence, input_num_boxes * sizeof(half), - GDRAM2GDRAM); - } else if (data_type_input == CNRT_FLOAT32) { - __memcpy(workspace, input_confidence, input_num_boxes * sizeof(float), - GDRAM2GDRAM); - } else { - } - - uint32_t output_box_num = 0; - float *score_data = (float *)workspace; - float *boxes_data = (float *)input_boxes; - float *sram = (float *)sram_buffer; - - if (output_mode == 0) { - if (data_type_input == CNRT_FLOAT32) { - nms_detection(output_box_num, output_mode, (uint32_t *)output, score_data, - boxes_data, GDRAM, sram, taskDim, input_num_boxes, - 
max_output_size, iou_threshold, confidence_threshold, - offset, algo); - } else { - nms_detection(output_box_num, output_mode, (uint32_t *)output, - (half *)score_data, (half *)boxes_data, GDRAM, (half *)sram, - taskDim, input_num_boxes, max_output_size, iou_threshold, - confidence_threshold, offset, algo); - } - } else { - if (data_type_input == CNRT_FLOAT32) { - nms_detection(output_box_num, output_mode, (float *)output, score_data, - boxes_data, GDRAM, sram, taskDim, input_num_boxes, - max_output_size, iou_threshold, confidence_threshold, - offset, algo); - } else { - nms_detection(output_box_num, output_mode, (half *)output, - (half *)score_data, (half *)boxes_data, GDRAM, (half *)sram, - taskDim, input_num_boxes, max_output_size, iou_threshold, - confidence_threshold, offset, algo); - } - } - ((uint32_t *)result_num)[0] = output_box_num; -} - -template -__mlu_func__ void nms_detection_ux( - int32_t *exit_flag, uint32_t &output_box_num, OUT_DT *output_dram, - IN_DT *score_data, const IN_DT *boxes_data, const Addr input_ram, - const int input_num_boxes, const int max_output_size, - const float thresh_iou, const float thresh_score, const float offset, - const int output_mode, const int algo, char *cdma_gdram) { - exit_flag[0] = 0; - - IN_DT *sram = (IN_DT *)sram_buffer; - - // score, x1, y1, x2, y2, inter_x1, inter_y1, inter_x2, inter_y2 - int nms_buffer_count1 = 9; - // temp nram buffer to store selected target. 
- int nram_save_limit_count = 256; - float div_thresh_iou = 1.0 / thresh_iou; - - // input data ptr - const IN_DT *input_x1_ptr = boxes_data; - const IN_DT *input_y1_ptr = input_x1_ptr + input_num_boxes; - const IN_DT *input_x2_ptr = input_y1_ptr + input_num_boxes; - const IN_DT *input_y2_ptr = input_x2_ptr + input_num_boxes; - - int limit = 0; // find limit when GDRAM or SRAM - int max_seg_pad = 0; // the max length every repeat - int repeat = 0; - int remain = 0; - int remain_pad = 0; - int nram_save_count = 0; - - if (output_mode == 0) { - limit = (SIZE_NRAM_BUF - NFU_ALIGN_SIZE /*for max_box*/ * sizeof(IN_DT) - - nram_save_limit_count * sizeof(OUT_DT)) / - (nms_buffer_count1 * sizeof(IN_DT)); - } else { - limit = (SIZE_NRAM_BUF - NFU_ALIGN_SIZE /*for max_box*/ * sizeof(IN_DT) - - nram_save_limit_count * INFO_NUM * sizeof(OUT_DT)) / - (nms_buffer_count1 * sizeof(IN_DT)); - } - - int input_offset = 0; - int max_seg_iou_compute = 0; - int repeat_iou_compute = 0; - int remain_iou_compute = 0; - int remain_pad_iou_compute = 0; - - getComputeParamsUx(sizeof(IN_DT), input_num_boxes, limit, input_offset, - max_seg_pad, repeat, remain, remain_pad, - max_seg_iou_compute, repeat_iou_compute, - remain_iou_compute, remain_pad_iou_compute); - // init the nram ptr - IN_DT *score = (IN_DT *)nram_buffer; - IN_DT *x1 = score + max_seg_pad; - IN_DT *y1 = x1 + max_seg_pad; - IN_DT *x2 = y1 + max_seg_pad; - IN_DT *y2 = x2 + max_seg_pad; - IN_DT *inter_x1 = y2 + max_seg_pad; - IN_DT *inter_y1 = inter_x1 + max_seg_pad; - IN_DT *inter_x2 = inter_y1 + max_seg_pad; - IN_DT *inter_y2 = inter_x2 + max_seg_pad; - IN_DT *max_box = inter_y2 + max_seg_pad; // the max score, x1, y1, x2, y2 - OUT_DT *nram_save = - (OUT_DT *)((char *)max_box + - NFU_ALIGN_SIZE); // offset two line from max_box -#if __BANG_ARCH__ >= 300 - float max_box_x1 = 0; - float max_box_y1 = 0; - float max_box_x2 = 0; - float max_box_y2 = 0; -#endif - mluMemcpyDirection_t load_dir = SRAM2NRAM; - mluMemcpyDirection_t 
store_dir = NRAM2SRAM; - load_dir = (input_ram == SRAM) ? SRAM2NRAM : GDRAM2NRAM; - store_dir = (input_ram == SRAM) ? NRAM2SRAM : NRAM2GDRAM; - - for (int keep = 0; keep < max_output_size; - keep++) { // loop until the max_score <= 0 - __sync_all(); - - int max_index = 0; - int global_max_index = 0; // for Ux - float max_area = 0; // the max socre area - max_box[0] = 0; // init 0 - - if (coreId == 0) { - findCoreMaxBox(score_data, score, inter_x1, max_box, input_x1_ptr, - input_y1_ptr, input_x2_ptr, input_y2_ptr, load_dir, - input_offset, repeat, remain, remain_pad, max_seg_pad, - max_index); - // copy max box info to sram - __memcpy(sram, max_box, REDUCE_NUM * sizeof(IN_DT), NRAM2SRAM); - } - __sync_all(); -#if __BANG_ARCH__ >= 590 - __memcpy((char *)cdma_gdram + REDUCE_NUM * clusterId * sizeof(IN_DT), sram, - REDUCE_NUM * sizeof(IN_DT), SRAM2GDRAM); - __sync_all(); - if (clusterId == 0 && coreId == 0) { - __bang_write_zero(inter_x1, NMS_SIZE); - __memcpy((char *)inter_x1, (char *)cdma_gdram, sizeof(IN_DT), GDRAM2NRAM, - sizeof(IN_DT), REDUCE_NUM * sizeof(IN_DT), clusterDim - 1); - __bang_max(max_box, inter_x1, NMS_SIZE); - int max_cluster = (sizeof(IN_DT) == sizeof(half)) - ? 
((uint16_t *)max_box)[1] - : ((uint32_t *)max_box)[1]; - __memcpy((char *)cdma_gdram, - (char *)cdma_gdram + max_cluster * REDUCE_NUM * sizeof(IN_DT), - REDUCE_NUM * sizeof(IN_DT), GDRAM2GDRAM); - } - __sync_all(); - __memcpy(max_box, cdma_gdram, REDUCE_NUM * sizeof(IN_DT), GDRAM2NRAM); -#else - findGlobalMaxBox(max_box, sram, inter_x1); -#endif - -#if __BANG_ARCH__ >= 300 - calMaxArea(max_box, algo, offset, max_area, max_box_x1, max_box_y1, - max_box_x2, max_box_y2); -#else - calMaxArea(max_box, algo, offset, max_area); -#endif - global_max_index = ((uint32_t *)(max_box + 5))[0]; - if (coreId != MEMORY_CORE) { - score_data[global_max_index] = 0; - } - - storeResult(max_box, nram_save, output_dram, keep, nram_save_limit_count, - max_output_size, thresh_score, output_mode, nram_save_count, - output_box_num); - - if (float(max_box[0]) <= thresh_score) { - if (clusterId == 0 && coreId == 0) { - exit_flag[0] = 1; // dram - } - } - __sync_all(); - if (exit_flag[0] == 1) { - break; - } -/******NMS STORE END******/ -#if __BANG_ARCH__ >= 300 - scoreUpdate(score_data, load_dir, store_dir, input_x1_ptr, input_y1_ptr, - input_x2_ptr, input_y2_ptr, x1, y1, x2, y2, score, inter_x1, - inter_y1, inter_x2, inter_y2, max_box, max_box_x1, max_box_y1, - max_box_x2, max_box_y2, nram_save, repeat_iou_compute, - remain_iou_compute, remain_pad_iou_compute, max_seg_iou_compute, - max_seg_pad, thresh_iou, div_thresh_iou, input_offset, offset, - max_area, input_num_boxes, algo); -#else - scoreUpdate(score_data, load_dir, store_dir, input_x1_ptr, input_y1_ptr, - input_x2_ptr, input_y2_ptr, x1, y1, x2, y2, score, inter_x1, - inter_y1, inter_x2, inter_y2, max_box, max_box[1], max_box[2], - max_box[3], max_box[4], nram_save, repeat_iou_compute, - remain_iou_compute, remain_pad_iou_compute, max_seg_iou_compute, - max_seg_pad, thresh_iou, div_thresh_iou, input_offset, offset, - max_area, input_num_boxes, algo); -#endif - } // for max_output_size -} - -__mlu_global__ void MLUUionXKernelNMS( - 
const void *input_boxes, const void *input_confidence, - const int input_num_boxes, const int max_output_size, - const float iou_threshold, const float confidence_threshold, - const float offset, const cnrtDataType_t data_type_input, - const int output_mode, const int algo, void *workspace, void *result_num, - void *output) { - int input_dwidth = (data_type_input == CNRT_FLOAT32) ? 4 : 2; - int32_t *exit_flag = (int32_t *)((char *)workspace + - INFO_NUM * input_num_boxes * input_dwidth); - char *cdma_addr = (char *)exit_flag + sizeof(int32_t); - int reduce_sram_size = NFU_ALIGN_SIZE * REDUCE_NUM * input_dwidth; - int availbale_sram_size = SIZE_SRAM_BUF - reduce_sram_size; - - int cluster_score_size = input_num_boxes * input_dwidth; - int cluster_boxes_size = input_num_boxes * 4 * input_dwidth; - char *sram_score = (char *)sram_buffer + reduce_sram_size; - char *sram_boxes = - (char *)sram_buffer + reduce_sram_size + cluster_score_size; - Addr input_ram = GDRAM; - if ((cluster_score_size + cluster_boxes_size) < availbale_sram_size) { - input_ram = SRAM; - __memcpy(sram_score, input_confidence, cluster_score_size, GDRAM2SRAM); - __memcpy(sram_boxes, input_boxes, cluster_boxes_size, GDRAM2SRAM); - } else { - __memcpy(workspace, input_confidence, cluster_score_size, GDRAM2GDRAM); - } - __sync_cluster(); - - uint32_t output_box_num = 0; - float *score_data; - float *boxes_data; - score_data = (input_ram == SRAM) ? (float *)sram_score : (float *)workspace; - boxes_data = (input_ram == SRAM) ? 
(float *)sram_boxes : (float *)input_boxes; - - if (output_mode == 0) { - if (data_type_input == CNRT_FLOAT32) { - nms_detection_ux(exit_flag, output_box_num, (uint32_t *)output, - score_data, boxes_data, input_ram, input_num_boxes, - max_output_size, iou_threshold, confidence_threshold, - offset, output_mode, algo, cdma_addr); - } else { - nms_detection_ux(exit_flag, output_box_num, (uint32_t *)output, - (half *)score_data, (half *)boxes_data, input_ram, - input_num_boxes, max_output_size, iou_threshold, - confidence_threshold, offset, output_mode, algo, - cdma_addr); - } - } else { - if (data_type_input == CNRT_FLOAT32) { - nms_detection_ux(exit_flag, output_box_num, (float *)output, score_data, - boxes_data, input_ram, input_num_boxes, max_output_size, - iou_threshold, confidence_threshold, offset, output_mode, - algo, cdma_addr); - } else { - nms_detection_ux(exit_flag, output_box_num, (half *)output, - (half *)score_data, (half *)boxes_data, input_ram, - input_num_boxes, max_output_size, iou_threshold, - confidence_threshold, offset, output_mode, algo, - cdma_addr); - } - } - ((uint32_t *)result_num)[0] = output_box_num; -} - -void KernelNms(cnrtDim3_t k_dim, cnrtFunctionType_t k_type, cnrtQueue_t queue, - const cnrtDataType_t data_type_input, const void *boxes_ptr, - const void *scores_ptr, const int input_num_boxes, - const int max_output_boxes, const float iou_threshold, - const float offset, void *workspace_ptr, void *output_size_ptr, - void *output_ptr) { - switch (k_type) { - default: { return; } - case CNRT_FUNC_TYPE_BLOCK: - case CNRT_FUNC_TYPE_UNION1: { - MLUUnion1KernelNMS<<>>( - (void *)boxes_ptr, (void *)scores_ptr, input_num_boxes, - max_output_boxes, iou_threshold, /*confidence_threshold=*/0.0, - /*output_mode=*/0, workspace_ptr, output_size_ptr, output_ptr, - data_type_input, offset, /*algo=*/1); - }; break; - case CNRT_FUNC_TYPE_UNION2: - case CNRT_FUNC_TYPE_UNION4: - case CNRT_FUNC_TYPE_UNION8: - case CNRT_FUNC_TYPE_UNION16: { - 
MLUUionXKernelNMS<<>>( - (void *)boxes_ptr, (void *)scores_ptr, input_num_boxes, - max_output_boxes, iou_threshold, /*confidence_threshold=*/0.0, offset, - data_type_input, /*output_mode=*/0, /*algo=*/1, workspace_ptr, - output_size_ptr, output_ptr); - }; break; - } -} diff --git a/mmcv/ops/csrc/common/mlu/nms_utils.hpp b/mmcv/ops/csrc/common/mlu/nms_utils.hpp deleted file mode 100644 index 61f5ba9..0000000 --- a/mmcv/ops/csrc/common/mlu/nms_utils.hpp +++ /dev/null @@ -1,553 +0,0 @@ -/************************************************************************* - * Copyright (C) [2019-2022] by Cambricon, Inc. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- *************************************************************************/ -#ifndef NMS_UTILS_HPP_ -#define NMS_UTILS_HPP_ -#include "common_mlu_helper.hpp" - -#define NMS_SIZE (64) -#define NMS_UP(x, y) (x / y + (int)(x % y > 0)) * y -#define NMS_DOWN(x, y) (x / y) * y -#define INFO_NUM (5) // 5 means x1, x2, y1, y2 and score -#define MEMORY_CORE (0x80) -#define REDUCE_NUM \ - (7) // score, x1, y1, x2, y2, max_index (reserve 2 num for half-type input) - -__mlu_func__ void pvLock() { -#if __BANG_ARCH__ == 270 - if (coreId != MEMORY_CORE) { - __bang_lock(0, 0); - } -#endif -} - -__mlu_func__ void pvUnlock() { -#if __BANG_ARCH__ == 270 - if (coreId != MEMORY_CORE) { - __bang_unlock(0, 0); - } -#endif -} - -template -static __mlu_func__ void computeReluN(T *nram_dst, T *nram_src, void *nram_tmp, - const int deal_num, - const T threshold = 0) { - if (threshold < 0) { - return; - } - if (threshold) { -#if __BANG_ARCH__ >= 300 - __bang_relun(nram_dst, nram_src, deal_num, threshold); -#else - int align_num = NFU_ALIGN_SIZE / sizeof(T); - T *nram_aux_a = (T *)nram_tmp; - T *nram_aux_b = nram_aux_a + deal_num; - T *nram_zero = nram_aux_b + align_num; - __bang_write_value(nram_aux_b, align_num, threshold); - __bang_write_zero(nram_zero, align_num); - __bang_cycle_lt((T *)nram_aux_a, nram_src, (T *)nram_aux_b, deal_num, - align_num); - __bang_mul(nram_dst, nram_src, (T *)nram_aux_a, deal_num); - __bang_cycle_eq((T *)nram_aux_a, (T *)nram_aux_a, (T *)nram_zero, deal_num, - align_num); - __bang_cycle_mul((T *)nram_aux_a, (T *)nram_aux_a, (T *)nram_aux_b, - deal_num, align_num); - __bang_add(nram_dst, nram_dst, (T *)nram_aux_a, deal_num); - __bang_cycle_gt((T *)nram_aux_a, nram_dst, (T *)nram_zero, deal_num, - align_num); - __bang_mul(nram_dst, nram_dst, (T *)nram_aux_a, deal_num); -#endif - } else { -#if __BANG_ARCH__ >= 300 - __bang_relu(nram_dst, nram_src, deal_num); -#else - __bang_active_relu(nram_dst, nram_src, deal_num); -#endif - } -} - -__mlu_func__ void 
getComputeParamsBlockOrU1( - const int input_dwidth, const int input_box_num, const int limit, - const int core_limit, int &input_offset, int &max_seg_pad, int &repeat, - int &remain, int &remain_pad, int &max_seg_iou_compute, - int &repeat_iou_compute, int &remain_iou_compute, - int &remain_pad_iou_compute) { - int avg_core = input_box_num / core_limit; - int rem = input_box_num % core_limit; - int len_core = avg_core + (coreId < rem ? 1 : 0); - input_offset = avg_core * coreId + (coreId <= rem ? coreId : rem); - max_seg_pad = NMS_DOWN(limit, NMS_SIZE); - repeat = len_core / max_seg_pad; - remain = len_core % max_seg_pad; - remain_pad = NMS_UP(remain, NMS_SIZE); - - // if datatype is fp16, we should cvt to fp32 when compute iou - max_seg_iou_compute = NMS_DOWN(max_seg_pad / (4 / input_dwidth), NMS_SIZE); - repeat_iou_compute = len_core / max_seg_iou_compute; - remain_iou_compute = len_core % max_seg_iou_compute; - remain_pad_iou_compute = NMS_UP(remain_iou_compute, NMS_SIZE); -} - -__mlu_func__ void getComputeParamsUx( - const int input_dwidth, const int input_num_boxes, const int limit, - int &input_offset, int &max_seg_pad, int &repeat, int &remain, - int &remain_pad, int &max_seg_iou_compute, int &repeat_iou_compute, - int &remain_iou_compute, int &remain_pad_iou_compute) { - // data split - int avg_cluster = input_num_boxes / clusterDim; - int rem_cluster = input_num_boxes % clusterDim; - int len_cluster = avg_cluster + (clusterId < rem_cluster); - int cluster_offset = avg_cluster * clusterId + - (clusterId <= rem_cluster ? clusterId : rem_cluster); - - int avg_core = len_cluster / coreDim; - int rem_core = len_cluster % coreDim; - int len_core = avg_core + (coreId < rem_core); - int core_offset = - avg_core * coreId + (coreId <= rem_core ? 
coreId : rem_core); - input_offset = cluster_offset + core_offset; - - max_seg_pad = NMS_DOWN(limit, NMS_SIZE); - - // core 0 of each cluster calculate the max score index - int max_index_len_core = avg_cluster + (clusterId < rem_cluster); - repeat = max_index_len_core / max_seg_pad; - remain = max_index_len_core % max_seg_pad; - remain_pad = NMS_UP(remain, NMS_SIZE); - // if datatype is fp16, we should cvt to fp32 when compute iou - max_seg_iou_compute = - NMS_DOWN(max_seg_pad / (sizeof(float) / input_dwidth), NMS_SIZE); - repeat_iou_compute = len_core / max_seg_iou_compute; - remain_iou_compute = len_core % max_seg_iou_compute; - remain_pad_iou_compute = NMS_UP(remain_iou_compute, NMS_SIZE); -} - -template -__mlu_func__ void findGlobalMaxBox(IN_DT *max_box, IN_DT *sram, - IN_DT *inter_x1) { - // copy all partial max to the sram of cluster 0 - if (clusterId != 0) { - __memcpy(sram + REDUCE_NUM * clusterId, sram, REDUCE_NUM * sizeof(IN_DT), - SRAM2SRAM, 0); - } - __sync_all(); - - // reduce between clusters to get the global max box - if (clusterId == 0) { - if (coreId == 0) { - __bang_write_zero(inter_x1, NMS_SIZE); - __memcpy(inter_x1, sram, sizeof(IN_DT), SRAM2NRAM, sizeof(IN_DT), - REDUCE_NUM * sizeof(IN_DT), clusterDim - 1); - __bang_max(max_box, inter_x1, NMS_SIZE); - int max_cluster = (sizeof(IN_DT) == sizeof(half)) - ? 
((uint16_t *)max_box)[1] - : ((uint32_t *)max_box)[1]; - __memcpy(max_box, sram + max_cluster * REDUCE_NUM, - REDUCE_NUM * sizeof(IN_DT), SRAM2NRAM); - __memcpy(sram, max_box, REDUCE_NUM * sizeof(IN_DT), NRAM2SRAM); - } - __sync_cluster(); - if (coreId == 0x80 && clusterDim > 1) { - // broadcast global max box to each cluster's sram - for (int cluster_idx = 1; cluster_idx < clusterDim; ++cluster_idx) { - __memcpy(sram, sram, REDUCE_NUM * sizeof(IN_DT), SRAM2SRAM, - cluster_idx); - } - } - __sync_cluster(); - } - __sync_all(); - - // copy the global max box to max_box - __memcpy(max_box, sram, REDUCE_NUM * sizeof(IN_DT), SRAM2NRAM); -} - -template -__mlu_func__ void findCoreMaxBox( - IN_DT *input_score_ptr, IN_DT *score, IN_DT *inter_x1, IN_DT *max_box, - const IN_DT *input_x1_ptr, const IN_DT *input_y1_ptr, - const IN_DT *input_x2_ptr, const IN_DT *input_y2_ptr, - const mluMemcpyDirection_t load_dir, const int input_offset, - const int repeat, const int remain, const int remain_pad, - const int max_seg_pad, int &max_index) { - if (coreId != 0x80) { - for (int i = 0; i <= repeat; i++) { - if (i == repeat && remain == 0) { - break; - } - int seg_len = 0; // the length every nms compute - int cpy_len = 0; // the length every nms memcpy - i == repeat ? seg_len = remain_pad : seg_len = max_seg_pad; - i == repeat ? 
cpy_len = remain : cpy_len = max_seg_pad; - /******NMS LOAD START******/ - __bang_write_zero(score, seg_len); - __memcpy(score, input_score_ptr + input_offset + i * max_seg_pad, - cpy_len * sizeof(IN_DT), load_dir, cpy_len * sizeof(IN_DT), - cpy_len * sizeof(IN_DT), 0); - - /******NMS LOAD END******/ - - __bang_max(inter_x1, score, seg_len); - if (inter_x1[0] > max_box[0]) { - max_box[0] = inter_x1[0]; - if (sizeof(IN_DT) == sizeof(half)) { - max_index = ((uint16_t *)inter_x1)[1] + input_offset + - i * max_seg_pad; // offset start from head of input_data - } else if (sizeof(IN_DT) == sizeof(float)) { - max_index = ((uint32_t *)inter_x1)[1] + input_offset + - i * max_seg_pad; // offset start from head of input_data - } - } - } // for repeat - // the max box's x1, y1, x2, y2 on every core - max_box[1] = input_x1_ptr[max_index]; - max_box[2] = input_y1_ptr[max_index]; - max_box[3] = input_x2_ptr[max_index]; - max_box[4] = input_y2_ptr[max_index]; - ((uint32_t *)(max_box + 5))[0] = max_index; - } -} - -template -__mlu_func__ void findClusterMaxBox(IN_DT *sram, IN_DT *max_box, - IN_DT *inter_x1, IN_DT *input_data_score, - const int core_limit) { - // find the max with sram - // copy every core's box info to sram, form: score---x1---y1---x2---y2--- - __memcpy(sram + REDUCE_NUM * coreId, max_box, REDUCE_NUM * sizeof(IN_DT), - NRAM2SRAM); // int32_t datatype - __sync_cluster(); - - // copy score from sram to nram and find the max - __bang_write_zero(inter_x1, 64); - __memcpy(inter_x1, sram, sizeof(IN_DT), SRAM2NRAM, sizeof(IN_DT), - REDUCE_NUM * sizeof(IN_DT), coreDim - 1); - __bang_max(max_box, inter_x1, 64); - int max_core = sizeof(IN_DT) == sizeof(half) ? 
((uint16_t *)max_box)[1] - : ((uint32_t *)max_box)[1]; - // copy the max box to max_box - __memcpy(max_box, sram + max_core * REDUCE_NUM, REDUCE_NUM * sizeof(IN_DT), - SRAM2NRAM); -} - -/*****************************************************************************/ -/*******************************CALCULATE MAX AREA****************************/ -/*****************************************************************************/ - -template -__mlu_func__ void calMaxArea(IN_DT *max_box, const int algo, float offset, - float &max_area) { - if (algo == 0 || offset == 0.0) { - max_area = ((float)max_box[3] - (float)max_box[1]) * - ((float)max_box[4] - (float)max_box[2]); - } else { - max_area = ((float)max_box[3] - (float)max_box[1] + offset) * - ((float)max_box[4] - (float)max_box[2] + offset); - } -} - -template -__mlu_func__ void calMaxArea(IN_DT *max_box, const int algo, float offset, - float &max_area, float &max_box_x1, - float &max_box_y1, float &max_box_x2, - float &max_box_y2) { - // the case of random inf will break the requirement of x1<=x2, y1<=y2 - // so exchange it if it happens. 
- max_box_x1 = float(max_box[1]); - max_box_x2 = float(max_box[3]); - if (max_box[1] > max_box[3]) { - max_box_x1 = float(max_box[3]); - max_box_x2 = float(max_box[1]); - } - max_box_y1 = float(max_box[2]); - max_box_y2 = float(max_box[4]); - if (max_box[2] > max_box[4]) { - max_box_y1 = float(max_box[4]); - max_box_y2 = float(max_box[2]); - } - if (algo == 0 || offset == 0.0) { - max_area = (max_box_x2 - max_box_x1) * (max_box_y2 - max_box_y1); - } else { - max_area = - (max_box_x2 - max_box_x1 + offset) * (max_box_y2 - max_box_y1 + offset); - } -} - -/***********************************************************************/ -/*******************************STORE RESULT****************************/ -/***********************************************************************/ -template -__mlu_func__ void storeResult(IN_DT *max_box, OUT_DT *nram_save, - OUT_DT *&output_dram, const int keep, - const int nram_save_limit_count, - const int max_output_size, - const float thresh_score, const int output_mode, - int &nram_save_count, uint32_t &output_box_num) { - /******NMS STORE START******/ - // store to nram - if (float(max_box[0]) > thresh_score) { - OUT_DT *save_ptr; - int save_offset = 0; - int save_str_num = 0; - save_ptr = nram_save; - save_offset = nram_save_count; - save_str_num = nram_save_limit_count; - if (clusterId == 0 && coreId == 0) { - if (output_mode == 0) { // index1, index2, ... 
- save_ptr[save_offset] = ((uint32_t *)(max_box + INFO_NUM))[0]; - } else if (output_mode == 1) { // score, x1, y1, x2, y2 - __memcpy(save_ptr + save_offset * INFO_NUM, max_box, - INFO_NUM * sizeof(IN_DT), NRAM2NRAM, INFO_NUM * sizeof(IN_DT), - INFO_NUM * sizeof(IN_DT), 0); - } else if (output_mode == 2) { // score---, x1---, y1---, x2---, y2--- - __memcpy(save_ptr + save_offset, max_box, 1 * sizeof(IN_DT), NRAM2NRAM, - save_str_num * sizeof(IN_DT), 1 * sizeof(IN_DT), 4); - } - } - nram_save_count++; - output_box_num++; - } - - // store to sram/gdram - if (output_box_num != 0) { - if ((nram_save_count == nram_save_limit_count) || - (float(max_box[0]) <= thresh_score) || keep == max_output_size - 1) { - if (nram_save_count != 0) { - if (clusterId == 0 && coreId == 0) { - if (output_mode == 0) { // index1, index2, ... - pvLock(); - __memcpy(output_dram, nram_save, nram_save_count * sizeof(uint32_t), - NRAM2GDRAM); - pvUnlock(); - output_dram += nram_save_count; - } else if (output_mode == 1) { // score, x1, y1, x2, y2 - pvLock(); - __memcpy(output_dram, nram_save, - nram_save_count * INFO_NUM * sizeof(IN_DT), NRAM2GDRAM); - pvUnlock(); - output_dram += nram_save_count * INFO_NUM; - } else if (output_mode == - 2) { // score---, x1---, y1---, x2---, y2--- - pvLock(); - __memcpy(output_dram, nram_save, nram_save_count * sizeof(IN_DT), - NRAM2GDRAM, max_output_size * sizeof(IN_DT), - nram_save_limit_count * sizeof(IN_DT), 4); - pvUnlock(); - output_dram += nram_save_count; - } - nram_save_count = 0; - } - } - } // if move data nram->sram/gdram - } // if dst -} - -template -__mlu_func__ void scoreUpdate( - IN_DT *input_score_ptr, const mluMemcpyDirection_t load_dir, - const mluMemcpyDirection_t store_dir, const IN_DT *input_x1_ptr, - const IN_DT *input_y1_ptr, const IN_DT *input_x2_ptr, - const IN_DT *input_y2_ptr, IN_DT *x1, IN_DT *y1, IN_DT *x2, IN_DT *y2, - IN_DT *score, IN_DT *inter_x1, IN_DT *inter_y1, IN_DT *inter_x2, - IN_DT *inter_y2, IN_DT *max_box, const float 
max_box_x1, - const float max_box_y1, const float max_box_x2, const float max_box_y2, - OUT_DT *nram_save, int repeat_iou_compute, int remain_iou_compute, - int remain_pad_iou_compute, int max_seg_iou_compute, int max_seg_pad, - const float thresh_iou, const float div_thresh_iou, const int input_offset, - const float offset, const float max_area, const int input_num_boxes, - const int algo) { - for (int i = 0; i <= repeat_iou_compute; i++) { - if (i == repeat_iou_compute && remain_iou_compute == 0) { - break; - } - int seg_len = (i == repeat_iou_compute) ? remain_pad_iou_compute - : max_seg_iou_compute; - int cpy_len = - (i == repeat_iou_compute) ? remain_iou_compute : max_seg_iou_compute; - /******NMS LOAD START******/ - int dt_offset = 0; - if (sizeof(IN_DT) == sizeof(float)) { - __memcpy(score, input_score_ptr + input_offset + i * max_seg_pad, - cpy_len * sizeof(IN_DT), load_dir, cpy_len * sizeof(IN_DT), - cpy_len * sizeof(IN_DT), 0); - dt_offset = 0; - } else if (sizeof(IN_DT) == sizeof(half)) { - __memcpy(x1, input_score_ptr + input_offset + i * max_seg_iou_compute, - cpy_len * sizeof(IN_DT), load_dir, cpy_len * sizeof(IN_DT), - cpy_len * sizeof(IN_DT), 0); - __bang_half2float((float *)score, (half *)x1, seg_len); - dt_offset = max_seg_iou_compute; - } -#if __BANG_ARCH__ >= 300 - __memcpy(inter_x1 + dt_offset, - input_x1_ptr + input_offset + i * max_seg_iou_compute, - cpy_len * sizeof(IN_DT), load_dir, max_seg_pad * sizeof(IN_DT), - input_num_boxes * sizeof(IN_DT), 3); - - if (sizeof(IN_DT) == sizeof(half)) { - __bang_half2float((float *)inter_x1, - (half *)inter_x1 + max_seg_iou_compute, seg_len); - __bang_half2float((float *)inter_y1, - (half *)inter_y1 + max_seg_iou_compute, seg_len); - __bang_half2float((float *)inter_x2, - (half *)inter_x2 + max_seg_iou_compute, seg_len); - __bang_half2float((float *)inter_y2, - (half *)inter_y2 + max_seg_iou_compute, seg_len); - } - // box transfer - __bang_minequal((float *)x1, (float *)inter_x1, (float *)inter_x2, 
seg_len); - __bang_maxequal((float *)x2, (float *)inter_x1, (float *)inter_x2, seg_len); - __bang_minequal((float *)y1, (float *)inter_y1, (float *)inter_y2, seg_len); - __bang_maxequal((float *)y2, (float *)inter_y1, (float *)inter_y2, seg_len); - // 1〠compute IOU - // get the area_I - __bang_maxeq_scalar((float *)inter_x1, (float *)x1, max_box_x1, - seg_len); // inter_x1 - __bang_mineq_scalar((float *)inter_x2, (float *)x2, max_box_x2, - seg_len); // inter_x2 - __bang_sub((float *)inter_x1, (float *)inter_x2, (float *)inter_x1, - seg_len); - if (algo == 1 && offset != 0.0) { - __bang_add_scalar((float *)inter_x1, (float *)inter_x1, offset, seg_len); - } - computeReluN((float *)inter_x1, (float *)inter_x1, NULL, - seg_len); // inter_w - __bang_maxeq_scalar((float *)inter_y1, (float *)y1, float(max_box_y1), - seg_len); // inter_y1 - __bang_mineq_scalar((float *)inter_y2, (float *)y2, float(max_box_y2), - seg_len); // inter_y2 - __bang_sub((float *)inter_y1, (float *)inter_y2, (float *)inter_y1, - seg_len); - if (algo == 1 && offset != 0.0) { - __bang_add_scalar((float *)inter_y1, (float *)inter_y1, offset, seg_len); - } - computeReluN((float *)inter_y1, (float *)inter_y1, NULL, - seg_len); // inter_h - __bang_mul((float *)inter_x1, (float *)inter_x1, (float *)inter_y1, - seg_len); // area_I - // get the area of input_box: area = (x2 - x1) * (y2 - y1); - if (algo == 1 && offset != 0.0) { - __bang_fusion(FUSION_FSA, (float *)inter_y1, (float *)x2, (float *)x1, - offset, seg_len, seg_len); - __bang_fusion(FUSION_FSA, (float *)inter_y2, (float *)y2, (float *)y1, - offset, seg_len, seg_len); - __bang_mul((float *)inter_x2, (float *)inter_y1, (float *)inter_y2, - seg_len); // area - } else { - __bang_sub((float *)inter_y1, (float *)x2, (float *)x1, seg_len); - __bang_fusion(FUSION_FSM, (float *)inter_x2, (float *)y2, (float *)y1, - (float *)inter_y1, seg_len, seg_len); - } - // get the area_U: area + max_area - area_I - __bang_fusion(FUSION_FAS, (float *)inter_x2, (float 
*)inter_x2, max_area, - (float *)inter_x1, seg_len, seg_len); - // 2〠select the box - // if IOU greater than thres, set the score to zero, abort it: area_U > - // area_I * (1 / thresh)? - if (thresh_iou > 0.0) { - __bang_mul_scalar((float *)inter_x1, (float *)inter_x1, div_thresh_iou, - seg_len); - } else { - __bang_mul_scalar((float *)inter_x2, (float *)inter_x2, thresh_iou, - seg_len); - } - // process for nan - __bang_lt((float *)inter_x1, (float *)inter_x2, (float *)inter_x1, seg_len); - __bang_not((float *)inter_x1, (float *)inter_x1, seg_len); - __bang_mul((float *)score, (float *)score, (float *)inter_x1, seg_len); -/******NMS COMPUTE END******/ -#else - __memcpy(x1 + dt_offset, - input_x1_ptr + input_offset + i * max_seg_iou_compute, - cpy_len * sizeof(IN_DT), load_dir, max_seg_pad * sizeof(IN_DT), - input_num_boxes * sizeof(IN_DT), 3); - if (sizeof(IN_DT) == sizeof(half)) { - __bang_half2float((float *)x1, (half *)x1 + max_seg_iou_compute, seg_len); - __bang_half2float((float *)y1, (half *)y1 + max_seg_iou_compute, seg_len); - __bang_half2float((float *)x2, (half *)x2 + max_seg_iou_compute, seg_len); - __bang_half2float((float *)y2, (half *)y2 + max_seg_iou_compute, seg_len); - } - // 1〠compute IOU - // get the area_I - __bang_write_value((float *)inter_y1, seg_len, - float(max_box[1])); // max_x1 - __bang_maxequal((float *)inter_x1, (float *)x1, (float *)inter_y1, - seg_len); // inter_x1 - __bang_write_value((float *)inter_y2, seg_len, - float(max_box[3])); // max_x2 - __bang_minequal((float *)inter_x2, (float *)x2, (float *)inter_y2, - seg_len); // inter_x2 - __bang_sub((float *)inter_x1, (float *)inter_x2, (float *)inter_x1, - seg_len); - if (algo == 1 && offset != 0.0) { - __bang_add_scalar((float *)inter_x1, (float *)inter_x1, offset, seg_len); - } - computeReluN((float *)inter_x1, (float *)inter_x1, NULL, - seg_len); // inter_w - __bang_write_value((float *)inter_x2, seg_len, - float(max_box[2])); // max_y1 - __bang_maxequal((float *)inter_y1, (float 
*)y1, (float *)inter_x2, - seg_len); // inter_y1 - __bang_write_value((float *)inter_x2, seg_len, - float(max_box[4])); // max_y2 - __bang_minequal((float *)inter_y2, (float *)y2, (float *)inter_x2, - seg_len); // inter_y2 - __bang_sub((float *)inter_y1, (float *)inter_y2, (float *)inter_y1, - seg_len); - if (algo == 1 && offset != 0.0) { - __bang_add_scalar((float *)inter_y1, (float *)inter_y1, offset, seg_len); - } - computeReluN((float *)inter_y1, (float *)inter_y1, NULL, - seg_len); // inter_h - __bang_mul((float *)inter_x1, (float *)inter_x1, (float *)inter_y1, - seg_len); // area_I - // get the area of input_box: area = (x2 - x1) * (y2 - y1); - __bang_sub((float *)inter_y1, (float *)x2, (float *)x1, seg_len); - __bang_sub((float *)inter_y2, (float *)y2, (float *)y1, seg_len); - if (algo == 1 && offset != 0.0) { - __bang_add_scalar((float *)inter_y1, (float *)inter_y1, offset, seg_len); - __bang_add_scalar((float *)inter_y2, (float *)inter_y2, offset, seg_len); - } - __bang_mul((float *)inter_x2, (float *)inter_y1, (float *)inter_y2, - seg_len); // area - // get the area_U: area + max_area - area_I - __bang_add_scalar((float *)inter_x2, (float *)inter_x2, float(max_area), - seg_len); - __bang_sub((float *)inter_x2, (float *)inter_x2, (float *)inter_x1, - seg_len); // area_U - // 2〠select the box - // if IOU greater than thresh, set the score to zero, abort it: area_U > - // area_I * (1 / thresh)? 
- if (thresh_iou > 0.0) { - __bang_mul_scalar((float *)inter_x1, (float *)inter_x1, div_thresh_iou, - seg_len); - } else { - __bang_mul_scalar((float *)inter_x2, (float *)inter_x2, thresh_iou, - seg_len); - } - __bang_ge((float *)inter_x1, (float *)inter_x2, (float *)inter_x1, seg_len); - __bang_mul((float *)score, (float *)score, (float *)inter_x1, seg_len); -/******NMS COMPUTE END******/ -#endif - // update the score - if (sizeof(IN_DT) == sizeof(half)) { - convertFloat2half((half *)score, (float *)score, seg_len); - } - pvLock(); - __memcpy(input_score_ptr + input_offset + i * max_seg_iou_compute, score, - cpy_len * sizeof(IN_DT), store_dir, cpy_len * sizeof(IN_DT), - cpy_len * sizeof(IN_DT), 0); - pvUnlock(); - } -} - -#endif // NMS_UTILS_HPP_ diff --git a/mmcv/ops/csrc/common/mlu/psamask_mlu_kernel.mlu b/mmcv/ops/csrc/common/mlu/psamask_mlu_kernel.mlu deleted file mode 100644 index 055ee4f..0000000 --- a/mmcv/ops/csrc/common/mlu/psamask_mlu_kernel.mlu +++ /dev/null @@ -1,615 +0,0 @@ -/************************************************************************* - * Copyright (C) 2022 Cambricon. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- *************************************************************************/ -#include "common_mlu_helper.hpp" -#include "psamask_utils.hpp" - -#define COMPUTE_COUNT_ALIGN 64 - -__nram__ char buf[MAX_NRAM_SIZE]; - -template -__mlu_func__ void swap(T &a, T &b) { - T tmp = a; - a = b; - b = tmp; -} - -template -__mlu_func__ void storeDataFromNramToDram(T *dst, const T *src, - const PositionInCore &position, - const Shape &shape_full) { - int n_offset = shape_full.h * shape_full.w * shape_full.c; - int h_offset = shape_full.w * shape_full.c; - int w_offset = shape_full.c; - int n_seg = position.n_end - position.n_start; - int h_seg = position.h_end - position.h_start; - int w_seg = position.w_end - position.w_start; - int size = h_seg * w_seg * shape_full.c; - - __memcpy(dst + position.n_start * n_offset + position.h_start * h_offset + - position.w_start * w_offset, - src, size * sizeof(T), NRAM2GDRAM, n_offset * sizeof(T), - size * sizeof(T), n_seg - 1); -} - -template -__mlu_func__ void loadDataFromDramToNram(T *dst, const T *src, - const PositionInCore &position, - const Shape &shape_full) { - int n_offset = shape_full.h * shape_full.w * shape_full.c; - int h_offset = shape_full.w * shape_full.c; - int w_offset = shape_full.c; - int n_seg = position.n_end - position.n_start; - int h_seg = position.h_end - position.h_start; - int w_seg = position.w_end - position.w_start; - int size = h_seg * w_seg * shape_full.c; - - __memcpy(dst, src + position.n_start * n_offset + - position.h_start * h_offset + position.w_start * w_offset, - size * sizeof(T), GDRAM2NRAM, size * sizeof(T), n_offset * sizeof(T), - n_seg - 1); -} - -// transpose the data from A*B*C*(D*E) to A*D*E*(B*C) -template -__mlu_func__ void transposeData(T *dst, T *src, const Shape &shape_seg) { - int align_c = CEIL_ALIGN(shape_seg.c, COMPUTE_COUNT_ALIGN / sizeof(T)); - int align_hw = - CEIL_ALIGN(shape_seg.h * shape_seg.w, COMPUTE_COUNT_ALIGN / sizeof(T)); - for (int i = 0; i < shape_seg.n; ++i) { - 
__bang_transpose(dst, src, align_hw, align_c); - dst += align_hw * align_c; - src += align_hw * align_c; - } -} - -template -__mlu_func__ void psamaskCollectForward( - const T *x_dram, T *y_dram, const PositionInCore &position, - const Shape &x_full, const Shape &y_full, const Shape &shape_seg, - const int h_mask, const int w_mask, const int half_h_mask, - const int half_w_mask) { - T *x_nram = (T *)buf; - T *y_nram = - x_nram + CEIL_ALIGN(shape_seg.n * shape_seg.h * shape_seg.w * x_full.c, - COMPUTE_COUNT_ALIGN / sizeof(T)); - loadDataFromDramToNram(x_nram, x_dram, position, x_full); - - // fill zeros to output - int elem_count = - CEIL_ALIGN(shape_seg.n * shape_seg.h * shape_seg.w * y_full.c, - NFU_ALIGN_SIZE / sizeof(T)); - __bang_write_value(y_nram, elem_count, (T)0); - - int y_n_offset = shape_seg.h * shape_seg.w * shape_seg.c; - int y_h_offset = shape_seg.w * shape_seg.c; - int y_w_offset = shape_seg.c; - int x_n_offset = shape_seg.h * shape_seg.w * x_full.c; - int y_c_offset = 1; - int x_h_offset = shape_seg.w * x_full.c; - int x_w_offset = x_full.c; - int x_c_offset = 1; - int x_start = 0; - int y_start = 0; - for (int nidx = 0; nidx < shape_seg.n; ++nidx) { - for (int hidx = 0; hidx < shape_seg.h; ++hidx) { - for (int widx = 0; widx < shape_seg.w; ++widx) { - int h_abs = hidx + position.h_start; - int w_abs = widx + position.w_start; - int y_offset = y_start; - int x_offset = x_start; - y_offset += hidx * y_h_offset + widx * y_w_offset; - x_offset += hidx * x_h_offset + widx * x_w_offset; - - const int hstart = half_h_mask - h_abs > 0 ? half_h_mask - h_abs : 0; - const int hend = x_full.h + half_h_mask - h_abs < h_mask - ? x_full.h + half_h_mask - h_abs - : h_mask; - const int wstart = half_w_mask - w_abs > 0 ? half_w_mask - w_abs : 0; - const int wend = x_full.w + half_w_mask - w_abs < w_mask - ? 
x_full.w + half_w_mask - w_abs - : w_mask; - // (h, w ) with mask-indexed - // (h + hidx - half_h_mask, w + widx - half_w_mask) with feature-indexed - y_offset += ((hstart + h_abs - half_h_mask) * x_full.w + wstart + - w_abs - half_w_mask) * - y_c_offset; - x_offset += (hstart * w_mask + wstart) * x_c_offset; - int count = wend - wstart; - __memcpy(y_nram + y_offset, x_nram + x_offset, count * sizeof(T), - NRAM2NRAM, y_c_offset * x_full.w * sizeof(T), - x_c_offset * w_mask * sizeof(T), hend - hstart - 1); - } - } - y_start += y_n_offset; - x_start += x_n_offset; - } - storeDataFromNramToDram(y_dram, y_nram, position, y_full); -} - -template -__mlu_func__ void psamaskDistributeForward( - const T *x_dram, T *y_dram, const PositionInCore &position, - const Shape &x_full, const Shape &y_full, const Shape &shape_seg, - const int h_mask, const int w_mask, const int half_h_mask, - const int half_w_mask) { - T *x_nram = (T *)buf; - T *y_nram_temp = - x_nram + CEIL_ALIGN(shape_seg.n * shape_seg.h * shape_seg.w * x_full.c, - COMPUTE_COUNT_ALIGN / sizeof(T)); - loadDataFromDramToNram(x_nram, x_dram, position, x_full); - - // fill zeros to output - int align_c = CEIL_ALIGN(y_full.c, COMPUTE_COUNT_ALIGN / sizeof(T)); - int align_hw = - CEIL_ALIGN(shape_seg.h * shape_seg.w, COMPUTE_COUNT_ALIGN / sizeof(T)); - int elem_count = - CEIL_ALIGN(shape_seg.n * align_c * align_hw, NFU_ALIGN_SIZE / sizeof(T)); - __bang_write_value(y_nram_temp, elem_count, (T)0); - - int y_n_offset = align_hw * align_c; - int y_h_offset = shape_seg.w * align_c; - int y_w_offset = align_c; - int y_c_offset = 1; - int x_n_offset = shape_seg.h * shape_seg.w * x_full.c; - int x_h_offset = shape_seg.w * x_full.c; - int x_w_offset = x_full.c; - int x_c_offset = 1; - int h_feature = y_full.h; - int w_feature = y_full.w; - - int y_start = 0; - int x_start = 0; - for (int nidx = 0; nidx < shape_seg.n; ++nidx) { - for (int hidx = 0; hidx < shape_seg.h; ++hidx) { - for (int widx = 0; widx < shape_seg.w; ++widx) { - 
int h_abs = hidx + position.h_start; - int w_abs = widx + position.w_start; - int y_offset = y_start; - int x_offset = x_start; - y_offset += hidx * y_h_offset + widx * y_w_offset; - x_offset += hidx * x_h_offset + widx * x_w_offset; - const int hstart = half_h_mask - h_abs > 0 ? half_h_mask - h_abs : 0; - const int hend = h_feature + half_h_mask - h_abs < h_mask - ? h_feature + half_h_mask - h_abs - : h_mask; - const int wstart = half_w_mask - w_abs > 0 ? half_w_mask - w_abs : 0; - const int wend = w_feature + half_w_mask - w_abs < w_mask - ? w_feature + half_w_mask - w_abs - : w_mask; - // (h, w ) with mask-indexed - // (h + hidx - half_h_mask, w + widx - half_w_mask) with feature-indexed - y_offset += ((hstart + h_abs - half_h_mask) * x_full.w + wstart + - w_abs - half_w_mask) * - y_c_offset; - x_offset += (hstart * w_mask + wstart) * x_c_offset; - int count = wend - wstart; - __memcpy(y_nram_temp + y_offset, x_nram + x_offset, count * sizeof(T), - NRAM2NRAM, y_c_offset * w_feature * sizeof(T), - x_c_offset * w_mask * sizeof(T), hend - hstart - 1); - } - } - y_start += y_n_offset; - x_start += x_n_offset; - } - // transpose y - T *y_nram = y_nram_temp + shape_seg.n * align_hw * align_c; - Shape y_seg{shape_seg.n, shape_seg.h, shape_seg.w, y_full.c}; - transposeData(y_nram, y_nram_temp, y_seg); - swap(align_c, align_hw); - // store y from nram to dram - int y_n_offset_full = y_full.h * y_full.w * y_full.c; - int y_w_offset_full = y_full.c; - int y_c_offset_full = 1; - - int y_dram_start = - position.n_start * y_n_offset_full + - (position.h_start * y_full.w + position.w_start) * y_c_offset_full; - int y_nram_start = 0; - for (int nidx = 0; nidx < shape_seg.n; ++nidx) { - int y_dram_offset = y_dram_start + nidx * y_n_offset_full; - int y_nram_offset = y_nram_start + nidx * align_hw * align_c; - __memcpy(y_dram + y_dram_offset, y_nram + y_nram_offset, - shape_seg.h * shape_seg.w * sizeof(T), NRAM2GDRAM, - y_w_offset_full * sizeof(T), align_c * sizeof(T), - 
h_feature * w_feature - 1); - } -} - -template -__mlu_func__ void psamaskCollectBackward( - const T *dy_dram, T *dx_dram, const PositionInCore &position, - const Shape &dy_full, const Shape &dx_full, const Shape &shape_seg, - const int h_mask, const int w_mask, const int half_h_mask, - const int half_w_mask) { - T *dy_nram = (T *)buf; - T *dx_nram = - dy_nram + CEIL_ALIGN(shape_seg.n * shape_seg.h * shape_seg.w * dy_full.c, - COMPUTE_COUNT_ALIGN / sizeof(T)); - loadDataFromDramToNram(dy_nram, dy_dram, position, dy_full); - - // fill zeros to output - int elem_count = - CEIL_ALIGN(shape_seg.n * shape_seg.h * shape_seg.w * shape_seg.c, - NFU_ALIGN_SIZE / sizeof(T)); - __bang_write_value(dx_nram, elem_count, (T)0); - - int dy_n_offset = shape_seg.h * shape_seg.w * dy_full.c; - int dy_h_offset = shape_seg.w * dy_full.c; - int dy_w_offset = dy_full.c; - int dy_c_offset = 1; - int dx_n_offset = shape_seg.h * shape_seg.w * dx_full.c; - int dx_h_offset = shape_seg.w * dx_full.c; - int dx_w_offset = dx_full.c; - int dx_c_offset = 1; - int h_feature = dy_full.h; - int w_feature = dy_full.w; - - int dy_start = 0; - int dx_start = 0; - for (int nidx = 0; nidx < shape_seg.n; ++nidx) { - for (int hidx = 0; hidx < shape_seg.h; ++hidx) { - for (int widx = 0; widx < shape_seg.w; ++widx) { - int h_abs = hidx + position.h_start; - int w_abs = widx + position.w_start; - int dy_offset = dy_start; - int dx_offset = dx_start; - dy_offset += hidx * dy_h_offset + widx * dy_w_offset; - dx_offset += hidx * dx_h_offset + widx * dx_w_offset; - - const int hstart = half_h_mask - h_abs > 0 ? half_h_mask - h_abs : 0; - const int hend = h_feature + half_h_mask - h_abs < h_mask - ? h_feature + half_h_mask - h_abs - : h_mask; - const int wstart = half_w_mask - w_abs > 0 ? half_w_mask - w_abs : 0; - const int wend = w_feature + half_w_mask - w_abs < w_mask - ? 
w_feature + half_w_mask - w_abs - : w_mask; - // (h, w ) with mask-indexed - // (h + h_abs - half_h_mask, w + w_abs - half_w_mask) with - // feature-indexed - dy_offset += ((hstart + h_abs - half_h_mask) * w_feature + wstart + - w_abs - half_w_mask) * - dy_c_offset; - dx_offset += (hstart * w_mask + wstart) * dx_c_offset; - int count = wend - wstart; - __memcpy(dx_nram + dx_offset, dy_nram + dy_offset, count * sizeof(T), - NRAM2NRAM, dx_c_offset * w_mask * sizeof(T), - dy_c_offset * w_feature * sizeof(T), hend - hstart - 1); - } - } - dy_start += dy_n_offset; - dx_start += dx_n_offset; - } - storeDataFromNramToDram(dx_dram, dx_nram, position, dx_full); -} - -template -__mlu_func__ void psamaskDistributeBackward( - const T *dy_dram, T *dx_dram, const PositionInCore &position, - const Shape &dy_full, const Shape &dx_full, const Shape &shape_seg, - const int h_mask, const int w_mask, const int half_h_mask, - const int half_w_mask) { - // load dy from dram to nram - T *dy_nram_temp = (T *)buf; - int dy_n_offset_full = dy_full.h * dy_full.w * dy_full.c; - int dy_c_offset_full = 1; - int h_feature = dy_full.h; - int w_feature = dy_full.w; - int align_c = - CEIL_ALIGN(shape_seg.h * shape_seg.w, COMPUTE_COUNT_ALIGN / sizeof(T)); - int align_hw = - CEIL_ALIGN(h_feature * w_feature, COMPUTE_COUNT_ALIGN / sizeof(T)); - - int dy_dram_start = - position.n_start * dy_n_offset_full + - (position.h_start * w_feature + position.w_start) * dy_c_offset_full; - int dy_nram_start = 0; - for (int i = 0; i < shape_seg.n; ++i) { - int dy_nram_offset = dy_nram_start + i * (align_hw * align_c); - int dy_dram_offset = dy_dram_start + i * dy_n_offset_full; - __memcpy(dy_nram_temp + dy_nram_offset, dy_dram + dy_dram_offset, - shape_seg.h * shape_seg.w * sizeof(T), GDRAM2NRAM, - align_c * sizeof(T), dy_full.c * sizeof(T), - h_feature * w_feature - 1); - } - T *dy_nram = dy_nram_temp + shape_seg.n * align_hw * align_c; - Shape dy_seg{shape_seg.n, h_feature, w_feature, shape_seg.h * shape_seg.w}; 
- transposeData(dy_nram, dy_nram_temp, dy_seg); - swap(align_c, align_hw); - - // fill zeros to dx - T *dx_nram = dy_nram + shape_seg.n * align_hw * align_c; - int dx_size = shape_seg.n * shape_seg.h * shape_seg.w * dx_full.c; - __bang_write_value(dx_nram, CEIL_ALIGN(dx_size, NFU_ALIGN_SIZE / sizeof(T)), - (T)0); - - int dy_n_offset_seg = align_hw * align_c; - int dy_h_offset_seg = shape_seg.w * align_c; - int dy_w_offset_seg = align_c; - int dy_c_offset_seg = 1; - int dx_n_offset_seg = shape_seg.h * shape_seg.w * shape_seg.c; - int dx_h_offset_seg = shape_seg.w * shape_seg.c; - int dx_w_offset_seg = shape_seg.c; - int dx_c_offset_seg = 1; - - int dy_start = 0; - int dx_start = 0; - for (int nidx = 0; nidx < shape_seg.n; ++nidx) { - for (int hidx = 0; hidx < shape_seg.h; ++hidx) { - for (int widx = 0; widx < shape_seg.w; ++widx) { - int h_abs = hidx + position.h_start; - int w_abs = widx + position.w_start; - int dy_offset = dy_start; - int dx_offset = dx_start; - dy_offset += hidx * dy_h_offset_seg + widx * dy_w_offset_seg; - dx_offset += hidx * dx_h_offset_seg + widx * dx_w_offset_seg; - const int hstart = half_h_mask - h_abs > 0 ? half_h_mask - h_abs : 0; - const int hend = h_feature + half_h_mask - h_abs < h_mask - ? h_feature + half_h_mask - h_abs - : h_mask; - const int wstart = half_w_mask - w_abs > 0 ? half_w_mask - w_abs : 0; - const int wend = w_feature + half_w_mask - w_abs < w_mask - ? 
w_feature + half_w_mask - w_abs - : w_mask; - // (h, w ) with mask-indexed - // (h + h_abs - half_h_mask, w + w_abs - half_w_mask) with - // feature-indexed - dy_offset += ((hstart + h_abs - half_h_mask) * w_feature + wstart + - w_abs - half_w_mask) * - dy_c_offset_seg; - dx_offset += (hstart * w_mask + wstart) * dx_c_offset_seg; - int count = wend - wstart; - __memcpy(dx_nram + dx_offset, dy_nram + dy_offset, count * sizeof(T), - NRAM2NRAM, w_mask * dx_c_offset_seg * sizeof(T), - w_feature * dy_c_offset_seg * sizeof(T), hend - hstart - 1); - } - } - dy_start += dy_n_offset_seg; - dx_start += dx_n_offset_seg; - } - storeDataFromNramToDram(dx_dram, dx_nram, position, dx_full); -} - -template -__mlu_func__ void psamaskBase(const T *input_dram, T *output_dram, - const Shape &input_full, const Shape &output_full, - LimitParam &limit, const PsamaskType psa_type, - const DimPartitionType core_partition, - const DimPartitionType cluster_partition, - const bool is_forward, const int h_mask, - const int w_mask, const int half_h_mask, - const int half_w_mask, const int n_per_core, - const int h_per_core, const int n_per_cluster, - const int h_per_cluster) { - PositionInCore position_full; - PositionInCore position_seg; - position_full.w_start = 0; - position_full.w_end = output_full.w; - int n_num_in_cluster = n_per_cluster; - int h_num_in_cluster = h_per_cluster; - - switch (cluster_partition) { - case PARTITION_N: { - position_full.h_start = 0; - position_full.h_end = input_full.h; - position_full.n_start = taskIdY * n_per_cluster; - int cluster_need = (input_full.n + n_per_cluster - 1) / n_per_cluster; - if (taskIdY >= cluster_need) return; - int n_remainder = input_full.n - (cluster_need - 1) * n_per_cluster; - n_num_in_cluster = - (taskIdY == cluster_need - 1) ? 
n_remainder : n_per_cluster; - position_full.n_end = position_full.n_start + n_num_in_cluster; - }; break; - case PARTITION_H: { - position_full.n_start = 0; - position_full.n_end = input_full.n; - position_full.h_start = taskIdY * h_per_cluster; - int cluster_need = (input_full.h + h_per_cluster - 1) / h_per_cluster; - if (taskIdY >= cluster_need) return; - int h_remainder = input_full.h - (cluster_need - 1) * h_per_cluster; - h_num_in_cluster = - (taskIdY == cluster_need - 1) ? h_remainder : h_per_cluster; - position_full.h_end = position_full.h_start + h_num_in_cluster; - }; break; - } - switch (core_partition) { - case PARTITION_N: { - position_full.n_start += taskIdX * n_per_core; - int core_need = (n_num_in_cluster + n_per_core - 1) / n_per_core; - if (taskIdX >= core_need) return; - int n_remainder = n_num_in_cluster - (core_need - 1) * n_per_core; - position_full.n_end = - position_full.n_start + - ((taskIdX == core_need - 1) ? n_remainder : n_per_core); - }; break; - case PARTITION_H: { - position_full.h_start += taskIdX * h_per_core; - int core_need = (h_num_in_cluster + h_per_core - 1) / h_per_core; - if (taskIdX >= core_need) return; - int h_remainder = h_num_in_cluster - (core_need - 1) * h_per_core; - position_full.h_end = - position_full.h_start + - ((taskIdX == core_need - 1) ? h_remainder : h_per_core); - }; break; - } - // the count of n ,h and w need to be processed in the current core - int shape_core_n = position_full.n_end - position_full.n_start; - int shape_core_h = position_full.h_end - position_full.h_start; - int shape_core_w = input_full.w; - - limit.n = limit.n < shape_core_n ? limit.n : shape_core_n; - limit.h = limit.h < shape_core_h ? limit.h : shape_core_h; - limit.w = limit.w < shape_core_w ? 
limit.w : shape_core_w; - - // load the data to nram according to the limit - for (int nidx = position_full.n_start; nidx < position_full.n_end; - nidx += limit.n) { - position_seg.n_start = nidx; - position_seg.n_end = - position_seg.n_start + (position_full.n_end - nidx < limit.n - ? position_full.n_end - nidx - : limit.n); - for (int hidx = position_full.h_start; hidx < position_full.h_end; - hidx += limit.h) { - position_seg.h_start = hidx; - position_seg.h_end = - position_seg.h_start + (position_full.h_end - hidx < limit.h - ? position_full.h_end - hidx - : limit.h); - for (int widx = position_full.w_start; widx < position_full.w_end; - widx += limit.w) { - position_seg.w_start = widx; - position_seg.w_end = - position_seg.w_start + (position_full.w_end - widx < limit.w - ? position_full.w_end - widx - : limit.w); - - // record the segment of output except the size of channel - // channel segments of output and input are the same - Shape shape_seg; - shape_seg.n = position_seg.n_end - position_seg.n_start; - shape_seg.h = position_seg.h_end - position_seg.h_start; - shape_seg.w = position_seg.w_end - position_seg.w_start; - shape_seg.c = output_full.c; - - switch (psa_type) { - case COLLECT: { - if (is_forward) { - psamaskCollectForward(input_dram, output_dram, position_seg, - input_full, output_full, shape_seg, h_mask, - w_mask, half_h_mask, half_w_mask); - } else { - psamaskCollectBackward(input_dram, output_dram, position_seg, - input_full, output_full, shape_seg, h_mask, - w_mask, half_h_mask, half_w_mask); - } - } break; - case DISTRIBUTE: { - if (is_forward) { - psamaskDistributeForward(input_dram, output_dram, position_seg, - input_full, output_full, shape_seg, - h_mask, w_mask, half_h_mask, - half_w_mask); - } else { - psamaskDistributeBackward(input_dram, output_dram, position_seg, - input_full, output_full, shape_seg, - h_mask, w_mask, half_h_mask, - half_w_mask); - } - } break; - } - } - } - } -} - -template -__mlu_global__ void 
MLUUnion1KernelPsamaskForward( - const T *x, T *y, const PsamaskType psa_type, - const DimPartitionType core_partition, - const DimPartitionType cluster_partition, const int batch, - const int h_feature, const int w_feature, const int h_mask, - const int w_mask, const int x_c, const int y_c, const int half_h_mask, - const int half_w_mask, const int n_per_core, const int h_per_core, - const int n_per_cluster, const int h_per_cluster, const int limit_n_seg, - const int limit_h_seg, const int limit_w_seg) { - if (coreId == 0x80) { - return; - } - Shape x_full, y_full; - x_full.n = batch; - x_full.h = h_feature; - x_full.w = w_feature; - x_full.c = x_c; - y_full.n = batch; - y_full.h = h_feature; - y_full.w = w_feature; - y_full.c = y_c; - - LimitParam limit; - limit.n = limit_n_seg; - limit.h = limit_h_seg; - limit.w = limit_w_seg; - - psamaskBase(x, y, x_full, y_full, limit, psa_type, core_partition, - cluster_partition, true, h_mask, w_mask, half_h_mask, half_w_mask, - n_per_core, h_per_core, n_per_cluster, h_per_cluster); -} - -template -__mlu_global__ void MLUUnion1KernelPsamaskBackward( - const T *dy, T *dx, const PsamaskType psa_type, - const DimPartitionType core_partition, - const DimPartitionType cluster_partition, const int batch, - const int h_feature, const int w_feature, const int h_mask, - const int w_mask, const int dx_c, const int dy_c, const int half_h_mask, - const int half_w_mask, const int n_per_core, const int h_per_core, - const int n_per_cluster, const int h_per_cluster, const int limit_n_seg, - const int limit_h_seg, const int limit_w_seg) { - if (coreId == 0x80) { - return; - } - Shape dy_full, dx_full; - dx_full.n = batch; - dx_full.h = h_feature; - dx_full.w = w_feature; - dx_full.c = dx_c; - dy_full.n = batch; - dy_full.h = h_feature; - dy_full.w = w_feature; - dy_full.c = dy_c; - - LimitParam limit; - limit.n = limit_n_seg; - limit.h = limit_h_seg; - limit.w = limit_w_seg; - - psamaskBase(dy, dx, dy_full, dx_full, limit, psa_type, 
core_partition, - cluster_partition, false, h_mask, w_mask, half_h_mask, - half_w_mask, n_per_core, h_per_core, n_per_cluster, - h_per_cluster); -} - -void KernelPsamaskForward( - cnrtDim3_t k_dim, cnrtFunctionType_t k_type, cnrtQueue_t queue, - const void *x, void *y, const PsamaskType psa_type, - const DimPartitionType core_partition, - const DimPartitionType cluster_partition, const int batch, - const int h_feature, const int w_feature, const int h_mask, - const int w_mask, const int x_c, const int y_c, const int half_h_mask, - const int half_w_mask, const int n_per_core, const int h_per_core, - const int n_per_cluster, const int h_per_cluster, const int limit_n_seg, - const int limit_h_seg, const int limit_w_seg) { - MLUUnion1KernelPsamaskForward<<>>( - static_cast(x), static_cast(y), psa_type, - core_partition, cluster_partition, batch, h_feature, w_feature, h_mask, - w_mask, x_c, y_c, half_h_mask, half_w_mask, n_per_core, h_per_core, - n_per_cluster, h_per_cluster, limit_n_seg, limit_h_seg, limit_w_seg); -} - -void KernelPsamaskBackward( - cnrtDim3_t k_dim, cnrtFunctionType_t k_type, cnrtQueue_t queue, - const void *dy, void *dx, const PsamaskType psa_type, - const DimPartitionType core_partition, - const DimPartitionType cluster_partition, const int batch, - const int h_feature, const int w_feature, const int h_mask, - const int w_mask, const int dx_c, const int dy_c, const int half_h_mask, - const int half_w_mask, const int n_per_core, const int h_per_core, - const int n_per_cluster, const int h_per_cluster, const int limit_n_seg, - const int limit_h_seg, const int limit_w_seg) { - MLUUnion1KernelPsamaskBackward<<>>( - static_cast(dy), static_cast(dx), psa_type, - core_partition, cluster_partition, batch, h_feature, w_feature, h_mask, - w_mask, dx_c, dy_c, half_h_mask, half_w_mask, n_per_core, h_per_core, - n_per_cluster, h_per_cluster, limit_n_seg, limit_h_seg, limit_w_seg); -} diff --git a/mmcv/ops/csrc/common/mlu/psamask_utils.hpp 
b/mmcv/ops/csrc/common/mlu/psamask_utils.hpp deleted file mode 100644 index 30ec388..0000000 --- a/mmcv/ops/csrc/common/mlu/psamask_utils.hpp +++ /dev/null @@ -1,55 +0,0 @@ -/************************************************************************* - * Copyright (C) 2022 Cambricon. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - *************************************************************************/ -#ifndef PSAMASK_UTILS_HPP_ -#define PSAMASK_UTILS_HPP_ - -typedef enum { - COLLECT = 0, - DISTRIBUTE = 1, -} PsamaskType; - -typedef enum { - PARTITION_N = 0, - PARTITION_H = 1, -} DimPartitionType; - -struct PartitionSeg { - int h_per_cluster; - int n_per_cluster; - int h_per_core; - int n_per_core; - DimPartitionType cluster_partition; - DimPartitionType core_partition; -}; - -struct Shape { - int n; - int h; - int w; - int c; -}; - -struct LimitParam { - int n; - int h; - int w; -}; - -struct PositionInCore { - int n_start; - int n_end; - int h_start; - int h_end; - int w_start; - int w_end; -}; -#endif // PSAMASK_UTILS_HPP_ diff --git a/mmcv/ops/csrc/common/mlu/roi_align_mlu_kernel.mlu b/mmcv/ops/csrc/common/mlu/roi_align_mlu_kernel.mlu deleted file mode 100644 index c99176a..0000000 --- a/mmcv/ops/csrc/common/mlu/roi_align_mlu_kernel.mlu +++ /dev/null @@ -1,493 +0,0 @@ -/************************************************************************* - * Copyright (C) 2021 Cambricon. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - *************************************************************************/ -#include "common_mlu_helper.hpp" - -#define ROI_OFFSET 5 - -__nram__ char buffer[MAX_NRAM_SIZE]; - -namespace forward { -template -__mlu_func__ void bilinearInterpolate(const int input_height, - const int input_width, T y, T x, T *w1, - T *w2, T *w3, T *w4, int *x_low, - int *x_high, int *y_low, int *y_high, - bool *empty) { - // deal with cases that inverse elements are of feature map boundary - if (y < -1.0 || y > input_height || x < -1.0 || x > input_width) { - *empty = true; - return; - } - - if (y <= 0) y = 0; - if (x <= 0) x = 0; - - int y_low_ = int(y); - int x_low_ = int(x); - - if (y_low_ >= input_height - 1) { - *y_high = y_low_ = input_height - 1; - y = (T)y_low_; - } else { - *y_high = y_low_ + 1; - } - - if (x_low_ >= input_width - 1) { - *x_high = x_low_ = input_width - 1; - x = T(x_low_); - } else { - *x_high = x_low_ + 1; - } - - *y_low = y_low_; - *x_low = x_low_; - - T ly = y - y_low_; - T lx = x - x_low_; - T hy = 1.0 - ly; - T hx = 1.0 - lx; - *w1 = hy * hx, *w2 = hy * lx, *w3 = ly * hx, *w4 = ly * lx; - return; -} - -template -__mlu_func__ void computeChannel(T *input_core, T *nram_in, T *output_core, - T *nram_out, const int roi_bin_grid_h, - const int roi_bin_grid_w, const T roi_start_h, - const T roi_start_w, const int ph, - const int pw, const T bin_size_h, - const T bin_size_w, const float count, - const int input_height, const int input_width, - const int channels, const int 
cyc_num, - const int max_elements) { - int cyc_channel = max_elements; - - for (int i = 0; i < cyc_num; i++) { - int real_channel = - (i == cyc_num - 1) ? channels - i * cyc_channel : cyc_channel; - int align_channel = PAD_UP(real_channel, NFU_ALIGN_SIZE / sizeof(T)); - __bang_write_zero(nram_out, align_channel); - uint32_t real_size = real_channel * sizeof(T); - - int iy, ix; - for (iy = 0; iy < roi_bin_grid_h; iy++) { - // 1. compute the coordinates of the y axis in the current roi_bin_grid_h - T y = roi_start_h + ph * bin_size_h + - (T)(iy + 0.5) * bin_size_h / (T)(roi_bin_grid_h); - for (ix = 0; ix < roi_bin_grid_w; ix++) { - // 2. compute the coordinates of the x axis in the current - // roi_bin_grid_w - T x = roi_start_w + pw * bin_size_w + - (T)(ix + 0.5) * bin_size_w / (T)(roi_bin_grid_w); - - // 3. compute the four weights (w1, w2, w3 and w4), the height (y_low - // and y_high) and weight (x_low and x_high) of input feature map in - // the current roi bin grid, and the flag (empty) which shows if x, y - // are out of input feature map ranges - T w1, w2, w3, w4; - int x_low, x_high, y_low, y_high; - bool empty = false; - - bilinearInterpolate(input_height, input_width, y, x, &w1, &w2, &w3, &w4, - &x_low, &x_high, &y_low, &y_high, &empty); - - // 4. compute interpolation of the current roi bin grid - // tmp_cyc1, temp_cyc2, tmp_cyc3 and tmp_cyc4 store the input values - // to compute the interpolation, and then reused to compute - // the argmax_x and argmax_y. 
- T *tmp_cyc1 = nram_in + cyc_channel; - T *tmp_cyc2 = nram_in + cyc_channel * 2; - T *tmp_cyc3 = nram_in + cyc_channel * 3; - T *tmp_cyc4 = nram_in + cyc_channel * 4; - - if (empty) { // exits abnormal values - __bang_write_zero(nram_in, align_channel); - } else { - __bang_write_zero(nram_in, align_channel); - uint32_t offset1 = (y_low * input_width + x_low) * channels; - uint32_t offset2 = (y_low * input_width + x_high) * channels; - uint32_t offset3 = (y_high * input_width + x_low) * channels; - uint32_t offset4 = (y_high * input_width + x_high) * channels; - T *input1 = (T *)input_core + offset1 + i * cyc_channel; - T *input2 = (T *)input_core + offset2 + i * cyc_channel; - T *input3 = (T *)input_core + offset3 + i * cyc_channel; - T *input4 = (T *)input_core + offset4 + i * cyc_channel; - - // load the four pixels (p1, p2, p3 and p4) of input feature map to - // compute interpolation - __memcpy(tmp_cyc1, input1, real_size, GDRAM2NRAM); - __memcpy(tmp_cyc2, input2, real_size, GDRAM2NRAM); - __memcpy(tmp_cyc3, input3, real_size, GDRAM2NRAM); - __memcpy(tmp_cyc4, input4, real_size, GDRAM2NRAM); - - // interpolation value = w1 * p1 + w2 * p2 + w3 * p3 + w4 * p4 - __bang_mul_scalar(tmp_cyc1, tmp_cyc1, w1, align_channel); - __bang_mul_scalar(tmp_cyc2, tmp_cyc2, w2, align_channel); - __bang_mul_scalar(tmp_cyc3, tmp_cyc3, w3, align_channel); - __bang_mul_scalar(tmp_cyc4, tmp_cyc4, w4, align_channel); - - __bang_add(nram_in, tmp_cyc1, nram_in, align_channel); - __bang_add(nram_in, tmp_cyc2, nram_in, align_channel); - __bang_add(nram_in, tmp_cyc3, nram_in, align_channel); - __bang_add(nram_in, tmp_cyc4, nram_in, align_channel); - } - // 5. compute sum value and corresponding coordinates of x axis and y - // axis. Update the sum value. 
- __bang_add(nram_out, nram_in, nram_out, align_channel); - } // loop_roi_grid_w - } // loop_roi_grid_h - T count_value = (T)(1.0 / count); - __bang_mul_scalar(nram_out, nram_out, count_value, align_channel); - __memcpy(output_core + i * cyc_channel, nram_out, real_size, NRAM2GDRAM); - } // loop_cyc_num -} - -template -__mlu_func__ void roialignForwardAvg( - T *input, T *rois, T *output, const bool aligned, const int channels, - const int pooled_height, const int pooled_width, const int input_height, - const int input_width, const int sampling_ratio, const T spatial_scale, - const int num_rois) { - // find limit for channel, the nram space is divided to 6 parts that are - // input, 4 weights to compute the interpolation (w1, w2, w3, w4), output - - // max_elements : 300 : float datatype : 27296, half datatype : 54592 - // max_elements : 200 : float datatype : 16384, half datatype : 32768 - int max_elements = (PAD_DOWN(MAX_NRAM_SIZE / 6, NFU_ALIGN_SIZE)) / sizeof(T); - int cyc_num = channels / max_elements + (int)(channels % max_elements != 0); - T offset = aligned ? (T)0.5 : (T)0.0; - int task_num = num_rois * pooled_height * pooled_width; - T *nram_out = (T *)buffer; - T *nram_in = nram_out + max_elements; - if (task_num < taskDim) { - if (taskId >= task_num) { - return; - } - } - - for (int bin_idx = taskId; bin_idx < task_num; bin_idx = bin_idx + taskDim) { - if (bin_idx >= task_num) { - return; - } - - // (n,ph.pw) is a c in the pooled output - int pw = bin_idx % pooled_width; - int ph = (bin_idx / pooled_width) % pooled_height; - int n = bin_idx / pooled_width / pooled_height; - - T *roi_id_tmp = rois + n * ROI_OFFSET; - // 1. compute width and height of roi region. 
- int batch_idx = (int)roi_id_tmp[0]; - T roi_x1 = roi_id_tmp[1]; - T roi_y1 = roi_id_tmp[2]; - T roi_x2 = roi_id_tmp[3]; - T roi_y2 = roi_id_tmp[4]; - T roi_start_w = roi_x1 * spatial_scale - offset; - T roi_start_h = roi_y1 * spatial_scale - offset; - T roi_end_w = roi_x2 * spatial_scale - offset; - T roi_end_h = roi_y2 * spatial_scale - offset; - T roi_width = roi_end_w - roi_start_w; - T roi_height = roi_end_h - roi_start_h; - - if (!aligned) { - roi_width = roi_width > (T)(1.0) ? roi_width : (T)(1.0); - roi_height = roi_height > (T)(1.0) ? roi_height : (T)(1.0); - } - - // 2. compute float-type width and height of roi bin region. - T bin_size_w = (T)roi_width / (T)pooled_width; - T bin_size_h = (T)roi_height / (T)pooled_height; - - // 3. compute int-type width and height of roi bin region. - int roi_bin_grid_h, roi_bin_grid_w; - roi_bin_grid_h = (sampling_ratio > 0) - ? sampling_ratio - : int(ceilf(roi_height / pooled_height)); - roi_bin_grid_w = (sampling_ratio > 0) - ? sampling_ratio - : int(ceilf(roi_width / pooled_width)); - float count = (float)((roi_bin_grid_h * roi_bin_grid_w) > 1 - ? roi_bin_grid_h * roi_bin_grid_w - : 1.0); - T *input_core = input + batch_idx * channels * input_width * input_height; - T *output_core = output + bin_idx * channels; - // 4. compute avg value and corresponding coordinates of x axis and y axis. 
- computeChannel(input_core, nram_in, output_core, nram_out, roi_bin_grid_h, - roi_bin_grid_w, roi_start_h, roi_start_w, ph, pw, bin_size_h, - bin_size_w, count, input_height, input_width, channels, - cyc_num, max_elements); - } -} - -__mlu_global__ void MLUUnion1KernelRoiAlignAvg( - const void *input, const void *rois, const int channels, const bool aligned, - const int pooled_height, const int pooled_width, const int input_height, - const int input_width, const int sampling_ratio, const float spatial_scale, - const int num_rois, const cnrtDataType_t data_type, void *output) { - // make sure that memcore is not used - if (coreId == 0x80) { - return; - } - - switch (data_type) { - case CNRT_FLOAT16: { - roialignForwardAvg((half *)input, (half *)rois, (half *)output, aligned, - channels, pooled_height, pooled_width, input_height, - input_width, sampling_ratio, (half)spatial_scale, - num_rois); - }; break; - case CNRT_FLOAT32: { - roialignForwardAvg((float *)input, (float *)rois, (float *)output, - aligned, channels, pooled_height, pooled_width, - input_height, input_width, sampling_ratio, - (float)spatial_scale, num_rois); - }; break; - default: - break; - } - - return; -} -} // namespace forward - -namespace backward { -__mlu_func__ void bilinearInterpolateGradient(int height, int width, float y, - float x, float *w1, float *w2, - float *w3, float *w4, int *x_low, - int *x_high, int *y_low, - int *y_high) { - if (y < -1.0 || y > height || x < -1.0 || x > width) { - *w1 = 0.0, *w2 = 0.0, *w3 = 0.0, *w4 = 0.0; - *x_low = -1, *x_high = -1, *y_low = -1, *y_high = -1; - return; - } - if (y <= 0) { - y = 0; - } - if (x <= 0) { - x = 0; - } - *y_low = (int)y; - *x_low = (int)x; - if (*y_low >= height - 1) { - *y_high = height - 1, *y_low = height - 1; - y = (float)(*y_low); - } else { - *y_high = *y_low + 1; - } - if (*x_low >= width - 1) { - *x_high = width - 1, *x_low = width - 1; - x = (float)(*x_low); - } else { - *x_high = *x_low + 1; - } - float ly = y - *y_low, lx 
= x - *x_low; - float hy = 1.0 - ly, hx = 1.0 - lx; - *w1 = hy * hx, *w2 = hy * lx, *w3 = ly * hx, *w4 = ly * lx; - return; -} - -template -__mlu_func__ void unionRoiAlignBp( - T *grads, T *boxes, T *grads_image, const int boxes_num, const int hi, - const int wi, const int c, const int no, const int ho, const int wo, - const float spatial_scale, const int sampling_ratio, const bool aligned) { - int c_align = PAD_UP(c, NFU_ALIGN_SIZE / sizeof(T)); - int deal_all = boxes_num * hi * wi; - int deal_this_core = deal_all / taskDim + (int)(taskId < deal_all % taskDim); - for (int i = 0; i < deal_this_core; ++i) { - int bhw_id = i * taskDim + taskId; - int box_id = bhw_id / (hi * wi); - int ih = (bhw_id / wi) % hi; - int iw = bhw_id % wi; - T *box = boxes + box_id * 5; - int image_id = (int)box[0]; - T *image_offset = grads_image + image_id * ho * wo * c; - T *grads_ = grads + box_id * hi * wi * c + ih * wi * c + iw * c; - - float offset = aligned ? 0.5 : 0.0; - float x1 = box[1] * spatial_scale - offset; - float y1 = box[2] * spatial_scale - offset; - float x2 = box[3] * spatial_scale - offset; - float y2 = box[4] * spatial_scale - offset; - float roi_width = x2 - x1; - float roi_height = y2 - y1; - if (!aligned) { - roi_width = (roi_width > 1.0) ? roi_width : 1.0; - roi_height = (roi_height > 1.0) ? roi_height : 1.0; - } - float bin_size_h = roi_height / hi; - float bin_size_w = roi_width / wi; - - int roi_grid_h = - (sampling_ratio > 0) ? sampling_ratio : std::ceil(roi_height / hi); - int roi_grid_w = - (sampling_ratio > 0) ? 
sampling_ratio : std::ceil(roi_width / wi); - const T count = roi_grid_h * roi_grid_w; - if (c_align * sizeof(T) * 2 <= MAX_NRAM_SIZE) { - for (int iy = 0; iy < roi_grid_h; ++iy) { - const float y = - y1 + ih * bin_size_h + (iy + 0.5) * bin_size_h / roi_grid_h; - for (int ix = 0; ix < roi_grid_w; ++ix) { - const float x = - x1 + iw * bin_size_w + (ix + 0.5) * bin_size_w / roi_grid_w; - float w1, w2, w3, w4; - int x_low, x_high, y_low, y_high; - bilinearInterpolateGradient(ho, wo, y, x, &w1, &w2, &w3, &w4, &x_low, - &x_high, &y_low, &y_high); - if (x_low >= 0 && y_low >= 0) { - __memcpy(buffer, grads_, c * sizeof(T), GDRAM2NRAM); - __bang_mul_scalar((T *)buffer + c_align, (T *)buffer, (T)w1, - c_align); - __bang_mul_scalar((T *)buffer + c_align, (T *)buffer + c_align, - 1 / count, c_align); - __bang_atomic_add((T *)buffer + c_align, - image_offset + y_low * wo * c + x_low * c, - (T *)buffer + c_align, c); - __bang_mul_scalar((T *)buffer + c_align, (T *)buffer, (T)w2, - c_align); - __bang_mul_scalar((T *)buffer + c_align, (T *)buffer + c_align, - 1 / count, c_align); - __bang_atomic_add((T *)buffer + c_align, - image_offset + y_low * wo * c + x_high * c, - (T *)buffer + c_align, c); - __bang_mul_scalar((T *)buffer + c_align, (T *)buffer, (T)w3, - c_align); - __bang_mul_scalar((T *)buffer + c_align, (T *)buffer + c_align, - 1 / count, c_align); - __bang_atomic_add((T *)buffer + c_align, - image_offset + y_high * wo * c + x_low * c, - (T *)buffer + c_align, c); - __bang_mul_scalar((T *)buffer + c_align, (T *)buffer, (T)w4, - c_align); - __bang_mul_scalar((T *)buffer + c_align, (T *)buffer + c_align, - 1 / count, c_align); - __bang_atomic_add((T *)buffer + c_align, - image_offset + y_high * wo * c + x_high * c, - (T *)buffer + c_align, c); - } // x_low && y_low - } // ix - } // iy - } else { - for (int iy = 0; iy < roi_grid_h; ++iy) { - const float y = - y1 + ih * bin_size_h + (iy + 0.5) * bin_size_h / roi_grid_h; - for (int ix = 0; ix < roi_grid_w; ++ix) { - const 
float x = - x1 + iw * bin_size_w + (ix + 0.5) * bin_size_w / roi_grid_w; - float w1, w2, w3, w4; - int x_low, x_high, y_low, y_high; - bilinearInterpolateGradient(ho, wo, y, x, &w1, &w2, &w3, &w4, &x_low, - &x_high, &y_low, &y_high); - if (x_low >= 0 && y_low >= 0) { - int deal_once = - PAD_DOWN(MAX_NRAM_SIZE / 2, NFU_ALIGN_SIZE) / sizeof(T); - int c_repeat = c / deal_once + (int)(c % deal_once != 0); - for (int i = 0; i < c_repeat; ++i) { - int deal_c = deal_once; - int align_c = deal_once; - if (i == c_repeat - 1) { - deal_c = c - i * deal_once; - align_c = c_align - i * deal_once; - } - __memcpy(buffer, grads_ + i * deal_once, deal_c * sizeof(T), - GDRAM2NRAM); - __bang_mul_scalar((T *)buffer + align_c, (T *)buffer, (T)w1, - align_c); - __bang_mul_scalar((T *)buffer + align_c, (T *)buffer + align_c, - 1 / count, align_c); - __bang_atomic_add( - (T *)buffer + align_c, - image_offset + y_low * wo * c + x_low * c + i * deal_once, - (T *)buffer + align_c, deal_c); - __bang_mul_scalar((T *)buffer + align_c, (T *)buffer, (T)w2, - align_c); - __bang_mul_scalar((T *)buffer + align_c, (T *)buffer + align_c, - 1 / count, align_c); - __bang_atomic_add( - (T *)buffer + align_c, - image_offset + y_low * wo * c + x_high * c + i * deal_once, - (T *)buffer + align_c, deal_c); - __bang_mul_scalar((T *)buffer + align_c, (T *)buffer, (T)w3, - align_c); - __bang_mul_scalar((T *)buffer + align_c, (T *)buffer + align_c, - 1 / count, align_c); - __bang_atomic_add( - (T *)buffer + align_c, - image_offset + y_high * wo * c + x_low * c + i * deal_once, - (T *)buffer + align_c, deal_c); - __bang_mul_scalar((T *)buffer + align_c, (T *)buffer, (T)w4, - align_c); - __bang_mul_scalar((T *)buffer + align_c, (T *)buffer + align_c, - 1 / count, align_c); - __bang_atomic_add( - (T *)buffer + align_c, - image_offset + y_high * wo * c + x_high * c + i * deal_once, - (T *)buffer + align_c, deal_c); - } // for c_repeat - } // x_low >= 0 && y_low >= 0 - } // ix - } // iy - } // if c - } // i -} - 
-__mlu_global__ void MLUUnion1KernelRoiAlignBackward( - const void *grads, const void *boxes, void *grads_image, - const cnrtDataType_t dtype, const int boxes_num, const int hi, const int wi, - const int c, const int no, const int ho, const int wo, - const float spatial_scale, const int sampling_ratio, const bool aligned) { - // make sure that memcore is not used - if (coreId == 0x80) { - return; - } - switch (dtype) { - case CNRT_FLOAT16: { - unionRoiAlignBp((half *)grads, (half *)boxes, (half *)grads_image, - boxes_num, hi, wi, c, no, ho, wo, spatial_scale, - sampling_ratio, aligned); - }; break; - case CNRT_FLOAT32: { - unionRoiAlignBp((float *)grads, (float *)boxes, (float *)grads_image, - boxes_num, hi, wi, c, no, ho, wo, spatial_scale, - sampling_ratio, aligned); - }; break; - default: { return; } - } -} -} // namespace backward - -void KernelRoiAlign(cnrtDim3_t k_dim, cnrtFunctionType_t k_type, - cnrtQueue_t queue, const cnrtDataType_t d_type, - const void *input, const void *rois, const int channels, - const bool aligned, const int pooled_height, - const int pooled_width, const int input_height, - const int input_width, const int sampling_ratio, - const float spatial_scale, const int num_rois, - void *output) { - forward::MLUUnion1KernelRoiAlignAvg<<>>( - input, rois, channels, aligned, pooled_height, pooled_width, input_height, - input_width, sampling_ratio, spatial_scale, num_rois, d_type, output); -} - -void KernelRoiAlignBackward(cnrtDim3_t k_dim, cnrtFunctionType_t k_type, - cnrtQueue_t queue, const cnrtDataType_t dtype, - const void *grads, const void *boxes, - void *grads_image, const int boxes_num, - const int hi, const int wi, const int c, - const int no, const int ho, const int wo, - const float spatial_scale, const int sampling_ratio, - const bool aligned) { - backward::MLUUnion1KernelRoiAlignBackward<<>>( - grads, boxes, grads_image, dtype, boxes_num, hi, wi, c, no, ho, wo, - spatial_scale, sampling_ratio, aligned); -} diff --git 
a/mmcv/ops/csrc/common/mlu/roi_align_rotated_mlu_kernel.mlu b/mmcv/ops/csrc/common/mlu/roi_align_rotated_mlu_kernel.mlu deleted file mode 100644 index 9356776..0000000 --- a/mmcv/ops/csrc/common/mlu/roi_align_rotated_mlu_kernel.mlu +++ /dev/null @@ -1,490 +0,0 @@ -/************************************************************************* - * Copyright (C) 2022 Cambricon. - * - * OR IMPLIED, INCLUDING BUvoid NOKType LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENvoid SHALL THE AUTHORS OR COPYRIGHKType HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORvoid OR OTHERWISE, ARISING FROM, OUKType OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - *************************************************************************/ -#include "common_mlu_helper.hpp" -#include "roi_align_rotated_utils.hpp" - -#define ROI_OFFSET 6 -#define SAMPLING_NUM 4 - -__nram__ char nram_buffer[MAX_NRAM_SIZE]; - -template -__mlu_func__ void swap(T &a, T &b) { - T tmp = a; - a = b; - b = tmp; -} - -template -__mlu_func__ void bilinearInterpolate(const int input_height, - const int input_width, T x, T y, T *w1, - T *w2, T *w3, T *w4, int *x_low, - int *x_high, int *y_low, int *y_high, - bool *empty) { - // deal with case that the point is out of feature map boundary - if (y < -1.0 || y > input_height || x < -1.0 || x > input_width) { - *empty = true; - return; - } - - if (y <= 0) y = (T)0; - if (x <= 0) x = (T)0; - - *y_low = int(y); - *x_low = int(x); - - if (*y_low >= input_height - 1) { - *y_high = *y_low = input_height - 1; - y = (T)(*y_low); - } else { - *y_high = *y_low + 1; - } - - if (*x_low >= input_width - 1) { - *x_high = *x_low = input_width - 1; - x = T(*x_low); - } else { - *x_high = *x_low + 1; - } - T ly = y - *y_low; - T lx = x - *x_low; - T hy = 1.0 - ly; - T hx = 1.0 - lx; - *w1 = hy * hx; - *w2 = hy * lx; - *w3 = ly * 
hx; - *w4 = ly * lx; - return; -} - -template -__mlu_func__ void getRoiBinInfo(const T *rois_dram, const int bin_i, - const RoiAlignRotatedParams ¶ms, - int *batch_idx, int *roi_n, int *pw, int *ph, - T *roi_center_x, T *roi_center_y, T *roi_width, - T *roi_height, T *theta) { - T offset = params.aligned ? (T)0.5 : (T)0.0; - *pw = bin_i % params.pooled_width; - *ph = (bin_i / params.pooled_width) % params.pooled_height; - *roi_n = bin_i / params.pooled_width / params.pooled_height; - const T *roi_info = rois_dram + (*roi_n) * ROI_OFFSET; - *batch_idx = (int)roi_info[0]; - *roi_center_x = roi_info[1] * (T)params.spatial_scale - offset; - *roi_center_y = roi_info[2] * (T)params.spatial_scale - offset; - *roi_width = roi_info[3] * (T)params.spatial_scale; - *roi_height = roi_info[4] * (T)params.spatial_scale; - *theta = roi_info[5]; - if (params.clockwise) { - *theta = -(*theta); - } - if (!params.aligned) { - *roi_width = *roi_width > (T)1.0 ? *roi_width : (T)1.0; - *roi_height = *roi_height > (T)1.0 ? *roi_height : (T)1.0; - } -} - -template -__mlu_func__ void roiAlignRotatedForward(const T *input_dram, - const T *rois_dram, const int batch, - const int height, const int width, - const int channel, const int rois_num, - const RoiAlignRotatedParams ¶ms, - T *output_dram) { - int align_base_128 = NFU_ALIGN_SIZE / sizeof(T); - int channel_max_cap = MAX_NRAM_SIZE / sizeof(T) / (2 * SAMPLING_NUM + 1); - channel_max_cap = channel_max_cap / align_base_128 * align_base_128; - int channel_align = channel < channel_max_cap ? 
channel : channel_max_cap; - channel_align = CEIL_ALIGN(channel_align, align_base_128); - - T *nram_out = (T *)nram_buffer; - T *nram_ping = nram_out + channel_align; - T *nram_pong = nram_ping + channel_align * SAMPLING_NUM; - - int bin_first = taskId; - int bin_end = rois_num * params.pooled_height * params.pooled_width; - - for (int bin_i = bin_first; bin_i < bin_end; bin_i += taskDim) { - T roi_center_x, roi_center_y, roi_width, roi_height, theta; - int batch_idx, roi_n, pw, ph; - getRoiBinInfo(rois_dram, bin_i, params, &batch_idx, &roi_n, &pw, &ph, - &roi_center_x, &roi_center_y, &roi_width, &roi_height, - &theta); - T bin_size_h = roi_height / params.pooled_height; - T bin_size_w = roi_width / params.pooled_width; - - int roi_bin_grid_h = - (params.sample_ratio > 0) - ? params.sample_ratio - : __float2int_up((float)roi_height / params.pooled_height); - int roi_bin_grid_w = - (params.sample_ratio > 0) - ? params.sample_ratio - : __float2int_up((float)roi_width / params.pooled_width); - T roi_start_y = -roi_height / 2; - T roi_start_x = -roi_width / 2; - const int bin_dim = roi_bin_grid_h * roi_bin_grid_w > 1 - ? roi_bin_grid_h * roi_bin_grid_w - : 1; - T cos_theta = std::cos(theta); - T sin_theta = std::sin(theta); - T zero_sign = 1.0f / bin_dim; - - bool is_first_sample = true; - int src_offset = 0; - int dst_offset = 0; - int c_rem, c_slice, c_slice_align, pongc_slice, pongc_slice_align; - for (int c_offset = 0; c_offset < channel; c_offset += channel_align) { - __bang_write_value(nram_out, channel_align, (T)0); - c_rem = channel - c_offset; - c_slice = channel_align > c_rem ? 
c_rem : channel_align; - c_slice_align = CEIL_ALIGN(c_slice, align_base_128); - is_first_sample = true; - for (int iy = 0; iy < roi_bin_grid_h; ++iy) { - const T yy = roi_start_y + ph * bin_size_h + - T(iy + 0.5) * bin_size_h / roi_bin_grid_h; - for (int ix = 0; ix < roi_bin_grid_w; ++ix) { - const T xx = roi_start_x + pw * bin_size_w + - T(ix + 0.5) * bin_size_w / roi_bin_grid_w; - int sample_i = iy * roi_bin_grid_w + ix; - - T y = yy * cos_theta - xx * sin_theta + roi_center_y; - T x = yy * sin_theta + xx * cos_theta + roi_center_x; - T w1, w2, w3, w4; - bool empty = false; - int x_low, x_high, y_low, y_high; - bilinearInterpolate(height, width, x, y, &w1, &w2, &w3, &w4, &x_low, - &x_high, &y_low, &y_high, &empty); - /******************************************************* - | ping | pong | - |------|-----|-----|-----|-----|-----|-----|-----|-----| - |output| p1 | p2 | p3 | p4 | p1 | p2 | p3 | p4 | - |------|-----|-----|-----|-----|-----|-----|-----|-----| - ********************************************************/ - if (is_first_sample && !empty) { - // load input data from dram to nram - __bang_write_value(nram_ping, SAMPLING_NUM * c_slice_align, (T)0); - src_offset = - (batch_idx * height * width + y_low * width + x_low) * channel + - c_offset; - dst_offset = 0; - __memcpy(nram_ping + dst_offset, input_dram + src_offset, - c_slice * sizeof(T), GDRAM2NRAM); - src_offset = (batch_idx * height * width + y_low * width + x_high) * - channel + - c_offset; - dst_offset = c_slice_align; - __memcpy(nram_ping + dst_offset, input_dram + src_offset, - c_slice * sizeof(T), GDRAM2NRAM); - src_offset = (batch_idx * height * width + y_high * width + x_low) * - channel + - c_offset; - dst_offset = c_slice_align * 2; - __memcpy(nram_ping + dst_offset, input_dram + src_offset, - c_slice * sizeof(T), GDRAM2NRAM); - src_offset = - (batch_idx * height * width + y_high * width + x_high) * - channel + - c_offset; - dst_offset = c_slice_align * 3; - __memcpy(nram_ping + dst_offset, 
input_dram + src_offset, - c_slice * sizeof(T), GDRAM2NRAM); - } - // load next input data to nram - if (sample_i + 1 < bin_dim) { - int p_iy = (sample_i + 1) / roi_bin_grid_w; - int p_ix = (sample_i + 1) % roi_bin_grid_w; - const T p_yy = roi_start_y + ph * bin_size_h + - T(p_iy + 0.5) * bin_size_h / roi_bin_grid_h; - const T p_xx = roi_start_x + pw * bin_size_w + - T(p_ix + 0.5) * bin_size_w / roi_bin_grid_w; - T p_y = p_yy * cos_theta - p_xx * sin_theta + roi_center_y; - T p_x = p_yy * sin_theta + p_xx * cos_theta + roi_center_x; - T p_w1, p_w2, p_w3, p_w4; - bool p_empty = false; - int p_x_low, p_x_high, p_y_low, p_y_high; - bilinearInterpolate(height, width, p_x, p_y, &p_w1, &p_w2, &p_w3, - &p_w4, &p_x_low, &p_x_high, &p_y_low, &p_y_high, - &p_empty); - pongc_slice = c_slice; - pongc_slice_align = c_slice_align; - if (!p_empty) { - __bang_write_value(nram_pong, SAMPLING_NUM * pongc_slice_align, - (T)0); - src_offset = - (batch_idx * height * width + p_y_low * width + p_x_low) * - channel + - c_offset; - dst_offset = 0; - __memcpy(nram_pong + dst_offset, input_dram + src_offset, - c_slice * sizeof(T), GDRAM2NRAM); - src_offset = - (batch_idx * height * width + p_y_low * width + p_x_high) * - channel + - c_offset; - dst_offset = pongc_slice_align; - __memcpy(nram_pong + dst_offset, input_dram + src_offset, - c_slice * sizeof(T), GDRAM2NRAM); - src_offset = - (batch_idx * height * width + p_y_high * width + p_x_low) * - channel + - c_offset; - dst_offset = pongc_slice_align * 2; - __memcpy(nram_pong + dst_offset, input_dram + src_offset, - c_slice * sizeof(T), GDRAM2NRAM); - src_offset = - (batch_idx * height * width + p_y_high * width + p_x_high) * - channel + - c_offset; - dst_offset = pongc_slice_align * 3; - __memcpy(nram_pong + dst_offset, input_dram + src_offset, - c_slice * sizeof(T), GDRAM2NRAM); - } - } - T *tmp_sum = nram_ping + 3 * c_slice_align; - if (empty) { - __bang_write_value(tmp_sum, c_slice_align, T(0)); - } else { - 
__bang_mul_scalar(nram_ping, nram_ping, w1, c_slice_align); - __bang_mul_scalar(nram_ping + c_slice_align, - nram_ping + c_slice_align, w2, c_slice_align); - __bang_mul_scalar(nram_ping + 2 * c_slice_align, - nram_ping + 2 * c_slice_align, w3, c_slice_align); - __bang_mul_scalar(nram_ping + 3 * c_slice_align, - nram_ping + 3 * c_slice_align, w4, c_slice_align); - __bang_sumpool(tmp_sum, nram_ping, c_slice_align, 1, SAMPLING_NUM, - 1, SAMPLING_NUM, 1, 1); - } - __bang_add(nram_out, nram_out, tmp_sum, c_slice_align); - swap(nram_ping, nram_pong); - __asm__ volatile("sync;"); - is_first_sample = false; - } - } - __bang_mul_scalar(nram_out, nram_out, zero_sign, c_slice_align); - // store the result to dram - int output_offset = - ((roi_n * params.pooled_height + ph) * params.pooled_width + pw) * - channel + - c_offset; - __memcpy(output_dram + output_offset, nram_out, c_slice * sizeof(T), - NRAM2GDRAM); - } - } -} - -template -__mlu_func__ void roiAlignRotatedBackward(const T *top_grad_dram, - const T *rois_dram, const int batch, - const int height, const int width, - const int channel, const int rois_num, - const RoiAlignRotatedParams ¶ms, - T *bottom_grad_dram) { - int align_base_128 = NFU_ALIGN_SIZE / sizeof(T); - int channel_align = CEIL_ALIGN(channel, align_base_128); - - unsigned int max_element = MAX_NRAM_SIZE / sizeof(T); - int c_limit = max_element >> 2; - c_limit = c_limit > channel_align ? 
channel_align : c_limit; - - T *nram_ping = (T *)nram_buffer; - T *nram_pong = nram_ping + 2 * c_limit; - T *nram_output = nullptr; - - int bin_first = taskId; - int bin_end = rois_num * params.pooled_height * params.pooled_width; - bool is_first_bin = true; - T roi_center_x, roi_center_y, roi_width, roi_height, theta; - int batch_idx, roi_n, pw, ph; - T pong_roi_center_x, pong_roi_center_y, pong_roi_width, pong_roi_height, - pong_theta; - int pong_batch_idx, pong_roi_n, pong_pw, pong_ph; - for (int bin_i = bin_first; bin_i < bin_end; bin_i += taskDim) { - getRoiBinInfo(rois_dram, bin_i, params, &batch_idx, &roi_n, &pw, &ph, - &roi_center_x, &roi_center_y, &roi_width, &roi_height, - &theta); - T bin_size_h = roi_height / params.pooled_height; - T bin_size_w = roi_width / params.pooled_width; - - int roi_bin_grid_h = - (params.sample_ratio > 0) - ? params.sample_ratio - : __float2int_up((float)roi_height / params.pooled_height); - int roi_bin_grid_w = - (params.sample_ratio > 0) - ? params.sample_ratio - : __float2int_up((float)roi_width / params.pooled_width); - T roi_start_y = -roi_height / 2; - T roi_start_x = -roi_width / 2; - const int bin_dim = roi_bin_grid_h * roi_bin_grid_w > 1 - ? roi_bin_grid_h * roi_bin_grid_w - : 1; - T cos_theta = std::cos(theta); - T sin_theta = std::sin(theta); - T zero_sign = 1.0f / bin_dim; - int c_rem, c_slice, pongc_slice, c_offset; - c_rem = channel; - c_offset = 0; - /**************************************** - | ping | pong | - |---------|---------|---------|---------| - | input | output | input | output | - |---------|---------|---------|---------| - *****************************************/ - if (is_first_bin) { - // load the first top_grad to nram - c_slice = c_limit < c_rem ? 
c_limit : c_rem; - int top_grad_offset = - ((roi_n * params.pooled_height + ph) * params.pooled_width + pw) * - channel; - __memcpy(nram_ping, top_grad_dram + top_grad_offset, c_slice * sizeof(T), - GDRAM2NRAM); - } - nram_output = nram_ping + c_limit; - while (c_rem > 0) { - c_slice = c_slice < c_rem ? c_slice : c_rem; - // load the next top_grad to nram - if (c_rem - c_slice > 0) { - // load the rest channels to nram - pongc_slice = (c_rem - c_slice > c_slice) ? c_slice : c_rem - c_slice; - int top_grad_offset = - ((roi_n * params.pooled_height + ph) * params.pooled_width + pw) * - channel + - c_offset + c_slice; - __memcpy_async(nram_pong, top_grad_dram + top_grad_offset, - pongc_slice * sizeof(T), GDRAM2NRAM); - } else if (bin_i + taskDim < bin_end) { - // load next bin's data to nram - getRoiBinInfo(rois_dram, bin_i + taskDim, params, &pong_batch_idx, - &pong_roi_n, &pong_pw, &pong_ph, &pong_roi_center_x, - &pong_roi_center_y, &pong_roi_width, &pong_roi_height, - &pong_theta); - pongc_slice = c_limit < channel ? 
c_limit : channel; - int top_grad_offset = ((pong_roi_n * params.pooled_height + pong_ph) * - params.pooled_width + - pong_pw) * - channel; - __memcpy_async(nram_pong, top_grad_dram + top_grad_offset, - c_slice * sizeof(T), GDRAM2NRAM); - } - // comput the output in a single bin - - for (int iy = 0; iy < roi_bin_grid_h; ++iy) { - const T yy = roi_start_y + ph * bin_size_h + - T(iy + 0.5) * bin_size_h / roi_bin_grid_h; - for (int ix = 0; ix < roi_bin_grid_w; ++ix) { - const T xx = roi_start_x + pw * bin_size_w + - T(ix + 0.5) * bin_size_w / roi_bin_grid_w; - T y = yy * cos_theta - xx * sin_theta + roi_center_y; - T x = yy * sin_theta + xx * cos_theta + roi_center_x; - T w1, w2, w3, w4; - bool empty = false; - int x_low, x_high, y_low, y_high; - bilinearInterpolate(height, width, x, y, &w1, &w2, &w3, &w4, &x_low, - &x_high, &y_low, &y_high, &empty); - if (empty) { - continue; - } else { - __bang_mul_scalar(nram_output, nram_ping, w1 * zero_sign, c_limit); - __bang_atomic_add( - (T *)nram_output, - bottom_grad_dram + batch_idx * height * width * channel + - y_low * width * channel + x_low * channel + c_offset, - (T *)nram_output, c_slice); - __bang_mul_scalar(nram_output, nram_ping, w2 * zero_sign, c_limit); - __bang_atomic_add( - (T *)nram_output, - bottom_grad_dram + batch_idx * height * width * channel + - y_low * width * channel + x_high * channel + c_offset, - (T *)nram_output, c_slice); - __bang_mul_scalar(nram_output, nram_ping, w3 * zero_sign, c_limit); - __bang_atomic_add( - (T *)nram_output, - bottom_grad_dram + batch_idx * height * width * channel + - y_high * width * channel + x_low * channel + c_offset, - (T *)nram_output, c_slice); - __bang_mul_scalar(nram_output, nram_ping, w4 * zero_sign, c_limit); - __bang_atomic_add( - (T *)nram_output, - bottom_grad_dram + batch_idx * height * width * channel + - y_high * width * channel + x_high * channel + c_offset, - (T *)nram_output, c_slice); - } - } - } - swap(nram_ping, nram_pong); - c_rem -= c_slice; - 
c_offset += c_slice; - __asm__ volatile("sync;"); - } - is_first_bin = false; - } -} - -__mlu_global__ void MLUUnion1KernelRoiAlignRotatedForward( - const void *features, const void *rois, void *output, const int batch, - const int height, const int width, const int channel, const int rois_num, - const RoiAlignRotatedParams rroiAlignParams, - const cnrtDataType_t data_type) { - if (0x80 == coreId) { - return; - } - - if (data_type == CNRT_FLOAT32) { - roiAlignRotatedForward((float *)features, (float *)rois, batch, height, - width, channel, rois_num, rroiAlignParams, - (float *)output); - } else { - roiAlignRotatedForward((half *)features, (half *)rois, batch, height, width, - channel, rois_num, rroiAlignParams, (half *)output); - } -} - -__mlu_global__ void MLUUnion1KernelRoiAlignRotatedBackward( - const void *top_grad, const void *rois, void *bottom_grad, const int batch, - const int height, const int width, const int channel, const int rois_num, - const RoiAlignRotatedParams rroiAlignParams, - const cnrtDataType_t data_type) { - if (0x80 == coreId) { - return; - } - - if (data_type == CNRT_FLOAT32) { - roiAlignRotatedBackward((float *)top_grad, (float *)rois, batch, height, - width, channel, rois_num, rroiAlignParams, - (float *)bottom_grad); - } else { - roiAlignRotatedBackward((half *)top_grad, (half *)rois, batch, height, - width, channel, rois_num, rroiAlignParams, - (half *)bottom_grad); - } -} - -void KernelRoiAlignRotatedForward( - cnrtDim3_t k_dim, cnrtFunctionType_t k_type, cnrtQueue_t queue, - const cnrtDataType_t d_type, const void *features, const void *rois, - void *output, const int batch, const int height, const int width, - const int channel, const int rois_num, - const RoiAlignRotatedParams roiAlignRotatedParams) { - MLUUnion1KernelRoiAlignRotatedForward<<>>( - features, rois, output, batch, height, width, channel, rois_num, - roiAlignRotatedParams, d_type); -} - -void KernelRoiAlignRotatedBackward( - cnrtDim3_t k_dim, cnrtFunctionType_t k_type, 
cnrtQueue_t queue, - const cnrtDataType_t d_type, const void *top_grad, const void *rois, - void *bottom_grad, const int batch, const int height, const int width, - const int channel, const int rois_num, - const RoiAlignRotatedParams roiAlignRotatedParams) { - MLUUnion1KernelRoiAlignRotatedBackward<<>>( - top_grad, rois, bottom_grad, batch, height, width, channel, rois_num, - roiAlignRotatedParams, d_type); -} diff --git a/mmcv/ops/csrc/common/mlu/roi_align_rotated_utils.hpp b/mmcv/ops/csrc/common/mlu/roi_align_rotated_utils.hpp deleted file mode 100644 index cd0ec02..0000000 --- a/mmcv/ops/csrc/common/mlu/roi_align_rotated_utils.hpp +++ /dev/null @@ -1,24 +0,0 @@ -/************************************************************************* - * Copyright (C) 2022 Cambricon. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - *************************************************************************/ -#ifndef ROI_ALIGN_ROTATED_UTILS_HPP_ -#define ROI_ALIGN_ROTATED_UTILS_HPP_ - -struct RoiAlignRotatedParams { - int pooled_height; - int pooled_width; - int sample_ratio; - float spatial_scale; - bool aligned; - bool clockwise; -}; - -#endif // ROI_ALIGN_ROTATED_UTILS_HPP_ diff --git a/mmcv/ops/csrc/common/mlu/roi_pool_mlu_kernel.mlu b/mmcv/ops/csrc/common/mlu/roi_pool_mlu_kernel.mlu deleted file mode 100644 index 3a6d2d3..0000000 --- a/mmcv/ops/csrc/common/mlu/roi_pool_mlu_kernel.mlu +++ /dev/null @@ -1,747 +0,0 @@ -/************************************************************************* - * Copyright (C) 2022 Cambricon. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - *************************************************************************/ -#include "common_mlu_helper.hpp" - -#define ALIGN_SIZE 64 -#define PIPELINE_COMMON_NUM 2 -#define PIPELINE_PINGPONG_NUM 10 - -__nram__ char nram_buffer[MAX_NRAM_SIZE]; - -namespace forward { -template -__mlu_func__ void getRoiBinInfo(T *input_v, T *rois_v, int bin_i, int height, - int width, int channels, int p_height, - int p_width, T spatial_scale, int *bin_x1, - int *bin_y1, int *bin_x2, int *bin_y2, - int *bin_wdim, int *bin_hdim, int *bin_dims, - T **input_base, bool *is_empty) { - int pw = bin_i % p_width; - int ph = (bin_i / p_width) % p_height; - int roi_n = bin_i / p_width / p_height; - - /*roi*/ - const T *roi_info = rois_v + roi_n * 5; // {{batch, x1, y1, x2, y2},,,} - int batch_index = (int)roi_info[0]; - int roi_x1 = round(roi_info[1] * spatial_scale); - int roi_y1 = round(roi_info[2] * spatial_scale); - int roi_x2 = round(roi_info[3] * spatial_scale); - int roi_y2 = round(roi_info[4] * spatial_scale); - int roi_w = roi_x2 - roi_x1 + 1 > 1 ? roi_x2 - roi_x1 + 1 : 1; - int roi_h = roi_y2 - roi_y1 + 1 > 1 ? roi_y2 - roi_y1 + 1 : 1; - - /*bin*/ - T bin_w = (T)roi_w / (T)p_width; - T bin_h = (T)roi_h / (T)p_height; - - *bin_x1 = (int)floor((T)pw * bin_w) + roi_x1; - *bin_x1 = *bin_x1 > 0 ? *bin_x1 : 0; - *bin_x1 = *bin_x1 < width ? *bin_x1 : width; - - *bin_y1 = (int)floor((T)ph * bin_h) + roi_y1; - *bin_y1 = *bin_y1 > 0 ? *bin_y1 : 0; - *bin_y1 = *bin_y1 < height ? 
*bin_y1 : height; - - *bin_x2 = (int)ceil((T)(pw + 1) * bin_w) + roi_x1; - *bin_x2 = *bin_x2 > 0 ? *bin_x2 : 0; - *bin_x2 = *bin_x2 < width ? *bin_x2 : width; - - *bin_y2 = (int)ceil((T)(ph + 1) * bin_h) + roi_y1; - *bin_y2 = *bin_y2 > 0 ? *bin_y2 : 0; - *bin_y2 = *bin_y2 < height ? *bin_y2 : height; - - *input_base = input_v + batch_index * height * width * channels; - *bin_wdim = *bin_x2 - *bin_x1; - *bin_hdim = *bin_y2 - *bin_y1; - *bin_dims = (*bin_hdim) * (*bin_wdim); - *is_empty = (*bin_y2 <= *bin_y1) || (*bin_x2 <= *bin_x1); -} - -template -__mlu_func__ void MLUUnion1Roipool(T *input_v, T *rois_v, int batch, - int channels, int height, int width, - int p_height, int p_width, int rois_num, - T spatial_scale, T *output_v, int *argmax) { - /* - * NRAM partition - * |---------------------------------------------------| - * | ping | - * |---------------------------------------------------| - * | pong | - * |---------------------------------------------------| - * | out | - * |---------------------------------------------------| - * | argmax | - * |---------------------------------------------------| - * | a | - * |---------------------------------------------------| - * | b | - * |---------------------------------------------------| - */ - uint32_t is_half = sizeof(T) == sizeof(half) ? 
true : false; - uint32_t t_size = sizeof(T); - uint32_t float_div = NFU_ALIGN_SIZE / sizeof(float); - uint32_t half_div = NFU_ALIGN_SIZE / sizeof(half); - - uint32_t channels_align = PAD_UP(channels, float_div); - uint32_t nram_limit = PAD_DOWN( - (MAX_NRAM_SIZE / sizeof(float) - 4 * channels_align) / 2, half_div); - - // nram PING/PONG, output, argamx, a, b - float *nram_ping = (float *)nram_buffer; - float *nram_pong = (float *)nram_buffer + nram_limit; - float *nram_out = (float *)nram_buffer + 2 * nram_limit; - float *nram_argmax = nram_out + channels_align; - float *nram_a = nram_out + 2 * channels_align; - float *nram_b = nram_out + 3 * channels_align; - - uint32_t c_bins_num = rois_num * p_height * p_width; - uint32_t task_bins = c_bins_num / taskDim; - uint32_t rem_bins = c_bins_num % taskDim; - if (taskId < rem_bins) { - task_bins += 1; - } - int bin_first = - (c_bins_num / taskDim) * taskId + (taskId > rem_bins ? rem_bins : taskId); - int bins_loop = bin_first + task_bins; - - T *input_base = NULL; - T *output_base = output_v + bin_first * channels; - int *argmax_base = NULL != argmax ? argmax + bin_first * channels : NULL; - int bin_x1, bin_y1, bin_x2, bin_y2, bin_wdim, bin_hdim, bin_dims; - int pbin_x1, pbin_y1, pbin_x2, pbin_y2, pbin_wdim, pbin_hdim, pbin_dims; - bool is_empty = false; - bool pong_is_empty = false; - bool is_first_bin = true; - uint32_t src_offset = 0; - uint32_t dst_offset = 0; - uint32_t nram_offset = 0; - uint32_t half_offset = - is_half ? 
(nram_limit / 2 / half_div * half_div) * 2 : 0; - float *nram_tmp = NULL; - - uint32_t c_slice = 0; - uint32_t c_slice_align = 0; - uint32_t pongc_slice = 0; - uint32_t pongc_slice_align = 0; - for (int bin_i = bin_first; bin_i < bins_loop; bin_i++) { - getRoiBinInfo((T *)input_v, (T *)rois_v, bin_i, height, width, channels, - p_height, p_width, (T)spatial_scale, &bin_x1, &bin_y1, - &bin_x2, &bin_y2, &bin_wdim, &bin_hdim, &bin_dims, - &input_base, &is_empty); - uint32_t c_rem = channels; - c_slice = nram_limit / bin_dims / float_div * float_div; - - if (is_first_bin && !is_empty) { - c_slice = c_slice > c_rem ? c_rem : c_slice; - c_slice_align = PAD_UP(c_slice, float_div); - for (int h = bin_y1; h < bin_y2; h++) { - src_offset = (h * width + bin_x1) * channels; - nram_offset = (h - bin_y1) * bin_wdim * c_slice_align + half_offset; - if (c_slice_align == channels) { - __memcpy((T *)nram_ping + nram_offset, (T *)input_base + src_offset, - bin_wdim * c_slice * t_size, GDRAM2NRAM); - } else { - __memcpy((T *)nram_ping + nram_offset, (T *)input_base + src_offset, - c_slice * t_size, GDRAM2NRAM, c_slice_align * t_size, - channels * t_size, bin_wdim - 1); - } - } - } - uint32_t c_offset = 0; - while (c_rem > 0) { - c_slice = c_slice > c_rem ? c_rem : c_slice; - c_slice_align = PAD_UP(c_slice, float_div); - - /*__memcpy_async*/ - if (c_rem - c_slice > 0 && !is_empty) { - pongc_slice = c_rem - c_slice > c_slice ? 
c_slice : c_rem - c_slice; - pongc_slice_align = PAD_UP(pongc_slice, float_div); - for (int h = bin_y1; h < bin_y2; h++) { - src_offset = (h * width + bin_x1) * channels + c_offset; - nram_offset = - (h - bin_y1) * bin_wdim * pongc_slice_align + half_offset; - __memcpy_async((T *)nram_pong + nram_offset, - (T *)input_base + src_offset + c_slice, - pongc_slice * t_size, GDRAM2NRAM, - pongc_slice_align * t_size, channels * t_size, - bin_wdim - 1); - } - } else if (bin_i + 1 < bins_loop) { - getRoiBinInfo((T *)input_v, (T *)rois_v, bin_i + 1, height, width, - channels, p_height, p_width, (T)spatial_scale, &pbin_x1, - &pbin_y1, &pbin_x2, &pbin_y2, &pbin_wdim, &pbin_hdim, - &pbin_dims, &input_base, &pong_is_empty); - pongc_slice = PAD_DOWN(nram_limit / pbin_dims, float_div); - pongc_slice = pongc_slice > channels ? channels : pongc_slice; - pongc_slice_align = PAD_UP(pongc_slice, float_div); - if (!pong_is_empty) { - for (int h = pbin_y1; h < pbin_y2; h++) { - src_offset = (h * width + pbin_x1) * channels; - nram_offset = - (h - pbin_y1) * pbin_wdim * pongc_slice_align + half_offset; - if (pongc_slice_align == channels) { - __memcpy_async((T *)nram_pong + nram_offset, - (T *)input_base + src_offset, - pbin_wdim * pongc_slice * t_size, GDRAM2NRAM); - } else { - __memcpy_async((T *)nram_pong + nram_offset, - (T *)input_base + src_offset, pongc_slice * t_size, - GDRAM2NRAM, pongc_slice_align * t_size, - channels * t_size, pbin_wdim - 1); - } - } - } - } - - if (is_empty) { - __bang_write_value((T *)nram_out, c_slice_align, (T)0); - __memcpy((T *)output_base + dst_offset + c_offset, (T *)nram_out, - c_slice * t_size, NRAM2GDRAM); - if (NULL != argmax) { - __bang_write_value((int32_t *)nram_out, c_slice_align, (int32_t)(-1)); - __memcpy((int32_t *)argmax_base + dst_offset + c_offset, - (int32_t *)nram_out, c_slice * sizeof(int32_t), NRAM2GDRAM); - } - } else { - if (is_half) { - uint32_t bin_align64 = PAD_UP(bin_dims * c_slice_align, half_div); - __bang_half2float((float 
*)nram_ping, (half *)nram_ping + half_offset, - bin_align64); - } - __bang_maxpool((float *)nram_out, (float *)nram_ping, c_slice_align, - bin_hdim, bin_wdim, bin_hdim, bin_wdim, 1, 1); - if (is_half) { - uint32_t c_align64 = PAD_UP(c_slice_align, half_div); - __bang_float2half_rd((half *)nram_out, (float *)nram_out, c_align64); - } - __memcpy((T *)output_base + dst_offset + c_offset, (T *)nram_out, - c_slice * t_size, NRAM2GDRAM); - if (NULL != argmax) { - /*compute max_index*/ - __bang_maxpool_index((uint32_t *)nram_out, (float *)nram_ping, - c_slice_align, bin_hdim, bin_wdim, bin_hdim, - bin_wdim, 1, 1); - convertInt2Float((float *)nram_argmax, (float *)nram_a, - (int32_t *)nram_out, (float *)nram_b, c_slice_align); - - /*compute input_h*/ - for (int i = 0; i < c_slice; i++) { - nram_out[i] = (float)(((uint32_t *)nram_out)[i] / bin_wdim); - } - __bang_add_scalar((float *)nram_a, (float *)nram_out, (float)bin_y1, - c_slice_align); - __bang_mul_scalar((float *)nram_ping, (float *)nram_a, (float)width, - c_slice_align); - - /*compute input_w*/ - __bang_mul_scalar((float *)nram_a, (float *)nram_out, (float)bin_wdim, - c_slice_align); - __bang_sub((float *)nram_a, (float *)nram_argmax, (float *)nram_a, - c_slice_align); - __bang_add_scalar((float *)nram_a, (float *)nram_a, (float)bin_x1, - c_slice_align); - __bang_add((float *)nram_out, (float *)nram_ping, (float *)nram_a, - c_slice_align); - convertFloat2Int((int32_t *)nram_argmax, (float *)nram_a, - (float *)nram_out, (float *)nram_b, c_slice_align); - __memcpy((int32_t *)argmax_base + dst_offset + c_offset, - (int32_t *)nram_argmax, c_slice * sizeof(int32_t), - NRAM2GDRAM); - } - } - nram_tmp = nram_ping; - nram_ping = nram_pong; - nram_pong = nram_tmp; - c_offset += c_slice; - c_rem -= c_slice; - __asm__ volatile("sync;"); - } - dst_offset += channels; - is_first_bin = false; - } -} - -__mlu_global__ void MLUKernelRoiPool(cnrtDataType_t data_type, - const void *input_data, - const void *input_rois, int batch, - 
int channels, int height, int width, - int pooled_height, int pooled_width, - int rois_num, float spatial_scale, - void *output_data, int *argmax) { - switch (data_type) { - case CNRT_FLOAT16: { - MLUUnion1Roipool((half *)input_data, (half *)input_rois, batch, channels, - height, width, pooled_height, pooled_width, rois_num, - (half)spatial_scale, (half *)output_data, argmax); - }; break; - case CNRT_FLOAT32: { - MLUUnion1Roipool((float *)input_data, (float *)input_rois, batch, - channels, height, width, pooled_height, pooled_width, - rois_num, (float)spatial_scale, (float *)output_data, - argmax); - }; break; - default: { break; } - } -} -} // namespace forward - -namespace backward { -// Convert index of argmax from global grads_image to local bin in RoI. Vector -// operations do not support int type, so conversion from int to float is -// performed here. -__mlu_func__ void convertIndex( - int32_t *nram_argmax, int32_t *nram_argmax_fp, int32_t *nram_argmax_fp_bk1, - int32_t *nram_argmax_fp_bk2, int32_t *nram_argmax_int, - int32_t *nram_argmax_int_h, int32_t *nram_argmax_int_w, - int32_t *nram_argmax_fp_h, int32_t *nram_argmax_fp_w, - float *nram_atomic_add, float *nram_grads_image, int width, int height, - int wstart, int hstart, int w_compute, int h_compute, int align_c, - int channels, int loop_flag, int loop_id, int true_limit) { - convertInt2Float((float *)nram_argmax_fp, (float *)nram_argmax_fp_bk1, - (int *)nram_argmax, (float *)nram_argmax_fp_bk2, align_c); - - // This step uses scalar division, because the above vector division causes - // rounding accuracy problem. - for (int i = 0; i < channels; ++i) { - *((float *)nram_argmax_fp + i) = *((float *)nram_argmax_fp + i) / width; - } - - // Use 'float2int_tz' to perform '*((int32_t*)nram_argmax + i) / width' - // operation. 
- convertFloat2Int((int *)nram_argmax_int_h, (float *)nram_argmax_fp_bk1, - (float *)nram_argmax_fp, (float *)nram_argmax_fp_bk2, - align_c); - convertInt2Float((float *)nram_argmax_fp, (float *)nram_argmax_fp_bk1, - (int *)nram_argmax_int_h, (float *)nram_argmax_fp_bk2, - align_c); - - // Perform 'temp_result - hstart' operation - __bang_sub_scalar((float *)nram_argmax_fp_h, (float *)nram_argmax_fp, hstart, - align_c); - - // Perform 'temp_result1 - temp_result2 * width' operation - __bang_mul_scalar((float *)nram_argmax_fp_w, (float *)nram_argmax_fp, width, - align_c); - convertInt2Float((float *)nram_argmax_fp, (float *)nram_argmax_fp_bk1, - (int *)nram_argmax, (float *)nram_argmax_fp_bk2, align_c); - __bang_sub((float *)nram_argmax_fp_w, (float *)nram_argmax_fp, - (float *)nram_argmax_fp_w, align_c); - - // Perform 'temp_result - wstart' operation - __bang_sub_scalar((float *)nram_argmax_fp_w, (float *)nram_argmax_fp_w, - wstart, align_c); - - // Perform 'temp_result = h * w_compute + w' operation - __bang_mul_scalar((float *)nram_argmax_fp_h, (float *)nram_argmax_fp_h, - w_compute, align_c); - __bang_add((float *)nram_argmax_fp_h, (float *)nram_argmax_fp_h, - (float *)nram_argmax_fp_w, align_c); - - if (loop_flag == 1) { - __bang_sub_scalar((float *)nram_argmax_fp_h, (float *)nram_argmax_fp_h, - (loop_id * true_limit), align_c); - } - convertFloat2Int((int *)nram_argmax_int, (float *)nram_argmax_fp_bk1, - (float *)nram_argmax_fp_h, (float *)nram_argmax_fp_bk2, - align_c); -} - -template -__mlu_func__ void MLUUnion1Roipool(const T *rois, const T *grads, - const int32_t *argmax, T *grads_image, - int channels, int height, int width, - int pooled_height, int pooled_width, - int rois_num, const T spatial_scale, - int high_precision) { - // Calculate the number of rois processed by each core - int bin_num = rois_num * pooled_height * pooled_width; - int loop = - (bin_num % taskDim) ? 
(bin_num / taskDim + 1) : (bin_num / taskDim); - int tid = taskId * loop; - if (bin_num % taskDim != 0) { - if (tid >= bin_num) { - return; - } else { - // last part is (bin_num - tid). - loop = bin_num - tid < loop ? bin_num - tid : loop; - } - } - int align_c = PAD_UP(channels, ALIGN_SIZE); - // Common part has 2: grads, argmax; ping-pong each is PIPELINE_PINGPONG_NUM. - int data_size = - PAD_DOWN(((MAX_NRAM_SIZE / sizeof(float) - PIPELINE_COMMON_NUM * align_c - - (PIPELINE_PINGPONG_NUM - 1) * align_c * 2) / - 2), - ALIGN_SIZE); - int hw_limit = data_size / align_c; - float *nram_grads = (float *)nram_buffer; - for (int idx = tid; idx < tid + loop; ++idx) { - // (n, ph, pw) is a C in the pooled output - int pw = idx % pooled_width; - int ph = (idx / pooled_width) % pooled_height; - int n = idx / pooled_width / pooled_height; - - const T *offset_rois = (const T *)(rois + n * 5); - int roi_batch_ind = int(offset_rois[0]); - // Calculate the roi region on feature maps - int roi_start_w = round(offset_rois[1] * spatial_scale); - int roi_start_h = round(offset_rois[2] * spatial_scale); - int roi_end_w = round(offset_rois[3] * spatial_scale); - int roi_end_h = round(offset_rois[4] * spatial_scale); - // Force malformed rois to 1x1 - int roi_width = - roi_end_w - roi_start_w + 1 > 1 ? roi_end_w - roi_start_w + 1 : 1; - int roi_height = - roi_end_h - roi_start_h + 1 > 1 ? roi_end_h - roi_start_h + 1 : 1; - T bin_size_h = (T)roi_height / (T)pooled_height; - T bin_size_w = (T)roi_width / (T)pooled_width; - - // The corresponding bin region - int hstart = int(floor((T)ph * bin_size_h)); - int wstart = int(floor((T)pw * bin_size_w)); - int hend = int(ceil((T)(ph + 1) * bin_size_h)); - int wend = int(ceil((T)(pw + 1) * bin_size_w)); - - // Add roi offsets and clip to input boundaries, min(max(A, B), C); - hstart = hstart + roi_start_h > 0 ? hstart + roi_start_h : 0; - hstart = hstart < height ? hstart : height; - hend = hend + roi_start_h > 0 ? 
hend + roi_start_h : 0; - hend = hend < height ? hend : height; - wstart = wstart + roi_start_w > 0 ? wstart + roi_start_w : 0; - wstart = wstart < width ? wstart : width; - wend = wend + roi_start_w > 0 ? wend + roi_start_w : 0; - wend = wend < width ? wend : width; - - bool is_empty = (hend <= hstart) || (wend <= wstart); - if (!is_empty) { - int h_compute = hend - hstart; - int w_compute = wend - wstart; - int true_limit = - hw_limit < h_compute * w_compute ? hw_limit : h_compute * w_compute; - int loop_int = (h_compute * w_compute) / true_limit; - int rem = (h_compute * w_compute) % true_limit; - int32_t *nram_argmax = (int32_t *)nram_grads + align_c; - int32_t *nram_argmax_fp = (int32_t *)nram_argmax + align_c; - int32_t *nram_argmax_fp_bk1 = (int32_t *)nram_argmax_fp + align_c; - int32_t *nram_argmax_fp_bk2 = (int32_t *)nram_argmax_fp_bk1 + align_c; - int32_t *nram_argmax_int = (int32_t *)nram_argmax_fp_bk2 + align_c; - int32_t *nram_argmax_int_h = (int32_t *)nram_argmax_int + align_c; - int32_t *nram_argmax_int_w = (int32_t *)nram_argmax_int_h + align_c; - int32_t *nram_argmax_fp_h = (int32_t *)nram_argmax_int_w + align_c; - int32_t *nram_argmax_fp_w = (int32_t *)nram_argmax_fp_h + align_c; - float *nram_atomic_add = (float *)nram_argmax_fp_w + align_c; - float *nram_grads_image = (float *)nram_atomic_add + align_c; - if (true_limit == h_compute * w_compute) { - /* - * NRAM partition - * |---------------------------------------------------| - * | grads | - * |---------------------------------------------------| - * | argmax | - * |---------------------------------------------------| - * | argmax_temp | - * |---------------------------------------------------| - * | atomic_add | - * |---------------------------------------------------| - * | grads_image | - * |---------------------------------------------------| - */ - - // Load the data from GDRAM to NRAM. 
- __memcpy( - (T *)nram_grads + align_c * high_precision, - (const T *)grads + - (n * pooled_height * pooled_width + ph * pooled_width + pw) * - channels, - channels * sizeof(T), GDRAM2NRAM); - if (high_precision) { - __bang_half2float((float *)nram_grads, - (half *)nram_grads + align_c * high_precision, - align_c); - } - - __memcpy((int32_t *)nram_argmax, (const int32_t *)argmax + - (n * pooled_height * pooled_width + - ph * pooled_width + pw) * - channels, - channels * sizeof(int32_t), GDRAM2NRAM); - - // Perform pooling operation on NRAM. - convertIndex(nram_argmax, nram_argmax_fp, nram_argmax_fp_bk1, - nram_argmax_fp_bk2, nram_argmax_int, nram_argmax_int_h, - nram_argmax_int_w, nram_argmax_fp_h, nram_argmax_fp_w, - nram_atomic_add, nram_grads_image, width, height, wstart, - hstart, w_compute, h_compute, align_c, channels, 0, 0, 0); - __bang_maxpool_bp((float *)nram_grads_image, (float *)nram_grads, - (int32_t *)nram_argmax_int, align_c, h_compute, - w_compute, h_compute, w_compute, h_compute, - w_compute); - if (high_precision) { - __bang_float2half_rd((half *)nram_grads_image, - (float *)nram_grads_image, - h_compute * w_compute * align_c); - } - - // Store the result on NRAM back to GDRAM. 
- for (int hc = 0; hc < h_compute; ++hc) { - for (int wc = 0; wc < w_compute; ++wc) { - T *dst = (T *)nram_atomic_add; - int grad_image_offset = (roi_batch_ind * height * width + - (hc + hstart) * width + wc + wstart) * - channels; - T *src1 = (T *)grads_image + grad_image_offset; - int nram_grads_image_offset = (hc * w_compute + wc) * align_c; - T *src2 = (T *)nram_grads_image + nram_grads_image_offset; - __bang_atomic_add(dst, src1, src2, channels); - } - } - } else if (true_limit > 0) { - /* - * NRAM partition - * |---------------------------------------------------| - * | grads | - * |---------------------------------------------------| - * | argmax | - * |--------------------ping_pong----------------------| - * | argmax_temp | argmax_temp | - * |------------------------|--------------------------| - * | atomic_add | atomic_add | - * |------------------------|--------------------------| - * | grads_image | grads_image | - * |---------------------------------------------------| - */ - - // Load the data from GDRAM to NRAM. - __memcpy( - (T *)nram_grads + align_c * high_precision, - (const T *)grads + - (n * pooled_height * pooled_width + ph * pooled_width + pw) * - channels, - channels * sizeof(T), GDRAM2NRAM); - if (high_precision) { - __bang_half2float((float *)nram_grads, - (half *)nram_grads + align_c * high_precision, - align_c); - } - __memcpy((int32_t *)nram_argmax, (const int32_t *)argmax + - (n * pooled_height * pooled_width + - ph * pooled_width + pw) * - channels, - channels * sizeof(int32_t), GDRAM2NRAM); - - int ping_pong = 0; - int ping_pong_offset = - (MAX_NRAM_SIZE / sizeof(float) - align_c * PIPELINE_COMMON_NUM) / 2; - for (int loop_id = 0; loop_id <= loop_int; ++loop_id) { - int size = (loop_id == loop_int) ? rem : true_limit; - if (size == 0) { - break; - } - // Perform pooling operation on NRAM. 
- nram_argmax_fp = - (int32_t *)nram_argmax + align_c + ping_pong * ping_pong_offset; - nram_argmax_fp_bk1 = (int32_t *)nram_argmax_fp + align_c; - nram_argmax_fp_bk2 = (int32_t *)nram_argmax_fp_bk1 + align_c; - nram_argmax_int = (int32_t *)nram_argmax_fp_bk2 + align_c; - nram_argmax_int_h = (int32_t *)nram_argmax_int + align_c; - nram_argmax_int_w = (int32_t *)nram_argmax_int_h + align_c; - nram_argmax_fp_h = (int32_t *)nram_argmax_int_w + align_c; - nram_argmax_fp_w = (int32_t *)nram_argmax_fp_h + align_c; - nram_atomic_add = (float *)nram_argmax_fp_w + align_c; - nram_grads_image = (float *)nram_atomic_add + align_c; - int loop_id_1 = loop_id; - int size_1 = ((loop_id_1) == loop_int) ? rem : true_limit; - if (size_1 == 0) { - break; - } - convertIndex(nram_argmax, nram_argmax_fp, nram_argmax_fp_bk1, - nram_argmax_fp_bk2, nram_argmax_int, nram_argmax_int_h, - nram_argmax_int_w, nram_argmax_fp_h, nram_argmax_fp_w, - nram_atomic_add, nram_grads_image, width, height, wstart, - hstart, w_compute, h_compute, align_c, channels, 1, - loop_id_1, true_limit); - __bang_maxpool_bp((float *)nram_grads_image, (float *)nram_grads, - (int32_t *)nram_argmax_int, align_c, size_1, 1, - size_1, 1, size_1, 1); - if (high_precision) { - __bang_float2half_rd((half *)nram_grads_image, - (float *)nram_grads_image, size_1 * align_c); - } - - // Store the result on NRAM back to GDRAM. 
- for (int index_size = 0; index_size < size; ++index_size) { - int h = (loop_id * true_limit + index_size) / w_compute; - int w = (loop_id * true_limit + index_size) % w_compute; - T *dst = (T *)nram_atomic_add; - T *grads_image_n = - (T *)grads_image + roi_batch_ind * height * width * channels; - T *src1 = (T *)grads_image_n + - ((h + hstart) * width + (w + wstart)) * channels; - T *src2 = (T *)nram_grads_image + index_size * align_c; - __bang_atomic_add(dst, src1, src2, channels); - } - ping_pong = 1 - ping_pong; - } - } else { - /* - * NRAM partition - * |---------------------------------------------------| - * | grads | - * |---------------------------------------------------| - * | argmax | - * |--------------------ping_pong----------------------| - * | argmax_temp | argmax_temp | - * |------------------------|--------------------------| - * | atomic_add | atomic_add | - * |------------------------|--------------------------| - * | grads_image | grads_image | - * |---------------------------------------------------| - */ - - int c_limit = - PAD_DOWN(MAX_NRAM_SIZE / sizeof(float) / - (PIPELINE_COMMON_NUM + PIPELINE_PINGPONG_NUM * 2), - ALIGN_SIZE); - int loop_int = channels / c_limit; - int rem = channels % c_limit; - int ping_pong = 0; - int ping_pong_offset = - (MAX_NRAM_SIZE / sizeof(float) - c_limit * PIPELINE_COMMON_NUM) / 2; - for (int loop_id = 0; loop_id <= loop_int; ++loop_id) { - int size = (loop_id == loop_int) ? 
rem : c_limit; - if (size == 0) { - break; - } - nram_argmax_fp = - (int32_t *)nram_argmax + c_limit + ping_pong * ping_pong_offset; - nram_argmax_fp_bk1 = (int32_t *)nram_argmax_fp + c_limit; - nram_argmax_fp_bk2 = (int32_t *)nram_argmax_fp_bk1 + c_limit; - nram_argmax_int = (int32_t *)nram_argmax_fp_bk2 + c_limit; - nram_argmax_int_h = (int32_t *)nram_argmax_int + c_limit; - nram_argmax_int_w = (int32_t *)nram_argmax_int_h + c_limit; - nram_argmax_fp_h = (int32_t *)nram_argmax_int_w + c_limit; - nram_argmax_fp_w = (int32_t *)nram_argmax_fp_h + c_limit; - nram_atomic_add = (float *)nram_argmax_fp_w + c_limit; - nram_grads_image = (float *)nram_atomic_add + c_limit; - - // This pipeline loads the data from GDRAM to NRAM. - __memcpy((T *)nram_grads + c_limit * high_precision, - (const T *)grads + - n * pooled_height * pooled_width * channels + - ph * pooled_width * channels + pw * channels + - loop_id * c_limit, - size * sizeof(T), GDRAM2NRAM); - if (high_precision) { - __bang_half2float((float *)nram_grads, - (half *)nram_grads + c_limit * high_precision, - c_limit); - } - __memcpy((int32_t *)nram_argmax, - (const int32_t *)argmax + - n * pooled_height * pooled_width * channels + - ph * pooled_width * channels + pw * channels + - loop_id * c_limit, - size * sizeof(int32_t), GDRAM2NRAM); - - for (int hc = 0; hc < h_compute; ++hc) { - for (int wc = 0; wc < w_compute; ++wc) { - // This pipeline performs pooling operation on NRAM. 
- convertIndex( - nram_argmax, nram_argmax_fp, nram_argmax_fp_bk1, - nram_argmax_fp_bk2, nram_argmax_int, nram_argmax_int_h, - nram_argmax_int_w, nram_argmax_fp_h, nram_argmax_fp_w, - nram_atomic_add, nram_grads_image, width, height, wstart + wc, - hstart + hc, h_compute, w_compute, c_limit, size, 0, 0, 0); - __bang_maxpool_bp((float *)nram_grads_image, (float *)nram_grads, - (int32_t *)nram_argmax_int, c_limit, 1, 1, 1, 1, - 1, 1); - if (high_precision) { - __bang_float2half_rd((half *)nram_grads_image, - (float *)nram_grads_image, c_limit); - } - // This pipeline stores the result on NRAM back to GDRAM. - T *dst = (T *)nram_atomic_add; - T *grads_image_n = - (T *)grads_image + roi_batch_ind * height * width * channels; - T *src1 = (T *)grads_image_n + - ((hc + hstart) * width + (wc + wstart)) * channels + - loop_id * c_limit; - T *src2 = (T *)nram_grads_image; - __bang_atomic_add(dst, src1, src2, size); - } - } - ping_pong = 1 - ping_pong; - } - } - } - } -} - -__mlu_global__ void MLUKernelRoiPoolBackward( - const void *grads, const void *rois, const int *argmax, void *grads_image, - int rois_num, int pooled_height, int pooled_width, int channels, int no, - int height, int width, const float spatial_scale, - const cnrtDataType_t k_dtype) { - // make sure that memcore is not used - if (coreId == 0x80) { - return; - } - switch (k_dtype) { - case CNRT_FLOAT16: { - // Using the float type '__bang_max_pool_bp' instruction to increase the - // bit width. 
- const int high_precision = 1; - MLUUnion1Roipool((const half *)rois, (const half *)grads, - (const int32_t *)argmax, (half *)grads_image, channels, - height, width, pooled_height, pooled_width, rois_num, - (const half)spatial_scale, high_precision); - }; break; - case CNRT_FLOAT32: { - const int high_precision = 0; - MLUUnion1Roipool((const float *)rois, (const float *)grads, - (const int32_t *)argmax, (float *)grads_image, channels, - height, width, pooled_height, pooled_width, rois_num, - (const float)spatial_scale, high_precision); - }; break; - default: { break; } - } -} -} // namespace backward - -void KernelRoiPoolForward(cnrtDim3_t k_dim, cnrtFunctionType_t k_type, - cnrtQueue_t queue, cnrtDataType_t data_type, - const void *input_data, const void *input_rois, - const int batch, const int channels, const int height, - const int width, const int pooled_height, - const int pooled_width, const int rois_num, - const float spatial_scale, void *output_data, - int *argmax) { - forward::MLUKernelRoiPool<<>>( - data_type, input_data, input_rois, batch, channels, height, width, - pooled_height, pooled_width, rois_num, spatial_scale, output_data, - argmax); -} - -void KernelRoiPoolBackward(cnrtDim3_t k_dim, cnrtFunctionType_t k_type, - cnrtQueue_t queue, cnrtDataType_t k_dtype, - const void *grad_output_ptr, const void *rois_ptr, - const int *argmax_ptr, void *grad_input_ptr, - const int box_num, const int pooled_height, - const int pooled_width, const int channels, - const int batch, const int height, const int width, - const float spatial_scale) { - backward::MLUKernelRoiPoolBackward<<>>( - grad_output_ptr, rois_ptr, argmax_ptr, grad_input_ptr, box_num, - pooled_height, pooled_width, channels, batch, height, width, - spatial_scale, k_dtype); -} diff --git a/mmcv/ops/csrc/common/mlu/roiaware_pool3d_mlu_kernel.mlu b/mmcv/ops/csrc/common/mlu/roiaware_pool3d_mlu_kernel.mlu deleted file mode 100644 index 4c1edf0..0000000 --- 
a/mmcv/ops/csrc/common/mlu/roiaware_pool3d_mlu_kernel.mlu +++ /dev/null @@ -1,747 +0,0 @@ -/************************************************************************* - * Copyright (C) 2022 Cambricon. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - *************************************************************************/ - -#include "common_mlu_helper.hpp" - -#define ROI_OFFSET 7 -#define FLOAT_NRAM_BUFFER_NUM 14 -#define HALF_NRAM_BUFFER_NUM 25 -#define ALIGN_NUM 64 - -__nram__ char data_nram[MAX_NRAM_SIZE]; - -template -__mlu_global__ void MLUUnion1KernelPtsIdxOfVoxels( - const int pool_method, const int boxes_num, const int pts_num, - const int max_pts_each_voxel, const int out_x, const int out_y, - const int out_z, const T *rois, const T *pts, int *pts_idx_of_voxels) { - // params (T)rois: (boxes_num, 7) - // params (T)pts: (3, pts_num) - // params (int)pts_idx_of_voxels: (boxes_num, out_x, out_y, out_z, - // max_pts_each_voxel) - - // make sure that memcore is not used - if (coreId == 0x80) { - return; - } - int nram_pts_num = 0; - if (sizeof(T) == sizeof(float)) { - nram_pts_num = PAD_DOWN( - (MAX_NRAM_SIZE / sizeof(float) / FLOAT_NRAM_BUFFER_NUM), ALIGN_NUM); - } else { - nram_pts_num = PAD_DOWN( - (MAX_NRAM_SIZE / sizeof(half) / HALF_NRAM_BUFFER_NUM), ALIGN_NUM); - } - - char *X = NULL; - char *Y = NULL; - char *Z = NULL; - char *local_X = NULL; - char *local_Y = NULL; - char *local_Z = NULL; - char *nram_pts_in_flag = NULL; - float *temp_buffer1 = NULL; - float *temp_buffer2 = NULL; - float *temp_buffer3 = NULL; - 
float *temp_buffer4 = NULL; - float *temp_buffer5 = NULL; - float *nram_voxel_offset = NULL; - int *nram_pts_idx_seq = NULL; - float *fp_local_X = NULL; - float *fp_local_Y = NULL; - float *fp_local_Z = NULL; - float *fp_nram_pts_in_flag = NULL; - if (sizeof(T) == sizeof(float)) { - X = (char *)((float *)data_nram); - Y = (char *)((float *)data_nram + nram_pts_num); - Z = (char *)((float *)data_nram + nram_pts_num * 2); - local_X = (char *)((float *)data_nram + nram_pts_num * 3); - local_Y = (char *)((float *)data_nram + nram_pts_num * 4); - local_Z = (char *)((float *)data_nram + nram_pts_num * 5); - nram_pts_in_flag = (char *)((float *)data_nram + nram_pts_num * 6); - temp_buffer1 = (float *)data_nram + nram_pts_num * 7; - temp_buffer2 = (float *)data_nram + nram_pts_num * 8; - temp_buffer3 = (float *)data_nram + nram_pts_num * 9; - temp_buffer4 = (float *)data_nram + nram_pts_num * 10; - temp_buffer5 = (float *)data_nram + nram_pts_num * 11; - nram_voxel_offset = (float *)data_nram + nram_pts_num * 12; - nram_pts_idx_seq = (int *)((float *)data_nram + nram_pts_num * 13); - fp_local_X = (float *)local_X; - fp_local_Y = (float *)local_Y; - fp_local_Z = (float *)local_Z; - fp_nram_pts_in_flag = (float *)nram_pts_in_flag; - } else { - X = (char *)((half *)data_nram); - Y = (char *)((half *)data_nram + nram_pts_num); - Z = (char *)((half *)data_nram + nram_pts_num * 2); - local_X = (char *)((half *)data_nram + nram_pts_num * 4); - local_Y = (char *)((half *)data_nram + nram_pts_num * 6); - local_Z = (char *)((half *)data_nram + nram_pts_num * 8); - nram_pts_in_flag = (char *)((half *)data_nram + nram_pts_num * 10); - temp_buffer1 = (float *)((half *)data_nram + nram_pts_num * 11); - temp_buffer2 = (float *)((half *)data_nram + nram_pts_num * 13); - temp_buffer3 = (float *)((half *)data_nram + nram_pts_num * 15); - temp_buffer4 = (float *)((half *)data_nram + nram_pts_num * 17); - temp_buffer5 = (float *)((half *)data_nram + nram_pts_num * 19); - nram_voxel_offset = 
(float *)((half *)data_nram + nram_pts_num * 21); - nram_pts_idx_seq = (int *)((half *)data_nram + nram_pts_num * 23); - fp_local_X = (float *)((half *)local_X - nram_pts_num); - fp_local_Y = (float *)((half *)local_Y - nram_pts_num); - fp_local_Z = (float *)((half *)local_Z - nram_pts_num); - fp_nram_pts_in_flag = (float *)((half *)nram_pts_in_flag - nram_pts_num); - } - - for (int i = 0; i < nram_pts_num; i++) { - nram_pts_idx_seq[i] = i; - } - - int nram_pts_loop_times = pts_num / nram_pts_num; - int rem_nram_num = pts_num % nram_pts_num; - - for (int roi_index = taskId; roi_index < boxes_num; roi_index += taskDim) { - const T *cur_roi = rois + roi_index * ROI_OFFSET; - T cx = cur_roi[0]; - T cy = cur_roi[1]; - T cz = cur_roi[2]; - T dx = cur_roi[3]; - T dy = cur_roi[4]; - T dz = cur_roi[5]; - T rz = cur_roi[6]; - - T dx_2 = dx / 2.0; - T dy_2 = dy / 2.0; - T dz_2 = dz / 2.0; - - for (int loop_idx = 0; loop_idx <= nram_pts_loop_times; loop_idx++) { - int load_pts_num = - (loop_idx == nram_pts_loop_times) ? rem_nram_num : nram_pts_num; - if (load_pts_num == 0) { - break; - } - int pts_offset_cur_loop = nram_pts_num * loop_idx; - int compute_pts_num = (loop_idx == nram_pts_loop_times) - ? 
PAD_UP(rem_nram_num, ALIGN_NUM) - : nram_pts_num; - // load pts - __memcpy((void *)X, (T *)pts + pts_offset_cur_loop, - load_pts_num * sizeof(T), GDRAM2NRAM); - __memcpy((void *)Y, (T *)pts + pts_num + pts_offset_cur_loop, - load_pts_num * sizeof(T), GDRAM2NRAM); - __memcpy((void *)Z, (T *)pts + pts_num * 2 + pts_offset_cur_loop, - load_pts_num * sizeof(T), GDRAM2NRAM); - // fabs(local_z) - __bang_sub_scalar((T *)local_Z, (T *)Z, (T)cz, compute_pts_num); - __bang_sub_scalar((T *)temp_buffer1, (T *)Z, (T)(cz + dz_2), - compute_pts_num); - __bang_active_abs((T *)temp_buffer1, (T *)temp_buffer1, compute_pts_num); -#if __BANG_ARCH__ >= 322 - __bang_le_scalar((T *)nram_pts_in_flag, (T *)temp_buffer1, (T)(dz_2), - compute_pts_num); -#else - __bang_write_value((void *)temp_buffer2, compute_pts_num, (T)(dz_2)); - __bang_le((T *)nram_pts_in_flag, (T *)temp_buffer1, (T *)temp_buffer2, - compute_pts_num); -#endif - T cosa = std::cos(-rz); - T sina = std::sin(-rz); - __bang_sub_scalar((T *)temp_buffer3, (T *)X, (T)cx, compute_pts_num); - __bang_sub_scalar((T *)temp_buffer4, (T *)Y, (T)cy, compute_pts_num); - __bang_mul_scalar((T *)temp_buffer1, (T *)temp_buffer3, (T)cosa, - compute_pts_num); - __bang_mul_scalar((T *)temp_buffer2, (T *)temp_buffer4, (T)sina, - compute_pts_num); - // local_x - __bang_sub((T *)local_X, (T *)temp_buffer1, (T *)temp_buffer2, - compute_pts_num); - // fabs(local_x) - __bang_active_abs((T *)temp_buffer1, (T *)local_X, compute_pts_num); - // fabs(local_x) < dx/2 ? 
1 : 0 -#if __BANG_ARCH__ >= 322 - __bang_lt_scalar((T *)temp_buffer1, (T *)temp_buffer1, (T)(dx_2), - compute_pts_num); -#else - __bang_write_value((void *)temp_buffer2, compute_pts_num, (T)(dx_2)); - __bang_lt((T *)temp_buffer1, (T *)temp_buffer1, (T *)temp_buffer2, - compute_pts_num); -#endif - __bang_and((T *)nram_pts_in_flag, (T *)nram_pts_in_flag, - (T *)temp_buffer1, - compute_pts_num); // flush res - - __bang_mul_scalar((T *)temp_buffer1, (T *)temp_buffer3, (T)sina, - compute_pts_num); - __bang_mul_scalar((T *)temp_buffer2, (T *)temp_buffer4, (T)cosa, - compute_pts_num); - // local_y - __bang_add((T *)local_Y, (T *)temp_buffer1, (T *)temp_buffer2, - compute_pts_num); - // fabs(local_y) - __bang_active_abs((T *)temp_buffer1, (T *)local_Y, compute_pts_num); - // fabs(local_y) < dy/2 ? 1 : 0 -#if __BANG_ARCH__ >= 322 - __bang_lt_scalar((T *)temp_buffer1, (T *)temp_buffer1, (T)(dy_2), - compute_pts_num); -#else - __bang_write_value((void *)temp_buffer2, compute_pts_num, (T)(dy_2)); - __bang_lt((T *)temp_buffer1, (T *)temp_buffer1, (T *)temp_buffer2, - compute_pts_num); -#endif - __bang_and((T *)nram_pts_in_flag, (T *)nram_pts_in_flag, - (T *)temp_buffer1, - compute_pts_num); // flush res - T x_res = dx / out_x; - T y_res = dy / out_y; - T z_res = dz / out_z; - __bang_add_scalar((T *)local_X, (T *)local_X, (T)(dx_2), compute_pts_num); - __bang_add_scalar((T *)local_Y, (T *)local_Y, (T)(dy_2), compute_pts_num); - // local_Z do not need to add dz/2.0 - -#if (__BANG_ARCH__ >= 322) && (__BANG_ARCH__ != 372) - __bang_div((T *)local_X, (T *)local_X, (T)x_res, compute_pts_num); - __bang_div((T *)local_Y, (T *)local_Y, (T)y_res, compute_pts_num); - __bang_div((T *)local_Z, (T *)local_Z, (T)z_res, compute_pts_num); -#else - __bang_mul_scalar((T *)local_X, (T *)local_X, (T)(1 / x_res), - compute_pts_num); - __bang_mul_scalar((T *)local_Y, (T *)local_Y, (T)(1 / y_res), - compute_pts_num); - __bang_mul_scalar((T *)local_Z, (T *)local_Z, (T)(1 / z_res), - compute_pts_num); 
-#endif - // float = float2int + int2float, half = half2int + int2float - if (sizeof(T) == sizeof(float)) { -#if __BANG_ARCH__ >= 322 - __bang_float2int32_tz((int *)temp_buffer1, (float *)local_X, - compute_pts_num, 0); - __bang_float2int32_tz((int *)temp_buffer2, (float *)local_Y, - compute_pts_num, 0); - __bang_float2int32_tz((int *)temp_buffer3, (float *)local_Z, - compute_pts_num, 0); - __bang_int322float_rn((float *)fp_local_X, (int *)temp_buffer1, - compute_pts_num, 0); - __bang_int322float_rn((float *)fp_local_Y, (int *)temp_buffer2, - compute_pts_num, 0); - __bang_int322float_rn((float *)fp_local_Z, (int *)temp_buffer3, - compute_pts_num, 0); -#else - convertFloat2Int((int *)temp_buffer1, (float *)temp_buffer2, - (float *)fp_local_X, (float *)temp_buffer3, - compute_pts_num); - convertFloat2Int((int *)temp_buffer2, (float *)temp_buffer3, - (float *)fp_local_Y, (float *)temp_buffer4, - compute_pts_num); - convertFloat2Int((int *)temp_buffer3, (float *)temp_buffer4, - (float *)fp_local_Z, (float *)temp_buffer5, - compute_pts_num); - convertInt2Float((float *)fp_local_X, (float *)temp_buffer4, - (int *)temp_buffer1, (float *)temp_buffer5, - compute_pts_num); - convertInt2Float((float *)fp_local_Y, (float *)temp_buffer4, - (int *)temp_buffer2, (float *)temp_buffer5, - compute_pts_num); - convertInt2Float((float *)fp_local_Z, (float *)temp_buffer4, - (int *)temp_buffer3, (float *)temp_buffer5, - compute_pts_num); -#endif - } else { - __bang_half2float((float *)temp_buffer4, (half *)nram_pts_in_flag, - compute_pts_num); - __bang_move((void *)fp_nram_pts_in_flag, (void *)temp_buffer4, - compute_pts_num * sizeof(float)); -#if __BANG_ARCH__ >= 322 - __bang_half2int32_tz((int *)temp_buffer1, (half *)local_X, - compute_pts_num, 0); - __bang_half2int32_tz((int *)temp_buffer2, (half *)local_Y, - compute_pts_num, 0); - __bang_half2int32_tz((int *)temp_buffer3, (half *)local_Z, - compute_pts_num, 0); - __bang_int322float_rn((float *)fp_local_X, (int *)temp_buffer1, - 
compute_pts_num, 0); - __bang_int322float_rn((float *)fp_local_Y, (int *)temp_buffer2, - compute_pts_num, 0); - __bang_int322float_rn((float *)fp_local_Z, (int *)temp_buffer3, - compute_pts_num, 0); -#else - __bang_half2int16_tz((int16_t *)temp_buffer1, (half *)local_X, - compute_pts_num, 0); - __bang_half2int16_tz((int16_t *)temp_buffer2, (half *)local_Y, - compute_pts_num, 0); - __bang_half2int16_tz((int16_t *)temp_buffer3, (half *)local_Z, - compute_pts_num, 0); - __bang_int162float((float *)fp_local_X, (int16_t *)temp_buffer1, - compute_pts_num, 0); - __bang_int162float((float *)fp_local_Y, (int16_t *)temp_buffer2, - compute_pts_num, 0); - __bang_int162float((float *)fp_local_Z, (int16_t *)temp_buffer3, - compute_pts_num, 0); -#endif - } - // process index >= 0 - __bang_write_value((float *)temp_buffer4, compute_pts_num, (float)0.0f); - __bang_maxequal((float *)fp_local_X, (float *)fp_local_X, - (float *)temp_buffer4, compute_pts_num); - __bang_maxequal((float *)fp_local_Y, (float *)fp_local_Y, - (float *)temp_buffer4, compute_pts_num); - __bang_maxequal((float *)fp_local_Z, (float *)fp_local_Z, - (float *)temp_buffer4, compute_pts_num); - // process index <= (out_x - 1) - __bang_write_value((float *)temp_buffer5, compute_pts_num, - (float)(out_x - 1)); - __bang_minequal((float *)fp_local_X, (float *)fp_local_X, - (float *)temp_buffer5, compute_pts_num); - __bang_write_value((float *)temp_buffer5, compute_pts_num, - (float)(out_y - 1)); - __bang_minequal((float *)fp_local_Y, (float *)fp_local_Y, - (float *)temp_buffer5, compute_pts_num); - __bang_write_value((float *)temp_buffer5, compute_pts_num, - (float)(out_z - 1)); - __bang_minequal((float *)fp_local_Z, (float *)fp_local_Z, - (float *)temp_buffer5, compute_pts_num); - __bang_mul_scalar((float *)temp_buffer1, (float *)fp_local_X, - (float)(out_y * out_z), compute_pts_num); - __bang_mul_scalar((float *)temp_buffer2, (float *)fp_local_Y, - (float)out_z, compute_pts_num); - __bang_mul_scalar((float 
*)temp_buffer3, (float *)fp_local_Z, (float)1.0, - compute_pts_num); - __bang_add((float *)nram_voxel_offset, (float *)temp_buffer1, - (float *)temp_buffer2, compute_pts_num); - __bang_add((float *)nram_voxel_offset, (float *)nram_voxel_offset, - (float *)temp_buffer3, compute_pts_num); - __bang_mul_scalar((float *)nram_voxel_offset, (float *)nram_voxel_offset, - (float)max_pts_each_voxel, compute_pts_num); - if (compute_pts_num != load_pts_num) { - __memset_nram((float *)fp_nram_pts_in_flag + load_pts_num, - compute_pts_num - load_pts_num, (float)0.0); - } - __bang_collect((float *)temp_buffer4, (float *)nram_pts_idx_seq, - (float *)fp_nram_pts_in_flag, compute_pts_num); - int pts_num_in_cur_roi = - (int)__bang_count((float *)fp_nram_pts_in_flag, compute_pts_num); - int *pts_idx_cur_voxels = - (int *)pts_idx_of_voxels + - roi_index * out_x * out_y * out_z * max_pts_each_voxel; - for (int idx = 0; idx < pts_num_in_cur_roi; idx++) { - int cur_pts_idx = *((int *)temp_buffer4 + idx); - int offset = (int)(*((float *)nram_voxel_offset + cur_pts_idx)); - int cnt = pts_idx_cur_voxels[offset]; - if (cnt < max_pts_each_voxel - 1) { - pts_idx_cur_voxels[offset + cnt + 1] = - cur_pts_idx + loop_idx * nram_pts_num; - pts_idx_cur_voxels[offset]++; - } - } - } - } -} - -template -__mlu_global__ void MLUUnion1KernelRoiawarePool3dForward( - const int pool_method, const int boxes_num, const int pts_num, - const int channels, const int max_pts_each_voxel, const int out_x, - const int out_y, const int out_z, const T *pts_feature, - const int *pts_idx_of_voxels, T *pooled_features, int *argmax) { - // params (T)pts_feature: (channels, pts_num) - // params (int)pts_idx_of_voxels: (boxes_num, out_x, out_y, out_z, - // max_pts_each_voxel) params (int)argmax: (boxes_num, out_x, out_y, out_z, - // channels) params (T)pooled_features: (boxes_num, out_x, out_y, out_z, - // channels) - - // make sure that memcore is not used - if (coreId == 0x80) { - return; - } - int align_num = 
NFU_ALIGN_SIZE / sizeof(T); - int align_max_pts_each_voxel = PAD_UP(max_pts_each_voxel, align_num); - int nram_channels_limit = - PAD_DOWN((MAX_NRAM_SIZE - 128 - - align_max_pts_each_voxel * (sizeof(int) + sizeof(T))) / - ((align_max_pts_each_voxel + 1) * sizeof(T) + sizeof(int)), - align_num); - int *nram_pts_idx_cur_voxel = (int *)data_nram; - // nram_pts_idx_cur_voxel [align_max_pts_each_voxel] - T *nram_max_pts_feature_tmp = - (T *)((int *)nram_pts_idx_cur_voxel + align_max_pts_each_voxel); - // nram_max_pts_feature_tmp [align_max_pts_each_voxel] - T *nram_pts_feature_in_voxel = - ((T *)nram_max_pts_feature_tmp + align_max_pts_each_voxel); - // nram_pts_feature_in_voxel [nram_channels_limit, align_max_pts_each_voxel] - T *nram_pooled_features_cur_voxel = - ((T *)nram_pts_feature_in_voxel + - nram_channels_limit * align_max_pts_each_voxel); - // nram_pooled_features_cur_voxel [nram_channels_limit] - int *nram_argmax_cur_voxel = - (int *)((T *)nram_pooled_features_cur_voxel + nram_channels_limit); - // nram_argmax_cur_voxel [nram_channels_limit] - char *one_pooled_feature = - (char *)((int *)nram_argmax_cur_voxel + nram_channels_limit); - // one_pooled_feature [128] - int channels_loop_times = channels / nram_channels_limit; - int rem_channels = channels % nram_channels_limit; - for (int voxel_index = taskId; - voxel_index < boxes_num * out_x * out_y * out_z; - voxel_index += taskDim) { - int *pts_idx_cur_voxels = - (int *)pts_idx_of_voxels + voxel_index * max_pts_each_voxel; - __memcpy((void *)nram_pts_idx_cur_voxel, (void *)pts_idx_cur_voxels, - max_pts_each_voxel * sizeof(int), GDRAM2NRAM); - int pts_num_cur_voxel = nram_pts_idx_cur_voxel[0]; - if (pts_num_cur_voxel == 0) { - continue; - } - for (int channels_loop_idx = 0; channels_loop_idx <= channels_loop_times; - channels_loop_idx++) { - int actual_channels_num = (channels_loop_idx == channels_loop_times) - ? 
rem_channels - : nram_channels_limit; - if (actual_channels_num == 0) { - break; - } - int channels_offset = nram_channels_limit * channels_loop_idx; - -#if ((__BANG_ARCH__ >= 200) && (__BANG_ARCH__ < 300)) - int compute_channels_num = (channels_loop_idx == channels_loop_times) - ? PAD_UP(rem_channels, align_num) - : nram_channels_limit; - if (pool_method == 0) { - __bang_write_value((void *)nram_pts_feature_in_voxel, - compute_channels_num * align_max_pts_each_voxel, - (T)-INFINITY); - } -#endif - - T *pts_feature_cur_loop = (T *)pts_feature + channels_offset * pts_num; - for (int idx = 0; idx < pts_num_cur_voxel; idx++) { - __memcpy((T *)nram_pts_feature_in_voxel + idx, - (T *)pts_feature_cur_loop + nram_pts_idx_cur_voxel[idx + 1], - sizeof(T), GDRAM2NRAM, align_max_pts_each_voxel * sizeof(T), - pts_num * sizeof(T), actual_channels_num - 1); - } - for (int channel_idx = 0; channel_idx < actual_channels_num; - channel_idx++) { - if (pool_method == 0) { -#if __BANG_ARCH__ >= 322 - __bang_argmax((T *)one_pooled_feature, - (T *)nram_pts_feature_in_voxel + - channel_idx * align_max_pts_each_voxel, - pts_num_cur_voxel); - T max_val = ((T *)one_pooled_feature)[0]; - int max_idx = (int)(*(uint32_t *)((T *)one_pooled_feature + 1)); - nram_pooled_features_cur_voxel[channel_idx] = - (max_val == -INFINITY) ? 0 : max_val; - nram_argmax_cur_voxel[channel_idx] = - (max_val == -INFINITY) ? 
-1 : nram_pts_idx_cur_voxel[max_idx + 1]; -#else - // __bang_max need align num on mlu200 series - if (sizeof(T) == sizeof(float)) { - __bang_max((float *)one_pooled_feature, - (float *)nram_pts_feature_in_voxel + - channel_idx * align_max_pts_each_voxel, - align_max_pts_each_voxel); - float max_val = ((float *)one_pooled_feature)[0]; - __bang_write_value((void *)nram_max_pts_feature_tmp, - align_max_pts_each_voxel, (float)max_val); - __bang_eq((float *)nram_max_pts_feature_tmp, - (float *)nram_pts_feature_in_voxel + - channel_idx * align_max_pts_each_voxel, - (float *)nram_max_pts_feature_tmp, - align_max_pts_each_voxel); - int max_idx = (int)__bang_findfirst1( - (float *)nram_max_pts_feature_tmp, align_max_pts_each_voxel); - nram_pooled_features_cur_voxel[channel_idx] = - (max_val == -INFINITY) ? 0 : max_val; - nram_argmax_cur_voxel[channel_idx] = - (max_val == -INFINITY) ? -1 - : nram_pts_idx_cur_voxel[max_idx + 1]; - } else { - int max_idx = -1; - float max_val = -INFINITY; - for (int k = 0; k < pts_num_cur_voxel; k++) { - float pts_feature_cur_channel = __half2float_rd( - *((half *)nram_pts_feature_in_voxel + - channel_idx * align_max_pts_each_voxel + k)); - if (pts_feature_cur_channel > max_val) { - max_val = pts_feature_cur_channel; - max_idx = k; - } - } - nram_pooled_features_cur_voxel[channel_idx] = - (max_idx == -1) ? 0 : max_val; - nram_argmax_cur_voxel[channel_idx] = - (max_idx == -1) ? 
-1 : nram_pts_idx_cur_voxel[max_idx + 1]; - } -#endif - } else if (pool_method == 1) { - float sum_val_cur_channel = 0; - for (int k = 0; k < pts_num_cur_voxel; k++) { - sum_val_cur_channel += static_cast( - ((T *)nram_pts_feature_in_voxel)[channel_idx * - align_max_pts_each_voxel + - k]); - } - nram_pooled_features_cur_voxel[channel_idx] = - (T)(sum_val_cur_channel / pts_num_cur_voxel); - } - } - // store - __memcpy((T *)pooled_features + voxel_index * channels + channels_offset, - (void *)nram_pooled_features_cur_voxel, - actual_channels_num * sizeof(T), NRAM2GDRAM); - if (pool_method == 0) { - __memcpy((int *)argmax + voxel_index * channels + channels_offset, - (void *)nram_argmax_cur_voxel, - actual_channels_num * sizeof(int), NRAM2GDRAM); - } - } - } -} - -void KernelPtsIdxOfVoxels(cnrtDim3_t k_dim, cnrtFunctionType_t k_type, - cnrtQueue_t queue, const cnrtDataType_t d_type, - const int pool_method, const int boxes_num, - const int pts_num, const int max_pts_each_voxel, - const int out_x, const int out_y, const int out_z, - const void *rois, const void *pts, - int *pts_idx_of_voxels) { - switch (d_type) { - case CNRT_FLOAT32: { - MLUUnion1KernelPtsIdxOfVoxels<<>>( - pool_method, boxes_num, pts_num, max_pts_each_voxel, out_x, out_y, - out_z, (float *)rois, (float *)pts, (int *)pts_idx_of_voxels); - }; break; - case CNRT_FLOAT16: { - MLUUnion1KernelPtsIdxOfVoxels<<>>( - pool_method, boxes_num, pts_num, max_pts_each_voxel, out_x, out_y, - out_z, (half *)rois, (half *)pts, (int *)pts_idx_of_voxels); - }; break; - default: { - break; - } - } -} - -void KernelRoiawarePool3dForward( - cnrtDim3_t k_dim, cnrtFunctionType_t k_type, cnrtQueue_t queue, - const cnrtDataType_t d_type, const int pool_method, const int boxes_num, - const int pts_num, const int channels, const int max_pts_each_voxel, - const int out_x, const int out_y, const int out_z, const void *pts_feature, - const int *pts_idx_of_voxels, void *pooled_features, int *argmax) { - switch (d_type) { - case 
CNRT_FLOAT32: { - MLUUnion1KernelRoiawarePool3dForward<<>>( - pool_method, boxes_num, pts_num, channels, max_pts_each_voxel, out_x, - out_y, out_z, (float *)pts_feature, (int *)pts_idx_of_voxels, - (float *)pooled_features, (int *)argmax); - }; break; - case CNRT_FLOAT16: { - MLUUnion1KernelRoiawarePool3dForward<<>>( - pool_method, boxes_num, pts_num, channels, max_pts_each_voxel, out_x, - out_y, out_z, (half *)pts_feature, (int *)pts_idx_of_voxels, - (half *)pooled_features, (int *)argmax); - }; break; - default: { - break; - } - } -} - -template -__mlu_global__ void MLUUnion1KernelRoiawareMaxPool3dBackward( - const int boxes_num, const int out_x, const int out_y, const int out_z, - const int channels, const int *argmax, const T *grad_out, T *grad_in) { - // params (int)argmax: (boxes_num, out_x, out_y, out_z, channels) - // params (T)grad_out: (boxes_num, out_x, out_y, out_z, channels) - // params (T)grad_in: (pts_num, channels) - - // make sure that memcore is not used - if (coreId == 0x80) { - return; - } - int nram_channels_limit = - (MAX_NRAM_SIZE - sizeof(T) * 1) / (sizeof(T) + sizeof(int)); - int *nram_argmax_cur_loop = (int *)data_nram; - // nram_argmax_cur_loop [nram_channels_limit] - T *nram_grad_out_cur_loop = - (T *)((int *)nram_argmax_cur_loop + nram_channels_limit); - // nram_grad_out_cur_loop [nram_channels_limit] - T *nram_grad_in_cur_channel = - (T *)nram_grad_out_cur_loop + nram_channels_limit; - // nram_grad_in_cur_channel [1] - int channels_loop_times = channels / nram_channels_limit; - int rem_channels = channels % nram_channels_limit; - int voxels_num = boxes_num * out_x * out_y * out_z; - - for (int voxel_index = taskId; voxel_index < voxels_num; - voxel_index += taskDim) { - const int *argmax_cur_voxel = argmax + voxel_index * channels; - const T *grad_out_cur_voxel = grad_out + voxel_index * channels; - - for (int channels_loop_idx = 0; channels_loop_idx <= channels_loop_times; - channels_loop_idx++) { - int actual_channels_num = 
(channels_loop_idx == channels_loop_times) - ? rem_channels - : nram_channels_limit; - if (actual_channels_num == 0) { - break; - } - const int *argmax_cur_loop = - argmax_cur_voxel + nram_channels_limit * channels_loop_idx; - const T *grad_out_cur_loop = - grad_out_cur_voxel + nram_channels_limit * channels_loop_idx; - __memcpy((void *)nram_argmax_cur_loop, (void *)argmax_cur_loop, - actual_channels_num * sizeof(int), GDRAM2NRAM); - __memcpy((void *)nram_grad_out_cur_loop, (void *)grad_out_cur_loop, - actual_channels_num * sizeof(T), GDRAM2NRAM); - - for (int channel_idx = 0; channel_idx < actual_channels_num; - channel_idx++) { - int *nram_argmax_cur_channel = nram_argmax_cur_loop + channel_idx; - T *nram_grad_out_cur_channel = nram_grad_out_cur_loop + channel_idx; - if (nram_argmax_cur_channel[0] == -1) { - continue; - } - T *grad_in_cur_channel = - grad_in + nram_argmax_cur_channel[0] * channels + - nram_channels_limit * channels_loop_idx + channel_idx; - __bang_atomic_add((T *)nram_grad_in_cur_channel, - (T *)grad_in_cur_channel, - (T *)(nram_grad_out_cur_channel), 1); - } - } - } -} - -template -__mlu_global__ void MLUUnion1KernelRoiawareAvgPool3dBackward( - const int boxes_num, const int out_x, const int out_y, const int out_z, - const int channels, const int max_pts_each_voxel, - const int *pts_idx_of_voxels, const T *grad_out, T *grad_in) { - // params (int)pts_idx_of_voxels: (boxes_num, out_x, out_y, out_z, - // max_pts_each_voxel) params (T)grad_out: (boxes_num, out_x, out_y, out_z, - // channels) params (T)grad_in: (pts_num, channels) - - // make sure that memcore is not used - if (coreId == 0x80) { - return; - } - int align_num = NFU_ALIGN_SIZE / sizeof(T); - int align_max_pts_each_voxel = PAD_UP(max_pts_each_voxel, align_num); - int nram_channels_limit = PAD_DOWN( - (MAX_NRAM_SIZE - align_max_pts_each_voxel * sizeof(int)) / 2 / sizeof(T), - align_num); - int *nram_pts_idx_cur_voxel = (int *)data_nram; - // nram_pts_idx_cur_voxel 
[align_max_pts_each_voxel] - T *nram_grad_out_cur_loop = - (T *)((int *)nram_pts_idx_cur_voxel + align_max_pts_each_voxel); - // nram_grad_out_cur_loop [nram_channels_limit] - T *nram_grad_in_cur_loop = (T *)nram_grad_out_cur_loop + nram_channels_limit; - // nram_grad_in_cur_loop [nram_channels_limit] - int channels_loop_times = channels / nram_channels_limit; - int rem_channels = channels % nram_channels_limit; - int voxels_num = boxes_num * out_x * out_y * out_z; - - for (int voxel_index = taskId; voxel_index < voxels_num; - voxel_index += taskDim) { - const T *grad_out_cur_voxel = grad_out + voxel_index * channels; - const int *pts_idx_cur_voxel = - pts_idx_of_voxels + voxel_index * max_pts_each_voxel; - __memcpy((void *)nram_pts_idx_cur_voxel, (void *)pts_idx_cur_voxel, - max_pts_each_voxel * sizeof(int), GDRAM2NRAM); - int total_pts_of_voxel = nram_pts_idx_cur_voxel[0]; - if (total_pts_of_voxel <= 0) { - continue; - } - float cur_grad = 1.0 / ((float)total_pts_of_voxel); - - for (int channels_loop_idx = 0; channels_loop_idx <= channels_loop_times; - channels_loop_idx++) { - int actual_channels_num = (channels_loop_idx == channels_loop_times) - ? 
rem_channels - : nram_channels_limit; - if (actual_channels_num == 0) { - break; - } - const T *grad_out_cur_loop = - grad_out_cur_voxel + nram_channels_limit * channels_loop_idx; - __memcpy((void *)nram_grad_in_cur_loop, (void *)grad_out_cur_loop, - actual_channels_num * sizeof(T), GDRAM2NRAM); - - int align_actual_channels_num = PAD_UP(actual_channels_num, align_num); - - if (sizeof(T) == sizeof(half)) { - __bang_half2float((float *)nram_grad_out_cur_loop, - (half *)nram_grad_in_cur_loop, - align_actual_channels_num); - __bang_mul_scalar((float *)nram_grad_out_cur_loop, - (float *)nram_grad_out_cur_loop, (float)cur_grad, - align_actual_channels_num); - convertFloat2half((half *)nram_grad_out_cur_loop, - (float *)nram_grad_out_cur_loop, - align_actual_channels_num); - } else { - __bang_mul_scalar((float *)nram_grad_out_cur_loop, - (float *)nram_grad_in_cur_loop, (float)cur_grad, - align_actual_channels_num); - } - for (int k = 1; k <= total_pts_of_voxel; k++) { - T *grad_in_cur_loop = grad_in + nram_pts_idx_cur_voxel[k] * channels + - nram_channels_limit * channels_loop_idx; - __bang_atomic_add((T *)nram_grad_in_cur_loop, (T *)grad_in_cur_loop, - (T *)nram_grad_out_cur_loop, actual_channels_num); - } - } - } -} - -void KernelRoiawarePool3dBackward( - cnrtDim3_t k_dim, cnrtFunctionType_t k_type, cnrtQueue_t queue, - const cnrtDataType_t d_type, const int pool_method, const int boxes_num, - const int out_x, const int out_y, const int out_z, const int channels, - const int max_pts_each_voxel, const int *pts_idx_of_voxels, - const int *argmax, const void *grad_out, void *grad_in) { - if (pool_method == 0) { - switch (d_type) { - case CNRT_FLOAT32: { - MLUUnion1KernelRoiawareMaxPool3dBackward - <<>>(boxes_num, out_x, out_y, out_z, channels, - (int *)argmax, (float *)grad_out, - (float *)grad_in); - }; break; - case CNRT_FLOAT16: { - MLUUnion1KernelRoiawareMaxPool3dBackward - <<>>(boxes_num, out_x, out_y, out_z, channels, - (int *)argmax, (half *)grad_out, - (half 
*)grad_in); - }; break; - default: { - break; - } - } - } else { - switch (d_type) { - case CNRT_FLOAT32: { - MLUUnion1KernelRoiawareAvgPool3dBackward - <<>>( - boxes_num, out_x, out_y, out_z, channels, max_pts_each_voxel, - (int *)pts_idx_of_voxels, (float *)grad_out, (float *)grad_in); - }; break; - case CNRT_FLOAT16: { - MLUUnion1KernelRoiawareAvgPool3dBackward - <<>>( - boxes_num, out_x, out_y, out_z, channels, max_pts_each_voxel, - (int *)pts_idx_of_voxels, (half *)grad_out, (half *)grad_in); - }; break; - default: { - break; - } - } - } -} diff --git a/mmcv/ops/csrc/common/mlu/roipoint_pool3d_large_boxes_num_mlu_kernel.mlu b/mmcv/ops/csrc/common/mlu/roipoint_pool3d_large_boxes_num_mlu_kernel.mlu deleted file mode 100644 index 58a15d8..0000000 --- a/mmcv/ops/csrc/common/mlu/roipoint_pool3d_large_boxes_num_mlu_kernel.mlu +++ /dev/null @@ -1,536 +0,0 @@ -/************************************************************************* - * Copyright (C) 2022 Cambricon. - * - * OR IMPLIED, INCLUDING BUvoid NOKType LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENvoid SHALL THE AUTHORS OR COPYRIGHKType HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORvoid OR OTHERWISE, ARISING FROM, OUKType OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- *************************************************************************/ - -#include "common_mlu_helper.hpp" - -/************************************************************************* - * - * NRAM partition: - * | boxes3d | ping points + pong points | aux_a ~ aux_f | - * | 7 * sizeof(T) | 6 * deal_num * sizeof(T) | 6 * deal_num * sizeof(T) | - * - *************************************************************************/ -#define TWELVE_SPLIT 12 - -__nram__ char nram_buffer[MAX_NRAM_SIZE]; - -template -__mlu_func__ void checkPointsInBox3d(const T *boxes3d, - const size_t deal_num, - T *x, - T *y, - T *z, - T *auxiliary_a, - T *auxiliary_b, - T *auxiliary_c, - T *auxiliary_d, - T *auxiliary_e, - T *auxiliary_f, - T *pts_assign) { - // param box3d: (cx, cy, cz, dx, dy, dz, rz) in LiDAR coordinate - T cx = boxes3d[0]; - T cy = boxes3d[1]; - T cz = boxes3d[2]; - T dx = boxes3d[3]; - T dy = boxes3d[4]; - T dz = boxes3d[5]; - T rz = boxes3d[6]; - // shift to the center since cz in box3d is the bottom center - cz += 0.5 * dz; - - T cosa = (T)std::cos(-rz); - T sina = (T)std::sin(-rz); - - // x - cx - __bang_sub_scalar((T *)auxiliary_a, (T *)x, (T)cx, deal_num); - // y - cy - __bang_sub_scalar((T *)auxiliary_b, (T *)y, (T)cy, deal_num); - // z - cz - __bang_sub_scalar((T *)auxiliary_c, (T *)z, (T)cz, deal_num); - // |z - cz| - __bang_active_abs((T *)auxiliary_c, (T *)auxiliary_c, deal_num); - // |z - cz| > dz / 2.0 -#if __BANG_ARCH__ >= 322 - __bang_gt_scalar((T *)auxiliary_c, (T *)auxiliary_c, (T)(0.5 * dz), deal_num); -#else - __bang_write_value((T *)auxiliary_d, deal_num, (T)(0.5 * dz)); - __bang_lt((T *)auxiliary_c, (T *)auxiliary_d, (T *)auxiliary_c, deal_num); -#endif - // !(|z - cz| > dz / 2.0) - __bang_not((T *)auxiliary_c, (T *)auxiliary_c, deal_num); - // (x - cx) * cos(-rz) - __bang_mul_scalar((T *)auxiliary_d, (T *)auxiliary_a, (T)cosa, deal_num); - // (y - cy) * sin(-rz) - __bang_mul_scalar((T *)auxiliary_e, (T *)auxiliary_b, (T)sina, deal_num); - // 
local_x = (x - cx) * cos(-rz) + (y - cy) * -sin(-rz) - __bang_sub((T *)auxiliary_d, (T *)auxiliary_d, (T *)auxiliary_e, deal_num); - // |local_x| - __bang_active_abs((T *)auxiliary_d, (T *)auxiliary_d, deal_num); - // |local_x| < dx / 2.0 -#if __BANG_ARCH__ >= 322 - __bang_lt_scalar(auxiliary_d, auxiliary_d, (T)(0.5 * dx), deal_num); -#else - __bang_write_value((T *)auxiliary_e, deal_num, (T)(0.5 * dx)); - __bang_gt((T *)auxiliary_d, (T *)auxiliary_e, (T *)auxiliary_d, deal_num); -#endif - // (x - cx) * sin(-rz) - __bang_mul_scalar((T *)auxiliary_e, (T *)auxiliary_a, (T)sina, deal_num); - // (y - cy) * cos(-rz) - __bang_mul_scalar((T *)auxiliary_f, (T *)auxiliary_b, (T)cosa, deal_num); - // local_y = (x - cx) * sin(-rz) + (y - cy) * cos(-rz) - __bang_add((T *)auxiliary_e, (T *)auxiliary_e, (T *)auxiliary_f, deal_num); - // |local_y| - __bang_active_abs((T *)auxiliary_e, (T *)auxiliary_e, deal_num); - // |local_y| < dy / 2.0 -#if __BANG_ARCH__ >= 322 - __bang_lt_scalar(auxiliary_e, auxiliary_e, (T)(0.5 * dy), deal_num); -#else - __bang_write_value((T *)auxiliary_f, deal_num, (T)(0.5 * dy)); - __bang_gt((T *)auxiliary_e, (T *)auxiliary_f, (T *)auxiliary_e, deal_num); -#endif - // pts_assign = |x - cx| < dx / 2.0 && |y - cy| < dy / 2.0 && |z - cz| <= dz / 2.0 - __bang_mul((T *)pts_assign, (T *)auxiliary_c, (T *)auxiliary_d, deal_num); - __bang_mul((T *)pts_assign, (T *)pts_assign, (T *)auxiliary_e, deal_num); -} - -template -__mlu_func__ void computeStoreRoipointPool3d(char *boxes3d, - int *cnt, - char *points_x, - char *points_y, - char *points_z, - const char *point_features, - char *auxiliary_a, - char *auxiliary_b, - char *auxiliary_c, - char *auxiliary_d, - char *auxiliary_e, - char *auxiliary_f, - const int box_idx, - const int pts_num, - const int feature_in_len, - const int sampled_pts_num, - const size_t span_num_deal, - char *pooled_features_gdram, - char *pooled_empty_flag_gdram) { - char *pts_assign = auxiliary_a; - if (*cnt >= sampled_pts_num) { - return; 
- } - checkPointsInBox3d((T *)boxes3d, span_num_deal, (T *)points_x, (T *)points_y, (T *)points_z, - (T *)auxiliary_a, (T *)auxiliary_b, (T *)auxiliary_c, (T *)auxiliary_d, - (T *)auxiliary_e, (T *)auxiliary_f, (T *)pts_assign); - - // __bang_select returns selected elements vector and the number of selected elements - __bang_select((T *)auxiliary_b, (T *)points_x, (T *)pts_assign, span_num_deal); - uint32_t select_num = *((uint32_t *)auxiliary_b); - - if (select_num == 0) { - return; - } - int sampled_pts_num_rem = sampled_pts_num - *cnt; - int segnum = min((int)select_num, sampled_pts_num_rem) - 1; - - // copy x to pooled_features_gdram - // The result of __bang_select is composed of three parts: - // The first 4-byte is the number of selected element, whose data type is unsigned int. - // The next 124-byte is zero. The rest bytes are the selected elements. - int select_num_size = 128; - __memcpy( - pooled_features_gdram + (box_idx * sampled_pts_num + *cnt) * (3 + feature_in_len) * sizeof(T), - (T *)((int8_t *)auxiliary_b + select_num_size), sizeof(T), NRAM2GDRAM, - (3 + feature_in_len) * sizeof(T), sizeof(T), segnum); - - // copy y to pooled_features_gdram - __bang_collect((T *)auxiliary_d, (T *)points_y, (T *)pts_assign, span_num_deal); - __memcpy(pooled_features_gdram + - (box_idx * sampled_pts_num + *cnt) * (3 + feature_in_len) * sizeof(T) + - 1 * sizeof(T), - (T *)auxiliary_d, sizeof(T), NRAM2GDRAM, (3 + feature_in_len) * sizeof(T), sizeof(T), - segnum); - - // copy z to pooled_features_gdram - __bang_collect((T *)auxiliary_e, (T *)points_z, (T *)pts_assign, span_num_deal); - __memcpy(pooled_features_gdram + - (box_idx * sampled_pts_num + *cnt) * (3 + feature_in_len) * sizeof(T) + - 2 * sizeof(T), - (T *)auxiliary_e, sizeof(T), NRAM2GDRAM, (3 + feature_in_len) * sizeof(T), sizeof(T), - segnum); - - // copy features to pooled_features_gdram - for (int c_idx = 0; c_idx < feature_in_len; c_idx++) { - __memcpy(auxiliary_d, point_features + c_idx * pts_num * 
sizeof(T), span_num_deal * sizeof(T), - GDRAM2NRAM); - __bang_collect((T *)auxiliary_e, (T *)auxiliary_d, (T *)pts_assign, span_num_deal); - __memcpy(pooled_features_gdram + - (box_idx * sampled_pts_num + *cnt) * (3 + feature_in_len) * sizeof(T) + - (3 + c_idx) * sizeof(T), - auxiliary_e, sizeof(T), NRAM2GDRAM, (3 + feature_in_len) * sizeof(T), sizeof(T), - segnum); - } - - *cnt += select_num; -} - -template -__mlu_func__ void computeStoreLastBlockRoipointPool3d(char *boxes3d, - int *cnt, - char *points_x, - char *points_y, - char *points_z, - const char *point_features, - char *auxiliary_a, - char *auxiliary_b, - char *auxiliary_c, - char *auxiliary_d, - char *auxiliary_e, - char *auxiliary_f, - const int box_idx, - const int pts_num, - const int feature_in_len, - const int sampled_pts_num, - const size_t span_num_deal, - const size_t auxiliary_num_deal, - char *pooled_features_gdram, - char *pooled_empty_flag_gdram) { - char *pts_assign = auxiliary_a; - if (*cnt >= sampled_pts_num) { - // pooled_empty_flag_gdram set 0 - *((int *)auxiliary_a) = 0; - __memcpy(pooled_empty_flag_gdram + box_idx * sizeof(int), auxiliary_a, sizeof(int), NRAM2GDRAM); - return; - } - checkPointsInBox3d((T *)boxes3d, span_num_deal, (T *)points_x, (T *)points_y, (T *)points_z, - (T *)auxiliary_a, (T *)auxiliary_b, (T *)auxiliary_c, (T *)auxiliary_d, - (T *)auxiliary_e, (T *)auxiliary_f, (T *)pts_assign); - - // __bang_select returns selected elements vector and the number of selected elements - __bang_select((T *)auxiliary_b, (T *)points_x, (T *)pts_assign, span_num_deal); - uint32_t select_num = *((uint32_t *)auxiliary_b); - - if (*cnt + select_num == 0) { - // pooled_empty_flag_gdram set 1 - *((int *)auxiliary_a) = 1; - __memcpy(pooled_empty_flag_gdram + box_idx * sizeof(int), auxiliary_a, sizeof(int), NRAM2GDRAM); - - // pooled_features_gdram set 0 - int repeat = (sampled_pts_num * (3 + feature_in_len)) / (auxiliary_num_deal * 6); - int rem = (sampled_pts_num * (3 + feature_in_len)) % 
(auxiliary_num_deal * 6); - // use auxiliary_a to auxiliary_f - __bang_write_zero((T *)auxiliary_a, PAD_UP(auxiliary_num_deal * 6, NFU_ALIGN_SIZE)); - if (repeat > 0) { - __memcpy(pooled_features_gdram + box_idx * sampled_pts_num * (3 + feature_in_len) * sizeof(T), - auxiliary_a, auxiliary_num_deal * 6 * sizeof(T), NRAM2GDRAM, - auxiliary_num_deal * 6 * sizeof(T), 0, repeat - 1); - } - if (rem > 0) { - __memcpy(pooled_features_gdram + - box_idx * sampled_pts_num * (3 + feature_in_len) * sizeof(T) + - repeat * auxiliary_num_deal * 6 * sizeof(T), - auxiliary_a, rem * sizeof(T), NRAM2GDRAM); - } - return; - } - - if (select_num > 0) { - int sampled_pts_num_rem = sampled_pts_num - *cnt; - int segnum = min((int)select_num, sampled_pts_num_rem) - 1; - - // copy x to pooled_features_gdram - // The result of __bang_select is composed of three parts: - // The first 4-byte is the number of selected element, whose data type is unsigned int. - // The next 124-byte is zero. The rest bytes are the selected elements. 
- int select_num_size = 128; - __memcpy(pooled_features_gdram + - (box_idx * sampled_pts_num + *cnt) * (3 + feature_in_len) * sizeof(T), - (T *)((int8_t *)auxiliary_b + select_num_size), sizeof(T), NRAM2GDRAM, - (3 + feature_in_len) * sizeof(T), sizeof(T), segnum); - - // copy y to pooled_features_gdram - __bang_collect((T *)auxiliary_d, (T *)points_y, (T *)pts_assign, span_num_deal); - __memcpy(pooled_features_gdram + - (box_idx * sampled_pts_num + *cnt) * (3 + feature_in_len) * sizeof(T) + - 1 * sizeof(T), - (T *)auxiliary_d, sizeof(T), NRAM2GDRAM, (3 + feature_in_len) * sizeof(T), sizeof(T), - segnum); - - // copy z to pooled_features_gdram - __bang_collect((T *)auxiliary_e, (T *)points_z, (T *)pts_assign, span_num_deal); - __memcpy(pooled_features_gdram + - (box_idx * sampled_pts_num + *cnt) * (3 + feature_in_len) * sizeof(T) + - 2 * sizeof(T), - (T *)auxiliary_e, sizeof(T), NRAM2GDRAM, (3 + feature_in_len) * sizeof(T), sizeof(T), - segnum); - - // copy features to pooled_features_gdram - for (int c_idx = 0; c_idx < feature_in_len; c_idx++) { - __memcpy(auxiliary_d, point_features + c_idx * pts_num * sizeof(T), span_num_deal * sizeof(T), - GDRAM2NRAM); - __bang_collect((T *)auxiliary_e, (T *)auxiliary_d, (T *)pts_assign, span_num_deal); - __memcpy(pooled_features_gdram + - (box_idx * sampled_pts_num + *cnt) * (3 + feature_in_len) * sizeof(T) + - (3 + c_idx) * sizeof(T), - auxiliary_e, sizeof(T), NRAM2GDRAM, (3 + feature_in_len) * sizeof(T), sizeof(T), - segnum); - } - } - - // pooled_empty_flag_gdram set 0 - *((int *)auxiliary_a) = 0; - __memcpy(pooled_empty_flag_gdram + box_idx * sizeof(int), auxiliary_a, sizeof(int), NRAM2GDRAM); - - *cnt += select_num; - if (*cnt < sampled_pts_num) { - // duplicate same points for sampling - int repeat = sampled_pts_num / (*cnt) - 1; - int rem = sampled_pts_num % (*cnt); - if (repeat > 0) { - __memcpy(pooled_features_gdram + - (box_idx * sampled_pts_num + *cnt) * (3 + feature_in_len) * sizeof(T), - pooled_features_gdram + 
box_idx * sampled_pts_num * (3 + feature_in_len) * sizeof(T), - (*cnt) * (3 + feature_in_len) * sizeof(T), GDRAM2GDRAM, - (*cnt) * (3 + feature_in_len) * sizeof(T), 0, repeat - 1); - } - if (rem > 0) { - __memcpy( - pooled_features_gdram + - (box_idx * sampled_pts_num + (repeat + 1) * (*cnt)) * (3 + feature_in_len) * - sizeof(T), - pooled_features_gdram + box_idx * sampled_pts_num * (3 + feature_in_len) * sizeof(T), - rem * (3 + feature_in_len) * sizeof(T), GDRAM2GDRAM); - } - } -} - -template -__mlu_global__ void MLUUnion1KernelRoiPointPool3dLargeBoxesNumForward( - const int batch_size, - const int pts_num, - const int boxes_num, - const int feature_in_len, - const int sampled_pts_num, - const char *points_xyz_gdram, - const char *point_features_gdram, - const char *boxes3d_gdram, - char *pooled_features_gdram, - char *pooled_empty_flag_gdram) { - if (coreId == 0x80) { - return; - } - size_t boxes_per_core = (batch_size * boxes_num) / taskDim; - size_t boxes_rem = (batch_size * boxes_num) % taskDim; - // calc batch_start, batch_end, first_batch_box_start, last batch_box_end for each core - int32_t batch_start = taskId < (boxes_rem + 1) ? - (taskId * (boxes_per_core + 1)) / boxes_num : - (taskId * boxes_per_core + boxes_rem) / boxes_num; - int32_t batch_end = taskId < boxes_rem ? - ((taskId + 1) * (boxes_per_core + 1) - 1) / boxes_num : - ((taskId + 1) * boxes_per_core + boxes_rem - 1) / boxes_num; - size_t first_batch_box_start = taskId < (boxes_rem + 1) ? - (taskId * (boxes_per_core + 1)) - batch_start * boxes_num : - taskId * boxes_per_core + boxes_rem - batch_start * boxes_num; - size_t last_batch_box_end = taskId < boxes_rem ? 
- (taskId + 1) * (boxes_per_core + 1) - batch_end * boxes_num : - ((taskId + 1) * boxes_per_core + boxes_rem) - batch_end * boxes_num; - - // points_xyz : [3, B, N] - const char *points_x_gdram = points_xyz_gdram; - const char *points_y_gdram = points_xyz_gdram + (1 * batch_size * pts_num) * sizeof(T); - const char *points_z_gdram = points_xyz_gdram + (2 * batch_size * pts_num) * sizeof(T); - - size_t boxes3d_size = PAD_UP(7, NFU_ALIGN_SIZE) * sizeof(T); - size_t span_num_deal = PAD_DOWN(MAX_NRAM_SIZE / TWELVE_SPLIT / sizeof(T), NFU_ALIGN_SIZE); - size_t align_num = NFU_ALIGN_SIZE; - int32_t repeat = pts_num / span_num_deal; - size_t rem = pts_num % span_num_deal; - size_t align_rem = CEIL_ALIGN(rem, align_num); - char *boxes3d = nram_buffer; - char *ping_points_x = nram_buffer + boxes3d_size; - char *ping_points_y = ping_points_x + span_num_deal * sizeof(T); - char *ping_points_z = ping_points_y + span_num_deal * sizeof(T); - size_t ping_pong_gap = 3 * span_num_deal * sizeof(T); - char *auxiliary_a = ping_points_x + 2 * ping_pong_gap; - char *auxiliary_b = auxiliary_a + span_num_deal * sizeof(T); - char *auxiliary_c = auxiliary_b + span_num_deal * sizeof(T); - char *auxiliary_d = auxiliary_c + span_num_deal * sizeof(T); - char *auxiliary_e = auxiliary_d + span_num_deal * sizeof(T); - char *auxiliary_f = auxiliary_e + span_num_deal * sizeof(T); - size_t span_load_input1_size = span_num_deal * sizeof(T); - size_t span_load_input2_size = span_num_deal * sizeof(T); - size_t span_load_input3_size = span_num_deal * sizeof(T); - size_t span_load_input4_size = span_num_deal * sizeof(T); - int cnt = 0; - - for (int bs_idx = batch_start; bs_idx <= batch_end; bs_idx++) { - const char *points_x_start = points_x_gdram + bs_idx * pts_num * sizeof(T); - const char *points_y_start = points_y_gdram + bs_idx * pts_num * sizeof(T); - const char *points_z_start = points_z_gdram + bs_idx * pts_num * sizeof(T); - const char *point_features_start = - point_features_gdram + bs_idx * 
feature_in_len * pts_num * sizeof(T); - char *pooled_features_start = - pooled_features_gdram + - (bs_idx * boxes_num * sampled_pts_num * (3 + feature_in_len)) * sizeof(T); - char *pooled_empty_flag_start = pooled_empty_flag_gdram + bs_idx * boxes_num * sizeof(int); - size_t box_start = bs_idx == batch_start ? first_batch_box_start : 0; - size_t box_end = bs_idx == batch_end ? last_batch_box_end : boxes_num; - - for (int box_idx = box_start; box_idx < box_end; box_idx++) { - __memcpy_async(boxes3d, - boxes3d_gdram + bs_idx * boxes_num * 7 * sizeof(T) + box_idx * 7 * sizeof(T), - 7 * sizeof(T), GDRAM2NRAM); - cnt = 0; - if (repeat > 0) { - __memcpy_async(ping_points_x, points_x_start, span_load_input1_size, GDRAM2NRAM); - __memcpy_async(ping_points_y, points_y_start, span_load_input2_size, GDRAM2NRAM); - __memcpy_async(ping_points_z, points_z_start, span_load_input3_size, GDRAM2NRAM); - __asm__ volatile("sync;"); - } - - for (int i = 0; i < repeat - 1; i++) { - __memcpy_async(ping_points_x + ((i + 1) % 2) * ping_pong_gap, - points_x_start + (i + 1) * span_load_input1_size, span_load_input1_size, - GDRAM2NRAM); - __memcpy_async(ping_points_y + ((i + 1) % 2) * ping_pong_gap, - points_y_start + (i + 1) * span_load_input2_size, span_load_input2_size, - GDRAM2NRAM); - __memcpy_async(ping_points_z + ((i + 1) % 2) * ping_pong_gap, - points_z_start + (i + 1) * span_load_input3_size, span_load_input3_size, - GDRAM2NRAM); - computeStoreRoipointPool3d( - boxes3d, &cnt, ping_points_x + (i % 2) * ping_pong_gap, - ping_points_y + (i % 2) * ping_pong_gap, ping_points_z + (i % 2) * ping_pong_gap, - point_features_start + i * span_load_input4_size, auxiliary_a, auxiliary_b, auxiliary_c, - auxiliary_d, auxiliary_e, auxiliary_f, box_idx, pts_num, feature_in_len, - sampled_pts_num, span_num_deal, pooled_features_start, pooled_empty_flag_start); - __asm__ volatile("sync;"); - } - - if (rem > 0) { - if (sizeof(T) == sizeof(float)) { - __bang_write_value((T *)(ping_points_x + (repeat % 2) 
* ping_pong_gap + - PAD_DOWN(rem, NFU_ALIGN_SIZE) * sizeof(T)), - NFU_ALIGN_SIZE, (T)NAN); - __bang_write_value((T *)(ping_points_y + (repeat % 2) * ping_pong_gap + - PAD_DOWN(rem, NFU_ALIGN_SIZE) * sizeof(T)), - NFU_ALIGN_SIZE, (T)NAN); - __bang_write_value((T *)(ping_points_z + (repeat % 2) * ping_pong_gap + - PAD_DOWN(rem, NFU_ALIGN_SIZE) * sizeof(T)), - NFU_ALIGN_SIZE, (T)NAN); - } else { - __bang_write_value((T *)(ping_points_x + (repeat % 2) * ping_pong_gap + - PAD_DOWN(rem, NFU_ALIGN_SIZE) * sizeof(T)), - NFU_ALIGN_SIZE, (T)NAN); - __bang_write_value((T *)(ping_points_y + (repeat % 2) * ping_pong_gap + - PAD_DOWN(rem, NFU_ALIGN_SIZE) * sizeof(T)), - NFU_ALIGN_SIZE, (T)NAN); - __bang_write_value((T *)(ping_points_z + (repeat % 2) * ping_pong_gap + - PAD_DOWN(rem, NFU_ALIGN_SIZE) * sizeof(T)), - NFU_ALIGN_SIZE, (T)NAN); - } - __memcpy_async(ping_points_x + (repeat % 2) * ping_pong_gap, - points_x_start + repeat * span_load_input1_size, rem * sizeof(T), - GDRAM2NRAM); - __memcpy_async(ping_points_y + (repeat % 2) * ping_pong_gap, - points_y_start + repeat * span_load_input2_size, rem * sizeof(T), - GDRAM2NRAM); - __memcpy_async(ping_points_z + (repeat % 2) * ping_pong_gap, - points_z_start + repeat * span_load_input3_size, rem * sizeof(T), - GDRAM2NRAM); - } - - if (repeat > 0 && rem > 0) { - computeStoreRoipointPool3d( - boxes3d, &cnt, ping_points_x + ((repeat - 1) % 2) * ping_pong_gap, - ping_points_y + ((repeat - 1) % 2) * ping_pong_gap, - ping_points_z + ((repeat - 1) % 2) * ping_pong_gap, - point_features_start + (repeat - 1) * span_load_input4_size, auxiliary_a, auxiliary_b, - auxiliary_c, auxiliary_d, auxiliary_e, auxiliary_f, box_idx, pts_num, feature_in_len, - sampled_pts_num, span_num_deal, pooled_features_start, pooled_empty_flag_start); - } else if (repeat > 0 && rem == 0) { - computeStoreLastBlockRoipointPool3d( - boxes3d, &cnt, ping_points_x + ((repeat - 1) % 2) * ping_pong_gap, - ping_points_y + ((repeat - 1) % 2) * ping_pong_gap, - ping_points_z 
+ ((repeat - 1) % 2) * ping_pong_gap, - point_features_start + (repeat - 1) * span_load_input4_size, auxiliary_a, auxiliary_b, - auxiliary_c, auxiliary_d, auxiliary_e, auxiliary_f, box_idx, pts_num, feature_in_len, - sampled_pts_num, span_num_deal, span_num_deal, pooled_features_start, - pooled_empty_flag_start); - } - - if (rem > 0) { - __asm__ volatile("sync;"); - computeStoreLastBlockRoipointPool3d( - boxes3d, &cnt, ping_points_x + (repeat % 2) * ping_pong_gap, - ping_points_y + (repeat % 2) * ping_pong_gap, - ping_points_z + (repeat % 2) * ping_pong_gap, - point_features_start + repeat * span_load_input4_size, auxiliary_a, auxiliary_b, - auxiliary_c, auxiliary_d, auxiliary_e, auxiliary_f, box_idx, pts_num, feature_in_len, - sampled_pts_num, align_rem, span_num_deal, pooled_features_start, - pooled_empty_flag_start); - } - } - } -} - -template __mlu_global__ void MLUUnion1KernelRoiPointPool3dLargeBoxesNumForward( - const int batch_size, - const int pts_num, - const int boxes_num, - const int feature_in_len, - const int sampled_pts_num, - const char *points_xyz_gdram, - const char *point_features_gdram, - const char *boxes3d_gdram, - char *pooled_features_gdram, - char *pooled_empty_flag_gdram); - -template __mlu_global__ void MLUUnion1KernelRoiPointPool3dLargeBoxesNumForward( - const int batch_size, - const int pts_num, - const int boxes_num, - const int feature_in_len, - const int sampled_pts_num, - const char *points_xyz_gdram, - const char *point_features_gdram, - const char *boxes3d_gdram, - char *pooled_features_gdram, - char *pooled_empty_flag_gdram); - -void KernelRoiPointPool3dLargeBoxesNumForward(cnrtDim3_t k_dim, - cnrtFunctionType_t k_type, - cnrtQueue_t queue, - const cnrtDataType_t d_type, - const int batch_size, - const int pts_num, - const int boxes_num, - const int feature_in_len, - const int sampled_pts_num, - const void *points_xyz, - const void *boxes3d, - const void *point_features, - void *pooled_features, - int *pooled_empty_flag) { - 
switch (d_type) { - default: { break; } - case CNRT_FLOAT32: { - MLUUnion1KernelRoiPointPool3dLargeBoxesNumForward<<>>( - batch_size, pts_num, boxes_num, feature_in_len, sampled_pts_num, - (char *)points_xyz, (char *)point_features, (char *)boxes3d, - (char *)pooled_features, (char *)pooled_empty_flag); - }; break; - case CNRT_FLOAT16: { - MLUUnion1KernelRoiPointPool3dLargeBoxesNumForward<<>>( - batch_size, pts_num, boxes_num, feature_in_len, sampled_pts_num, - (char *)points_xyz, (char *)point_features, (char *)boxes3d, - (char *)pooled_features, (char *)pooled_empty_flag); - }; break; - } -} diff --git a/mmcv/ops/csrc/common/mlu/roipoint_pool3d_mlu_kernel.mlu b/mmcv/ops/csrc/common/mlu/roipoint_pool3d_mlu_kernel.mlu deleted file mode 100644 index f16d840..0000000 --- a/mmcv/ops/csrc/common/mlu/roipoint_pool3d_mlu_kernel.mlu +++ /dev/null @@ -1,544 +0,0 @@ -/************************************************************************* - * Copyright (C) 2022 Cambricon. - * - * OR IMPLIED, INCLUDING BUvoid NOKType LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENvoid SHALL THE AUTHORS OR COPYRIGHKType HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORvoid OR OTHERWISE, ARISING FROM, OUKType OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- *************************************************************************/ - -#include "common_mlu_helper.hpp" - -/************************************************************************************** - * - * NRAM partition: - * | boxes3d | cnt | - * | boxes_num * 7 * sizeof(T) | boxes_num * sizeof(int) | - * - * | ping points | pong points | aux_a ~ aux_f | - * | 3 * deal_num * sizeof(T) | 3 * deal_num * sizeof(T) | 6 * deal_num * sizeof(T) | - * - ***************************************************************************************/ -#define TWELVE_SPLIT 12 - -__nram__ char nram_buffer[MAX_NRAM_SIZE]; - -template -__mlu_func__ void checkPointsInBox3d(const T *boxes3d, - const size_t deal_num, - T *x, - T *y, - T *z, - T *auxiliary_a, - T *auxiliary_b, - T *auxiliary_c, - T *auxiliary_d, - T *auxiliary_e, - T *auxiliary_f, - T *pts_assign) { - // param box3d: (cx, cy, cz, dx, dy, dz, rz) in LiDAR coordinate - T cx = boxes3d[0]; - T cy = boxes3d[1]; - T cz = boxes3d[2]; - T dx = boxes3d[3]; - T dy = boxes3d[4]; - T dz = boxes3d[5]; - T rz = boxes3d[6]; - // shift to the center since cz in box3d is the bottom center - cz += 0.5 * dz; - - T cosa = (T)std::cos(-rz); - T sina = (T)std::sin(-rz); - - // x - cx - __bang_sub_scalar((T *)auxiliary_a, (T *)x, (T)cx, deal_num); - // y - cy - __bang_sub_scalar((T *)auxiliary_b, (T *)y, (T)cy, deal_num); - // z - cz - __bang_sub_scalar((T *)auxiliary_c, (T *)z, (T)cz, deal_num); - // |z - cz| - __bang_active_abs((T *)auxiliary_c, (T *)auxiliary_c, deal_num); - // |z - cz| > dz / 2.0 -#if __BANG_ARCH__ >= 322 - __bang_gt_scalar((T *)auxiliary_c, (T *)auxiliary_c, (T)(0.5 * dz), deal_num); -#else - __bang_write_value((T *)auxiliary_d, deal_num, (T)(0.5 * dz)); - __bang_lt((T *)auxiliary_c, (T *)auxiliary_d, (T *)auxiliary_c, deal_num); -#endif - // !(|z - cz| > dz / 2.0) - __bang_not((T *)auxiliary_c, (T *)auxiliary_c, deal_num); - // (x - cx) * cos(-rz) - __bang_mul_scalar((T *)auxiliary_d, (T *)auxiliary_a, (T)cosa, 
deal_num); - // (y - cy) * sin(-rz) - __bang_mul_scalar((T *)auxiliary_e, (T *)auxiliary_b, (T)sina, deal_num); - // local_x = (x - cx) * cos(-rz) + (y - cy) * -sin(-rz) - __bang_sub((T *)auxiliary_d, (T *)auxiliary_d, (T *)auxiliary_e, deal_num); - // |local_x| - __bang_active_abs((T *)auxiliary_d, (T *)auxiliary_d, deal_num); - // |local_x| < dx / 2.0 -#if __BANG_ARCH__ >= 322 - __bang_lt_scalar(auxiliary_d, auxiliary_d, (T)(0.5 * dx), deal_num); -#else - __bang_write_value((T *)auxiliary_e, deal_num, (T)(0.5 * dx)); - __bang_gt((T *)auxiliary_d, (T *)auxiliary_e, (T *)auxiliary_d, deal_num); -#endif - // (x - cx) * sin(-rz) - __bang_mul_scalar((T *)auxiliary_e, (T *)auxiliary_a, (T)sina, deal_num); - // (y - cy) * cos(-rz) - __bang_mul_scalar((T *)auxiliary_f, (T *)auxiliary_b, (T)cosa, deal_num); - // local_y = (x - cx) * sin(-rz) + (y - cy) * cos(-rz) - __bang_add((T *)auxiliary_e, (T *)auxiliary_e, (T *)auxiliary_f, deal_num); - // |local_y| - __bang_active_abs((T *)auxiliary_e, (T *)auxiliary_e, deal_num); - // |local_y| < dy / 2.0 -#if __BANG_ARCH__ >= 322 - __bang_lt_scalar(auxiliary_e, auxiliary_e, (T)(0.5 * dy), deal_num); -#else - __bang_write_value((T *)auxiliary_f, deal_num, (T)(0.5 * dy)); - __bang_gt((T *)auxiliary_e, (T *)auxiliary_f, (T *)auxiliary_e, deal_num); -#endif - // pts_assign = |x - cx| < dx / 2.0 && |y - cy| < dy / 2.0 && |z - cz| <= dz / 2.0 - __bang_mul((T *)pts_assign, (T *)auxiliary_c, (T *)auxiliary_d, deal_num); - __bang_mul((T *)pts_assign, (T *)pts_assign, (T *)auxiliary_e, deal_num); -} - -template -__mlu_func__ void computeStoreRoipointPool3d(char *boxes3d, - int *cnt, - char *points_x, - char *points_y, - char *points_z, - const char *point_features, - char *auxiliary_a, - char *auxiliary_b, - char *auxiliary_c, - char *auxiliary_d, - char *auxiliary_e, - char *auxiliary_f, - const int box_idx, - const int pts_num, - const int feature_in_len, - const int sampled_pts_num, - const size_t span_num_deal, - char 
*pooled_features_gdram, - char *pooled_empty_flag_gdram) { - char *pts_assign = auxiliary_a; - if (cnt[box_idx] >= sampled_pts_num) { - return; - } - checkPointsInBox3d((T *)(boxes3d + box_idx * 7 * sizeof(T)), span_num_deal, (T *)points_x, - (T *)points_y, (T *)points_z, (T *)auxiliary_a, (T *)auxiliary_b, - (T *)auxiliary_c, (T *)auxiliary_d, (T *)auxiliary_e, (T *)auxiliary_f, - (T *)pts_assign); - - // __bang_select returns selected elements vector and the number of selected elements - __bang_select((T *)auxiliary_b, (T *)points_x, (T *)pts_assign, span_num_deal); - uint32_t select_num = *((uint32_t *)auxiliary_b); - - if (select_num == 0) { - return; - } - int sampled_pts_num_rem = sampled_pts_num - cnt[box_idx]; - int segnum = min((int)select_num, sampled_pts_num_rem) - 1; - - // copy x to pooled_features_gdram - // The result of __bang_select is composed of three parts: - // The first 4-byte is the number of selected element, whose data type is unsigned int. - // The next 124-byte is zero. The rest bytes are the selected elements. 
- int select_num_size = 128; - __memcpy(pooled_features_gdram + - (box_idx * sampled_pts_num + cnt[box_idx]) * (3 + feature_in_len) * sizeof(T), - (T *)((int8_t *)auxiliary_b + select_num_size), sizeof(T), NRAM2GDRAM, - (3 + feature_in_len) * sizeof(T), sizeof(T), segnum); - - // copy y to pooled_features_gdram - __bang_collect((T *)auxiliary_d, (T *)points_y, (T *)pts_assign, span_num_deal); - __memcpy(pooled_features_gdram + - (box_idx * sampled_pts_num + cnt[box_idx]) * (3 + feature_in_len) * sizeof(T) + - 1 * sizeof(T), - (T *)auxiliary_d, sizeof(T), NRAM2GDRAM, (3 + feature_in_len) * sizeof(T), sizeof(T), - segnum); - - // copy z to pooled_features_gdram - __bang_collect((T *)auxiliary_e, (T *)points_z, (T *)pts_assign, span_num_deal); - __memcpy(pooled_features_gdram + - (box_idx * sampled_pts_num + cnt[box_idx]) * (3 + feature_in_len) * sizeof(T) + - 2 * sizeof(T), - (T *)auxiliary_e, sizeof(T), NRAM2GDRAM, (3 + feature_in_len) * sizeof(T), sizeof(T), - segnum); - - // copy features to pooled_features_gdram - for (int c_idx = 0; c_idx < feature_in_len; c_idx++) { - __memcpy(auxiliary_d, point_features + c_idx * pts_num * sizeof(T), span_num_deal * sizeof(T), - GDRAM2NRAM); - __bang_collect((T *)auxiliary_e, (T *)auxiliary_d, (T *)pts_assign, span_num_deal); - __memcpy(pooled_features_gdram + - (box_idx * sampled_pts_num + cnt[box_idx]) * (3 + feature_in_len) * sizeof(T) + - (3 + c_idx) * sizeof(T), - auxiliary_e, sizeof(T), NRAM2GDRAM, (3 + feature_in_len) * sizeof(T), sizeof(T), - segnum); - } - - cnt[box_idx] += select_num; -} - -template -__mlu_func__ void computeStoreLastBlockRoipointPool3d(char *boxes3d, - int *cnt, - char *points_x, - char *points_y, - char *points_z, - const char *point_features, - char *auxiliary_a, - char *auxiliary_b, - char *auxiliary_c, - char *auxiliary_d, - char *auxiliary_e, - char *auxiliary_f, - const int box_idx, - const int pts_num, - const int feature_in_len, - const int sampled_pts_num, - const size_t span_num_deal, - 
const size_t auxiliary_num_deal, - char *pooled_features_gdram, - char *pooled_empty_flag_gdram) { - char *pts_assign = auxiliary_a; - if (cnt[box_idx] >= sampled_pts_num) { - // pooled_empty_flag_gdram set 0 - *((int *)auxiliary_a) = 0; - __memcpy(pooled_empty_flag_gdram + box_idx * sizeof(int), auxiliary_a, sizeof(int), NRAM2GDRAM); - return; - } - checkPointsInBox3d((T *)(boxes3d + box_idx * 7 * sizeof(T)), span_num_deal, (T *)points_x, - (T *)points_y, (T *)points_z, (T *)auxiliary_a, (T *)auxiliary_b, - (T *)auxiliary_c, (T *)auxiliary_d, (T *)auxiliary_e, (T *)auxiliary_f, - (T *)pts_assign); - - // __bang_select returns selected elements vector and the number of selected elements - __bang_select((T *)auxiliary_b, (T *)points_x, (T *)pts_assign, span_num_deal); - uint32_t select_num = *((uint32_t *)auxiliary_b); - - if (cnt[box_idx] + select_num == 0) { - // pooled_empty_flag_gdram set 1 - *((int *)auxiliary_a) = 1; - __memcpy(pooled_empty_flag_gdram + box_idx * sizeof(int), auxiliary_a, sizeof(int), NRAM2GDRAM); - - // pooled_features_gdram set 0 - int repeat = (sampled_pts_num * (3 + feature_in_len)) / (auxiliary_num_deal * 6); - int rem = (sampled_pts_num * (3 + feature_in_len)) % (auxiliary_num_deal * 6); - // use auxiliary_a to auxiliary_f - __bang_write_zero((T *)auxiliary_a, PAD_UP(auxiliary_num_deal * 6, NFU_ALIGN_SIZE)); - if (repeat > 0) { - __memcpy(pooled_features_gdram + box_idx * sampled_pts_num * (3 + feature_in_len) * sizeof(T), - auxiliary_a, auxiliary_num_deal * 6 * sizeof(T), NRAM2GDRAM, - auxiliary_num_deal * 6 * sizeof(T), 0, repeat - 1); - } - if (rem > 0) { - __memcpy(pooled_features_gdram + - box_idx * sampled_pts_num * (3 + feature_in_len) * sizeof(T) + - repeat * auxiliary_num_deal * 6 * sizeof(T), - auxiliary_a, rem * sizeof(T), NRAM2GDRAM); - } - return; - } - - if (select_num > 0) { - int sampled_pts_num_rem = sampled_pts_num - cnt[box_idx]; - int segnum = min((int)select_num, sampled_pts_num_rem) - 1; - - // copy x to 
pooled_features_gdram - // The result of __bang_select is composed of three parts: - // The first 4-byte is the number of selected element, whose data type is unsigned int. - // The next 124-byte is zero. The rest bytes are the selected elements. - int select_num_size = 128; - __memcpy(pooled_features_gdram + - (box_idx * sampled_pts_num + cnt[box_idx]) * (3 + feature_in_len) * sizeof(T), - (T *)((int8_t *)auxiliary_b + select_num_size), sizeof(T), NRAM2GDRAM, - (3 + feature_in_len) * sizeof(T), sizeof(T), segnum); - - // copy y to pooled_features_gdram - __bang_collect((T *)auxiliary_d, (T *)points_y, (T *)pts_assign, span_num_deal); - __memcpy(pooled_features_gdram + - (box_idx * sampled_pts_num + cnt[box_idx]) * (3 + feature_in_len) * sizeof(T) + - 1 * sizeof(T), - (T *)auxiliary_d, sizeof(T), NRAM2GDRAM, (3 + feature_in_len) * sizeof(T), sizeof(T), - segnum); - - // copy z to pooled_features_gdram - __bang_collect((T *)auxiliary_e, (T *)points_z, (T *)pts_assign, span_num_deal); - __memcpy(pooled_features_gdram + - (box_idx * sampled_pts_num + cnt[box_idx]) * (3 + feature_in_len) * sizeof(T) + - 2 * sizeof(T), - (T *)auxiliary_e, sizeof(T), NRAM2GDRAM, (3 + feature_in_len) * sizeof(T), sizeof(T), - segnum); - - // copy features to pooled_features_gdram - for (int c_idx = 0; c_idx < feature_in_len; c_idx++) { - __memcpy(auxiliary_d, point_features + c_idx * pts_num * sizeof(T), span_num_deal * sizeof(T), - GDRAM2NRAM); - __bang_collect((T *)auxiliary_e, (T *)auxiliary_d, (T *)pts_assign, span_num_deal); - __memcpy(pooled_features_gdram + - (box_idx * sampled_pts_num + cnt[box_idx]) * (3 + feature_in_len) * sizeof(T) + - (3 + c_idx) * sizeof(T), - auxiliary_e, sizeof(T), NRAM2GDRAM, (3 + feature_in_len) * sizeof(T), sizeof(T), - segnum); - } - } - - // pooled_empty_flag_gdram set 0 - *((int *)auxiliary_a) = 0; - __memcpy(pooled_empty_flag_gdram + box_idx * sizeof(int), auxiliary_a, sizeof(int), NRAM2GDRAM); - - cnt[box_idx] += select_num; - if (cnt[box_idx] < 
sampled_pts_num) { - // duplicate same points for sampling - int repeat = sampled_pts_num / cnt[box_idx] - 1; - int rem = sampled_pts_num % cnt[box_idx]; - if (repeat > 0) { - __memcpy(pooled_features_gdram + - (box_idx * sampled_pts_num + cnt[box_idx]) * (3 + feature_in_len) * sizeof(T), - pooled_features_gdram + box_idx * sampled_pts_num * (3 + feature_in_len) * sizeof(T), - cnt[box_idx] * (3 + feature_in_len) * sizeof(T), GDRAM2GDRAM, - cnt[box_idx] * (3 + feature_in_len) * sizeof(T), 0, repeat - 1); - } - if (rem > 0) { - __memcpy(pooled_features_gdram + (box_idx * sampled_pts_num + (repeat + 1) * cnt[box_idx]) * - (3 + feature_in_len) * sizeof(T), - pooled_features_gdram + box_idx * sampled_pts_num * (3 + feature_in_len) * sizeof(T), - rem * (3 + feature_in_len) * sizeof(T), GDRAM2GDRAM); - } - } -} - -template -__mlu_global__ void MLUUnion1KernelRoiPointPool3dForward( - const int batch_size, - const int pts_num, - const int boxes_num, - const int feature_in_len, - const int sampled_pts_num, - const char *points_xyz_gdram, - const char *point_features_gdram, - const char *boxes3d_gdram, - char *pooled_features_gdram, - char *pooled_empty_flag_gdram) { - if (coreId == 0x80) { - return; - } - size_t boxes_per_core = (batch_size * boxes_num) / taskDim; - size_t boxes_rem = (batch_size * boxes_num) % taskDim; - // calc batch_start, batch_end, first_batch_box_start, last batch_box_end for each core - int32_t batch_start = taskId < (boxes_rem + 1) ? - (taskId * (boxes_per_core + 1)) / boxes_num : - (taskId * boxes_per_core + boxes_rem) / boxes_num; - int32_t batch_end = taskId < boxes_rem ? - ((taskId + 1) * (boxes_per_core + 1) - 1) / boxes_num : - ((taskId + 1) * boxes_per_core + boxes_rem - 1) / boxes_num; - size_t first_batch_box_start = taskId < (boxes_rem + 1) ? - (taskId * (boxes_per_core + 1)) - batch_start * boxes_num : - taskId * boxes_per_core + boxes_rem - batch_start * boxes_num; - size_t last_batch_box_end = taskId < boxes_rem ? 
- (taskId + 1) * (boxes_per_core + 1) - batch_end * boxes_num : - ((taskId + 1) * boxes_per_core + boxes_rem) - batch_end * boxes_num; - - // points_xyz : [3, B, N] - const char *points_x_gdram = points_xyz_gdram; - const char *points_y_gdram = points_xyz_gdram + (1 * batch_size * pts_num) * sizeof(T); - const char *points_z_gdram = points_xyz_gdram + (2 * batch_size * pts_num) * sizeof(T); - - size_t boxes3d_size = PAD_UP(boxes_num * 7, NFU_ALIGN_SIZE) * sizeof(T); - size_t cnt_size = PAD_UP(boxes_num, NFU_ALIGN_SIZE) * sizeof(int); - size_t span_num_deal = PAD_DOWN( - (MAX_NRAM_SIZE - boxes3d_size - cnt_size) / TWELVE_SPLIT / sizeof(T), NFU_ALIGN_SIZE); - size_t align_num = NFU_ALIGN_SIZE; - int32_t repeat = pts_num / span_num_deal; - size_t rem = pts_num % span_num_deal; - size_t align_rem = CEIL_ALIGN(rem, align_num); - char *boxes3d = nram_buffer; - char *cnt = nram_buffer + boxes3d_size; - char *ping_points_x = cnt + cnt_size; - char *ping_points_y = ping_points_x + span_num_deal * sizeof(T); - char *ping_points_z = ping_points_y + span_num_deal * sizeof(T); - size_t ping_pong_gap = 3 * span_num_deal * sizeof(T); - char *auxiliary_a = ping_points_x + 2 * ping_pong_gap; - char *auxiliary_b = auxiliary_a + span_num_deal * sizeof(T); - char *auxiliary_c = auxiliary_b + span_num_deal * sizeof(T); - char *auxiliary_d = auxiliary_c + span_num_deal * sizeof(T); - char *auxiliary_e = auxiliary_d + span_num_deal * sizeof(T); - char *auxiliary_f = auxiliary_e + span_num_deal * sizeof(T); - size_t span_load_input1_size = span_num_deal * sizeof(T); - size_t span_load_input2_size = span_num_deal * sizeof(T); - size_t span_load_input3_size = span_num_deal * sizeof(T); - size_t span_load_input4_size = span_num_deal * sizeof(T); - - for (int bs_idx = batch_start; bs_idx <= batch_end; bs_idx++) { - __memcpy_async(boxes3d, boxes3d_gdram + bs_idx * boxes_num * 7 * sizeof(T), - boxes_num * 7 * sizeof(T), GDRAM2NRAM); - __bang_write_zero((int *)cnt, PAD_UP(boxes_num, 
NFU_ALIGN_SIZE)); - - const char *points_x_start = points_x_gdram + bs_idx * pts_num * sizeof(T); - const char *points_y_start = points_y_gdram + bs_idx * pts_num * sizeof(T); - const char *points_z_start = points_z_gdram + bs_idx * pts_num * sizeof(T); - const char *point_features_start = - point_features_gdram + bs_idx * feature_in_len * pts_num * sizeof(T); - char *pooled_features_start = - pooled_features_gdram + - (bs_idx * boxes_num * sampled_pts_num * (3 + feature_in_len)) * sizeof(T); - char *pooled_empty_flag_start = pooled_empty_flag_gdram + bs_idx * boxes_num * sizeof(int); - size_t box_start = bs_idx == batch_start ? first_batch_box_start : 0; - size_t box_end = bs_idx == batch_end ? last_batch_box_end : boxes_num; - - if (repeat > 0) { - __memcpy_async(ping_points_x, points_x_start, span_load_input1_size, GDRAM2NRAM); - __memcpy_async(ping_points_y, points_y_start, span_load_input2_size, GDRAM2NRAM); - __memcpy_async(ping_points_z, points_z_start, span_load_input3_size, GDRAM2NRAM); - __asm__ volatile("sync;"); - } - - for (int i = 0; i < repeat - 1; i++) { - __memcpy_async(ping_points_x + ((i + 1) % 2) * ping_pong_gap, - points_x_start + (i + 1) * span_load_input1_size, span_load_input1_size, - GDRAM2NRAM); - __memcpy_async(ping_points_y + ((i + 1) % 2) * ping_pong_gap, - points_y_start + (i + 1) * span_load_input2_size, span_load_input2_size, - GDRAM2NRAM); - __memcpy_async(ping_points_z + ((i + 1) % 2) * ping_pong_gap, - points_z_start + (i + 1) * span_load_input3_size, span_load_input3_size, - GDRAM2NRAM); - for (int box_idx = box_start; box_idx < box_end; box_idx++) { - computeStoreRoipointPool3d( - boxes3d, (int *)cnt, ping_points_x + (i % 2) * ping_pong_gap, - ping_points_y + (i % 2) * ping_pong_gap, ping_points_z + (i % 2) * ping_pong_gap, - point_features_start + i * span_load_input4_size, auxiliary_a, auxiliary_b, auxiliary_c, - auxiliary_d, auxiliary_e, auxiliary_f, box_idx, pts_num, feature_in_len, - sampled_pts_num, span_num_deal, 
pooled_features_start, pooled_empty_flag_start); - } - __asm__ volatile("sync;"); - } - - if (rem > 0) { - if (sizeof(T) == sizeof(float)) { - __bang_write_value((T *)(ping_points_x + (repeat % 2) * ping_pong_gap + - PAD_DOWN(rem, NFU_ALIGN_SIZE) * sizeof(T)), - NFU_ALIGN_SIZE, (T)NAN); - __bang_write_value((T *)(ping_points_y + (repeat % 2) * ping_pong_gap + - PAD_DOWN(rem, NFU_ALIGN_SIZE) * sizeof(T)), - NFU_ALIGN_SIZE, (T)NAN); - __bang_write_value((T *)(ping_points_z + (repeat % 2) * ping_pong_gap + - PAD_DOWN(rem, NFU_ALIGN_SIZE) * sizeof(T)), - NFU_ALIGN_SIZE, (T)NAN); - } else { - __bang_write_value((T *)(ping_points_x + (repeat % 2) * ping_pong_gap + - PAD_DOWN(rem, NFU_ALIGN_SIZE) * sizeof(T)), - NFU_ALIGN_SIZE, (T)NAN); - __bang_write_value((T *)(ping_points_y + (repeat % 2) * ping_pong_gap + - PAD_DOWN(rem, NFU_ALIGN_SIZE) * sizeof(T)), - NFU_ALIGN_SIZE, (T)NAN); - __bang_write_value((T *)(ping_points_z + (repeat % 2) * ping_pong_gap + - PAD_DOWN(rem, NFU_ALIGN_SIZE) * sizeof(T)), - NFU_ALIGN_SIZE, (T)NAN); - } - __memcpy_async(ping_points_x + (repeat % 2) * ping_pong_gap, - points_x_start + repeat * span_load_input1_size, rem * sizeof(T), GDRAM2NRAM); - __memcpy_async(ping_points_y + (repeat % 2) * ping_pong_gap, - points_y_start + repeat * span_load_input2_size, rem * sizeof(T), GDRAM2NRAM); - __memcpy_async(ping_points_z + (repeat % 2) * ping_pong_gap, - points_z_start + repeat * span_load_input3_size, rem * sizeof(T), GDRAM2NRAM); - } - - if (repeat > 0 && rem > 0) { - for (int box_idx = box_start; box_idx < box_end; box_idx++) { - computeStoreRoipointPool3d( - boxes3d, (int *)cnt, ping_points_x + ((repeat - 1) % 2) * ping_pong_gap, - ping_points_y + ((repeat - 1) % 2) * ping_pong_gap, - ping_points_z + ((repeat - 1) % 2) * ping_pong_gap, - point_features_start + (repeat - 1) * span_load_input4_size, auxiliary_a, auxiliary_b, - auxiliary_c, auxiliary_d, auxiliary_e, auxiliary_f, box_idx, pts_num, feature_in_len, - sampled_pts_num, span_num_deal, 
pooled_features_start, pooled_empty_flag_start); - } - } else if (repeat > 0 && rem == 0) { - for (int box_idx = box_start; box_idx < box_end; box_idx++) { - computeStoreLastBlockRoipointPool3d( - boxes3d, (int *)cnt, ping_points_x + ((repeat - 1) % 2) * ping_pong_gap, - ping_points_y + ((repeat - 1) % 2) * ping_pong_gap, - ping_points_z + ((repeat - 1) % 2) * ping_pong_gap, - point_features_start + (repeat - 1) * span_load_input4_size, auxiliary_a, auxiliary_b, - auxiliary_c, auxiliary_d, auxiliary_e, auxiliary_f, box_idx, pts_num, feature_in_len, - sampled_pts_num, span_num_deal, span_num_deal, pooled_features_start, - pooled_empty_flag_start); - } - } - - if (rem > 0) { - __asm__ volatile("sync;"); - for (int box_idx = box_start; box_idx < box_end; box_idx++) { - computeStoreLastBlockRoipointPool3d( - boxes3d, (int *)cnt, ping_points_x + (repeat % 2) * ping_pong_gap, - ping_points_y + (repeat % 2) * ping_pong_gap, - ping_points_z + (repeat % 2) * ping_pong_gap, - point_features_start + repeat * span_load_input4_size, auxiliary_a, auxiliary_b, - auxiliary_c, auxiliary_d, auxiliary_e, auxiliary_f, box_idx, pts_num, feature_in_len, - sampled_pts_num, align_rem, span_num_deal, pooled_features_start, - pooled_empty_flag_start); - } - } - } -} - -template __mlu_global__ void MLUUnion1KernelRoiPointPool3dForward( - const int batch_size, - const int pts_num, - const int boxes_num, - const int feature_in_len, - const int sampled_pts_num, - const char *points_xyz_gdram, - const char *point_features_gdram, - const char *boxes3d_gdram, - char *pooled_features_gdram, - char *pooled_empty_flag_gdram); - -template __mlu_global__ void MLUUnion1KernelRoiPointPool3dForward( - const int batch_size, - const int pts_num, - const int boxes_num, - const int feature_in_len, - const int sampled_pts_num, - const char *points_xyz_gdram, - const char *point_features_gdram, - const char *boxes3d_gdram, - char *pooled_features_gdram, - char *pooled_empty_flag_gdram); - -void 
KernelRoiPointPool3dForward(cnrtDim3_t k_dim, - cnrtFunctionType_t k_type, - cnrtQueue_t queue, - const cnrtDataType_t d_type, - const int batch_size, - const int pts_num, - const int boxes_num, - const int feature_in_len, - const int sampled_pts_num, - const void *points_xyz, - const void *boxes3d, - const void *point_features, - void *pooled_features, - int *pooled_empty_flag) { - switch (d_type) { - default: { break; } - case CNRT_FLOAT32: { - MLUUnion1KernelRoiPointPool3dForward<<>>( - batch_size, pts_num, boxes_num, feature_in_len, sampled_pts_num, - (char *)points_xyz, (char *)point_features, (char *)boxes3d, - (char *)pooled_features, (char *)pooled_empty_flag); - }; break; - case CNRT_FLOAT16: { - MLUUnion1KernelRoiPointPool3dForward<<>>( - batch_size, pts_num, boxes_num, feature_in_len, sampled_pts_num, - (char *)points_xyz, (char *)point_features, (char *)boxes3d, - (char *)pooled_features, (char *)pooled_empty_flag); - }; break; - } -} diff --git a/mmcv/ops/csrc/common/mlu/three_nn_mlu_kernel.mlu b/mmcv/ops/csrc/common/mlu/three_nn_mlu_kernel.mlu deleted file mode 100644 index 7927385..0000000 --- a/mmcv/ops/csrc/common/mlu/three_nn_mlu_kernel.mlu +++ /dev/null @@ -1,466 +0,0 @@ -/************************************************************************* - * Copyright (C) 2022 Cambricon. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- *************************************************************************/ -#include "common_mlu_helper.hpp" -#include - -__nram__ char nram_buffer[MAX_NRAM_SIZE]; - -#if __BANG_ARCH__ >= 322 -/** - * returns the index of ret, which is stored at the 1st position of the `ret`, - * used after bang_min - */ -__mlu_func__ uint32_t getIndice(half *ret) { - uint32_t indice = *((uint32_t *)((uint16_t *)ret + 1)); - return indice; -} - -/** - * returns the index of ret, which is stored at the 1st position of the `ret`, - * used after bang_min - */ -__mlu_func__ uint32_t getIndice(float *ret) { - uint32_t indice = ((uint32_t *)ret)[1]; - return indice; -} -#endif - -template -__mlu_func__ void auxArgmin(T *nram_dst, T *nram_src, const int num_deal, - T *value, int *index) { - __bang_min(nram_dst, nram_src, num_deal); - *value = nram_dst[0]; - __bang_write_value(nram_dst, num_deal, *value); - __bang_eq(nram_dst, nram_src, nram_dst, num_deal); - __bang_findfirst1((uint32_t *)nram_dst, nram_dst, num_deal); - *index = *((int *)nram_dst); -} - -template -__mlu_func__ void auxFuncFind3Min(T *nram_aux_a, const int auxa_offset, - int *nram_aux_b, const int auxb_offset, - T *nram_dest, T *nram_aux_sort_a, - int *nram_aux_sort_b, const int deal_offset) { - __bang_write_value(nram_aux_sort_a, auxa_offset, (T)(INFINITY)); - __bang_write_value(nram_aux_sort_b, auxb_offset, (int)0); - int index = 0; - for (int i = 0; i < 3; i++) { -#if __BANG_ARCH__ >= 322 - __bang_argmin(nram_dest, nram_aux_a, auxa_offset); - nram_aux_sort_a[i] = nram_dest[0]; - index = getIndice(nram_dest); -#else - T value = 0; - auxArgmin(nram_dest, nram_aux_a, auxa_offset, &value, &index); - nram_aux_sort_a[i] = value; -#endif - nram_aux_sort_b[i] = nram_aux_b[index]; - __memset_nram(nram_aux_a + index, 1, (T)(INFINITY)); - } - __memcpy((char *)nram_aux_a, (char *)nram_aux_sort_a, auxa_offset * sizeof(T), - NRAM2NRAM); - __memcpy((char *)nram_aux_b, (char *)nram_aux_sort_b, - auxb_offset * sizeof(int), NRAM2NRAM); 
-} - -template -__mlu_func__ void auxFuncSort(T *nram_aux_a, const int auxa_offset, - int *nram_aux_b, const int auxb_offset, - T *nram_dest, T *nram_help_value, - int *nram_help_idx, const int num_deal, - const int deal_offset) { - for (int k = 0; k < num_deal; ++k) { - auxFuncFind3Min(nram_aux_a + k * auxa_offset, auxa_offset, - nram_aux_b + k * auxb_offset, auxb_offset, nram_dest, - nram_help_value, nram_help_idx, deal_offset); - } -} - -template -__mlu_func__ void auxFuncNN( - size_t *output_aux_sort_a_gap, size_t *output_aux_sort_b_gap, - size_t *output_aux_dest_gap, size_t *output_unknown_gap, - size_t *output_known_gap, size_t *output_dist_gap, size_t *auxillary_a_gap, - size_t *auxillary_b_gap, size_t *known_num_deal, size_t *unknown_num_deal, - size_t *align_num, size_t *auxa_offset, size_t *auxb_offset) { - /* - * nram partition: - * |-NFU_ALIGN_SIZE-|-2*NFU_ALIGN_SIZE-|-X*3*sizeof(T)-| - * space: | aux_sort_a | aux_sort_b | nram_unknown | - * - * | ------ (Y * 7 *sizeof(T)) ---------------- | - * | nram_known | nram_dist | nram_dest | - * - * | -X * NFU_ALIGN_SIZE ---|---X * 2 * NFU_ALIGN_SIZE-| - * | output_dist(aux_a) | output_dist(aux_b) | - * 200 series - * X = (MAX_NRAM - 3 * NFU_ALIGN_SIZE) * (2/3) / (3 * sizeof(T) + 3 * - * NFU_ALIGN_SIZE) - * Y = (MAX_NRAM - 3 * NFU_ALIGN_SIZE) * (1/3) / (7 * sizeof(T)) - * 300 series - * X = (MAX_NRAM - 3 * NFU_ALIGN_SIZE) * (4/5) / (3 * - * sizeof(T) + 3 * NFU_ALIGN_SIZE) - * Y = (MAX_NRAM - 3 * NFU_ALIGN_SIZE) * - * (1/5) / (7 * sizeof(T)) - * - */ - - *align_num = NFU_ALIGN_SIZE / sizeof(T); - *auxa_offset = NFU_ALIGN_SIZE / sizeof(T); - *auxb_offset = 2 * NFU_ALIGN_SIZE / sizeof(int); -#if __BANG_ARCH__ >= 322 - *known_num_deal = PAD_DOWN( - (MAX_NRAM_SIZE - 3 * NFU_ALIGN_SIZE) / 5 / (7 * sizeof(T)), *align_num); - *unknown_num_deal = PAD_DOWN((MAX_NRAM_SIZE - 3 * NFU_ALIGN_SIZE) / 5 * 4 / - (3 * sizeof(T) + 3 * NFU_ALIGN_SIZE), - *align_num); -#else - *known_num_deal = PAD_DOWN( - (MAX_NRAM_SIZE - 3 * 
NFU_ALIGN_SIZE) / 3 / (7 * sizeof(T)), *align_num); - *unknown_num_deal = PAD_DOWN((MAX_NRAM_SIZE - 3 * NFU_ALIGN_SIZE) / 3 * 2 / - (3 * sizeof(T) + 3 * NFU_ALIGN_SIZE), - *align_num); -#endif - - *output_aux_sort_a_gap = 0; - *output_aux_sort_b_gap = *output_aux_sort_a_gap + NFU_ALIGN_SIZE; - *output_aux_dest_gap = *output_aux_sort_b_gap + 2 * NFU_ALIGN_SIZE; - - *output_unknown_gap = *output_aux_dest_gap + *known_num_deal * sizeof(T); - *output_known_gap = *output_unknown_gap + *unknown_num_deal * 3 * sizeof(T); - *output_dist_gap = *output_known_gap + *known_num_deal * 3 * sizeof(T); - *auxillary_a_gap = *output_dist_gap + *known_num_deal * 3 * sizeof(T); - *auxillary_b_gap = *auxillary_a_gap + *unknown_num_deal * NFU_ALIGN_SIZE; -} - -#if __BANG_ARCH__ >= 322 -template -__mlu_func__ bool containNanInf(T *nram_unknown) { - if (std::isnan(nram_unknown[0]) || std::isnan(nram_unknown[1]) || - std::isnan(nram_unknown[2]) || std::isinf(nram_unknown[0]) || - std::isinf(nram_unknown[1]) || std::isinf(nram_unknown[2])) - return true; - else - return false; -} -#endif - -template -__mlu_func__ void computeThreeNN(T *nram_unknown, T *nram_known, T *nram_dist, - T *nram_dest, T *nram_aux_a, - T *nram_aux_sort_a, int *nram_aux_b, - int *nram_aux_sort_b, const int known_num_deal, - const int known_seg_num, const int deal_offset, - const int known_count, - const int known_count_align) { - __bang_write_value(nram_dist, 3 * known_num_deal, (T)(INFINITY)); -#if __BANG_ARCH__ >= 322 - if (!containNanInf(nram_unknown)) { -#endif - // x1 - x2 - __bang_sub_scalar(nram_dist, nram_known, nram_unknown[0], - known_count_align); - // y1 - y2 - __bang_sub_scalar(nram_dist + known_count_align, - nram_known + known_count_align, nram_unknown[1], - known_count_align); - // z1 - z2 - __bang_sub_scalar(nram_dist + 2 * known_count_align, - nram_known + 2 * known_count_align, nram_unknown[2], - known_count_align); - __bang_square(nram_dist, nram_dist, 3 * known_count_align); - 
__bang_add(nram_dist, nram_dist, nram_dist + known_count_align, - known_count_align); - __bang_add(nram_dist, nram_dist, nram_dist + 2 * known_count_align, - known_count_align); -#if __BANG_ARCH__ >= 322 - } -#endif - - int index = 0; - for (int i = 0; i < 3; i++) { -#if __BANG_ARCH__ >= 322 - __bang_argmin(nram_dest, nram_dist, known_count_align); - nram_aux_a[i + deal_offset] = nram_dest[0]; - index = getIndice(nram_dest); -#else - T value = 0; - auxArgmin(nram_dest, nram_dist, known_count_align, &value, &index); - nram_aux_a[i + deal_offset] = value; -#endif - nram_aux_b[i + deal_offset] = index + known_seg_num * known_num_deal; - __memset_nram(nram_dist + index, 1, (T)(INFINITY)); - } -} - -template -__mlu_func__ void loadTransposedKnownTensor( - char *nram_known, char *nram_dist, const char *known_gdram, - const int known_num_deal, const int batch_id, const int m, - const int known_seg_num, const int count, const int count_align_num) { - __bang_write_value(nram_known, 3 * known_num_deal, (T)(INFINITY)); -#if __BANG_ARCH__ >= 322 - __bang_write_value(nram_dist, 3 * known_num_deal, (T)(INFINITY)); - __memcpy(nram_dist, - known_gdram + - (batch_id * m * 3 + known_seg_num * known_num_deal) * sizeof(T), - count * sizeof(T), GDRAM2NRAM, count_align_num * sizeof(T), - m * sizeof(T), 2); - __bang_minequal((T *)nram_known, (T *)nram_known, (T *)nram_dist, - 3 * count_align_num); -#else - __memcpy(nram_known, - known_gdram + - (batch_id * m * 3 + known_seg_num * known_num_deal) * sizeof(T), - count * sizeof(T), GDRAM2NRAM, count_align_num * sizeof(T), - m * sizeof(T), 2); -#endif -} - -template -__mlu_func__ void loadUnknownTensor(char *nram_unknown, - const char *unknown_gdram, - const int unknown_num_deal, - const int unknown_seg_num, const int count, - const int count_align_num) { - __memcpy(nram_unknown, - unknown_gdram + unknown_seg_num * unknown_num_deal * 3 * sizeof(T), - count * 3 * sizeof(T), GDRAM2NRAM); -} - -template -__mlu_func__ void auxProcessSegment( - 
const int m, const int n, T *nram_unknown, T *nram_known, T *nram_dist, - T *nram_dest, T *known_gdram, T *nram_aux_a, const int auxa_offset, - int *nram_aux_b, const int auxb_offset, T *nram_aux_sort_a, - int *nram_aux_sort_b, const int unknown_num_deal, const int known_num_deal, - const int known_seg_num, const int unknown_seg_num, const int unknown_count, - const int known_count, const int known_count_align, const int start_idx, - int *deal_offset) { - int pre_batch_id = -1; - int cur_batch_id = -1; - pre_batch_id = start_idx / n; - - // if aux_a space is not enough, get the first 3 min among aux_a and clear. - if (*deal_offset >= PAD_DOWN(auxa_offset, 3)) { - auxFuncSort(nram_aux_a, auxa_offset, nram_aux_b, auxb_offset, nram_dest, - nram_aux_sort_a, nram_aux_sort_b, unknown_count, *deal_offset); - *deal_offset = 3; - } - - // load i'th segment of known batch data. - loadTransposedKnownTensor((char *)nram_known, (char *)nram_dist, - (char *)known_gdram, known_num_deal, - pre_batch_id, m, known_seg_num, known_count, - known_count_align); - - for (int k = 0; k < unknown_count; ++k) { - cur_batch_id = (start_idx + k) / n; - if (cur_batch_id != pre_batch_id) { // if batch id of unknown data changed, - // load corresponding known batch data - pre_batch_id = cur_batch_id; - loadTransposedKnownTensor((char *)nram_known, (char *)nram_dist, - (char *)known_gdram, known_num_deal, - pre_batch_id, m, known_seg_num, known_count, - known_count_align); - } - computeThreeNN(nram_unknown + 3 * k, nram_known, nram_dist, nram_dest, - nram_aux_a + k * auxa_offset, nram_aux_sort_a, - nram_aux_b + k * auxb_offset, nram_aux_sort_b, - known_num_deal, known_seg_num, *deal_offset, known_count, - known_count_align); - } -} - -template -__mlu_global__ void MLUUnion1KernelThreeNN(const int b, const int n, - const int m, char *unknown_gdram, - char *known_gdram, char *dist2_gdram, - int *idx_gdram) { - if (coreId == 0x80) { - return; - } - - size_t output_aux_sort_a_gap = 0, 
output_aux_sort_b_gap = 0, - output_dest_gap = 0, output_unknown_gap = 0, output_known_gap = 0, - output_dist_gap = 0, auxillary_a_gap = 0, auxillary_b_gap = 0, - known_num_deal = 0, unknown_num_deal = 0, align_num = 0, - auxa_offset = 0, auxb_offset = 0; - auxFuncNN(&output_aux_sort_a_gap, &output_aux_sort_b_gap, &output_dest_gap, - &output_unknown_gap, &output_known_gap, &output_dist_gap, - &auxillary_a_gap, &auxillary_b_gap, &known_num_deal, - &unknown_num_deal, &align_num, &auxa_offset, &auxb_offset); - - int num_per_core = b * n / taskDim; - const int core_offset = num_per_core; - - char *unknown_gdram_start = - unknown_gdram + taskId * 3 * core_offset * sizeof(T); - char *known_gdram_start = known_gdram; - char *output_dist_start = dist2_gdram + taskId * 3 * core_offset * sizeof(T); - int *output_idx_start = idx_gdram + taskId * 3 * core_offset; - - const int rem = (b * n) % taskDim; - if (taskId == taskDim - 1) { - num_per_core += rem; - } - - const int unknown_repeat = - num_per_core / unknown_num_deal; // if unknown number is big, process it - // by unknown_repeat times. - const int unknown_rem = num_per_core % unknown_num_deal; // unknown reminder - const int unknown_rem_align = PAD_UP(unknown_rem, align_num); - - const int known_repeat = - m / known_num_deal; // if known number is big, process it by - // unknown_repeat times. 
- const int known_rem = m % known_num_deal; // known reminder - const int known_rem_align = PAD_UP(known_rem, align_num); - - char *nram_aux_sort_a = nram_buffer; - int *nram_aux_sort_b = (int *)(nram_buffer + output_aux_sort_b_gap); - char *nram_dest = nram_buffer + output_dest_gap; - char *nram_unknown = nram_buffer + output_unknown_gap; - char *nram_known = nram_buffer + output_known_gap; - char *nram_dist = nram_buffer + output_dist_gap; - char *nram_aux_a = nram_buffer + auxillary_a_gap; - int *nram_aux_b = (int *)(nram_buffer + auxillary_b_gap); - int deal_offset = 0; - int start_idx = -1; - - for (int j = 0; j < unknown_repeat; - ++j) { // process data within a unknown_repeat - // if unknown need to be process segmentally, use a aux_a and aux_b - // space to find first 3 minimum dist. - __bang_write_value(nram_aux_a, unknown_num_deal * auxa_offset, - (T)(INFINITY)); - __bang_write_value(nram_aux_b, unknown_num_deal * auxb_offset, (int)0); - loadUnknownTensor(nram_unknown, unknown_gdram_start, unknown_num_deal, j, - unknown_num_deal, unknown_num_deal); - - deal_offset = 0; - start_idx = taskId * core_offset + j * unknown_num_deal; - - for (int i = 0; i < known_repeat; - ++i) { // process known data in segmentally. 
- auxProcessSegment( - m, n, (T *)nram_unknown, (T *)nram_known, (T *)nram_dist, - (T *)nram_dest, (T *)known_gdram_start, (T *)nram_aux_a, auxa_offset, - nram_aux_b, auxb_offset, (T *)nram_aux_sort_a, nram_aux_sort_b, - unknown_num_deal, known_num_deal, i, j, unknown_num_deal, - known_num_deal, known_num_deal, start_idx, &deal_offset); - deal_offset += 3; - } - - if (known_rem > 0) { // process known rem - __bang_write_value(nram_known, 3 * known_num_deal, (T)(INFINITY)); - auxProcessSegment( - m, n, (T *)nram_unknown, (T *)nram_known, (T *)nram_dist, - (T *)nram_dest, (T *)known_gdram_start, (T *)nram_aux_a, auxa_offset, - nram_aux_b, auxb_offset, (T *)nram_aux_sort_a, nram_aux_sort_b, - unknown_num_deal, known_num_deal, known_repeat, j, unknown_num_deal, - known_rem, known_rem_align, start_idx, &deal_offset); - } - - deal_offset += 3; - - if (deal_offset > 3) { - auxFuncSort((T *)nram_aux_a, auxa_offset, nram_aux_b, auxb_offset, - (T *)nram_dest, (T *)nram_aux_sort_a, nram_aux_sort_b, - unknown_num_deal, deal_offset); - deal_offset = 0; - } - - __memcpy((char *)output_dist_start + j * unknown_num_deal * 3 * sizeof(T), - (char *)nram_aux_a, 3 * sizeof(T), NRAM2GDRAM, 3 * sizeof(T), - auxa_offset * sizeof(T), unknown_num_deal - 1); - __memcpy((char *)output_idx_start + j * unknown_num_deal * 3 * sizeof(int), - (char *)nram_aux_b, 3 * sizeof(int), NRAM2GDRAM, 3 * sizeof(int), - auxb_offset * sizeof(int), unknown_num_deal - 1); - } - - if (unknown_rem > 0) { // process unknown rem - deal_offset = 0; - __bang_write_value(nram_aux_a, unknown_num_deal * auxa_offset, - (T)(INFINITY)); - __bang_write_value(nram_aux_b, unknown_num_deal * auxb_offset, (int)0); - loadUnknownTensor(nram_unknown, unknown_gdram_start, unknown_num_deal, - unknown_repeat, unknown_rem, unknown_rem_align); - start_idx = taskId * core_offset + unknown_repeat * unknown_num_deal; - - for (int i = 0; i < known_repeat; ++i) { - auxProcessSegment( - m, n, (T *)nram_unknown, (T *)nram_known, (T 
*)nram_dist, - (T *)nram_dest, (T *)known_gdram_start, (T *)nram_aux_a, auxa_offset, - nram_aux_b, auxb_offset, (T *)nram_aux_sort_a, nram_aux_sort_b, - unknown_num_deal, known_num_deal, i, unknown_repeat, unknown_rem, - known_num_deal, known_num_deal, start_idx, &deal_offset); - deal_offset += 3; - } - - if (known_rem > 0) { - __bang_write_value(nram_known, 3 * known_num_deal, (T)(INFINITY)); - start_idx = taskId * core_offset + unknown_repeat * unknown_num_deal; - - auxProcessSegment( - m, n, (T *)nram_unknown, (T *)nram_known, (T *)nram_dist, - (T *)nram_dest, (T *)known_gdram_start, (T *)nram_aux_a, auxa_offset, - nram_aux_b, auxb_offset, (T *)nram_aux_sort_a, nram_aux_sort_b, - unknown_num_deal, known_num_deal, known_repeat, unknown_repeat, - unknown_rem, known_rem, known_rem_align, start_idx, &deal_offset); - - deal_offset += 3; - } - if (deal_offset > 3) { - auxFuncSort((T *)nram_aux_a, auxa_offset, nram_aux_b, auxb_offset, - (T *)nram_dest, (T *)nram_aux_sort_a, nram_aux_sort_b, - unknown_rem, deal_offset); - deal_offset = 0; - } - - __memcpy((char *)output_dist_start + - unknown_repeat * unknown_num_deal * 3 * sizeof(T), - (char *)nram_aux_a, 3 * sizeof(T), NRAM2GDRAM, 3 * sizeof(T), - auxa_offset * sizeof(T), unknown_rem - 1); - __memcpy((char *)output_idx_start + - unknown_repeat * unknown_num_deal * 3 * sizeof(int), - (char *)nram_aux_b, 3 * sizeof(int), NRAM2GDRAM, 3 * sizeof(int), - auxb_offset * sizeof(int), unknown_rem - 1); - } -} - -template __mlu_global__ void MLUUnion1KernelThreeNN( - const int b, const int n, const int m, char *unknown_gdram, - char *known_gdram, char *dist2_gdram, int *idx_gdram); - -template __mlu_global__ void MLUUnion1KernelThreeNN( - const int b, const int n, const int m, char *unknown_gdram, - char *known_gdram, char *dist2_gdram, int *idx_gdram); - -void KernelThreeNNForward(cnrtDim3_t k_dim, cnrtFunctionType_t k_type, - cnrtQueue_t queue, cnrtDataType_t data_type, - const void *unknown, const void *known, void *dist2, - 
int *idx, const int b, const int n, const int m) { - switch (data_type) { - case CNRT_FLOAT16: { - MLUUnion1KernelThreeNN<<>>( - b, n, m, (char *)unknown, (char *)known, (char *)dist2, idx); - }; break; - case CNRT_FLOAT32: { - MLUUnion1KernelThreeNN<<>>( - b, n, m, (char *)unknown, (char *)known, (char *)dist2, idx); - }; break; - default: { - break; - } - } -} diff --git a/mmcv/ops/csrc/common/mlu/tin_shift_mlu_kernel.mlu b/mmcv/ops/csrc/common/mlu/tin_shift_mlu_kernel.mlu deleted file mode 100644 index ed64c2b..0000000 --- a/mmcv/ops/csrc/common/mlu/tin_shift_mlu_kernel.mlu +++ /dev/null @@ -1,307 +0,0 @@ -/************************************************************************* - * Copyright (C) 2022 Cambricon. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- *************************************************************************/ -#include "common_mlu_helper.hpp" - -__nram__ char data_nram[MAX_NRAM_SIZE]; - -template -__mlu_func__ void mluMultiKernelTinShift( - const T *input, const int *shifts, T *output, const int batch_size, - const int time_size, const int channel_size, const int hw_size, - const int group_size, const int group_channel) { - for (int cur_channel_index = taskId; - cur_channel_index < batch_size * channel_size; - cur_channel_index += taskDim) { - int n_index = cur_channel_index / channel_size; - int group_id = cur_channel_index % channel_size / group_channel; - int t_shift = shifts[n_index * group_size + group_id]; - int index = cur_channel_index % channel_size * hw_size + - n_index * time_size * channel_size * hw_size; - __bang_write_value(data_nram, MAX_NRAM_SIZE, (char)0); - __asm__ volatile("sync;"); - if (abs(t_shift) >= time_size) { - __memcpy(output + index, data_nram, hw_size * sizeof(T), NRAM2GDRAM, - channel_size * hw_size * sizeof(T), hw_size * sizeof(T), - time_size - 1); - } else { - if (t_shift > 0) { - __memcpy(data_nram + t_shift * hw_size * sizeof(T), input + index, - hw_size * sizeof(T), GDRAM2NRAM, hw_size * sizeof(T), - channel_size * hw_size * sizeof(T), time_size - 1 - t_shift); - __memcpy(output + index, data_nram, hw_size * sizeof(T), NRAM2GDRAM, - channel_size * hw_size * sizeof(T), hw_size * sizeof(T), - time_size - 1); - } else { - __memcpy(data_nram, input + (index - t_shift * channel_size * hw_size), - hw_size * sizeof(T), GDRAM2NRAM, hw_size * sizeof(T), - channel_size * hw_size * sizeof(T), time_size - 1 + t_shift); - __memcpy(output + index, data_nram, hw_size * sizeof(T), NRAM2GDRAM, - channel_size * hw_size * sizeof(T), hw_size * sizeof(T), - time_size - 1); - } - } - __asm__ volatile("sync;"); - } -} - -template -__mlu_func__ void mluHwSplit(const T *input, const int t_shift, - const int time_size, const int hw_size, - const int channel_size, const int index, - 
const int cur_sequence_index, - const int max_length_per_core, T *output) { - for (int cur_index = index; cur_index < index + hw_size; - cur_index += max_length_per_core) { - int memcpy_size = max_length_per_core; - if (cur_index + max_length_per_core > index + hw_size) { - memcpy_size = index + hw_size - cur_index; - } - if (cur_sequence_index - t_shift < 0 || - cur_sequence_index - t_shift >= time_size) { - __memcpy(output + cur_index, data_nram, memcpy_size * sizeof(T), - NRAM2GDRAM); - } else { - __memcpy(data_nram, input + cur_index - t_shift * channel_size * hw_size, - memcpy_size * sizeof(T), GDRAM2NRAM); - __memcpy(output + cur_index, data_nram, memcpy_size * sizeof(T), - NRAM2GDRAM); - } - __asm__ volatile("sync;"); - } -} - -template -__mlu_func__ void mluMultiKernelTinShiftSplitSequence( - const T *input, const int *shifts, T *output, const int batch_size, - const int time_size, const int channel_size, const int hw_size, - const int group_size, const int group_channel, - const int max_number_hw_per_core, const int max_length_per_core) { - const int tmp_max_number_hw_per_core = - max_number_hw_per_core > 0 ? max_number_hw_per_core : 1; - const int loop_time = time_size / tmp_max_number_hw_per_core + - ((time_size % tmp_max_number_hw_per_core) > 0 ? 
1 : 0); - int segmentime_size = tmp_max_number_hw_per_core; - int res_segment = time_size % tmp_max_number_hw_per_core; - - for (int cur_segment_index = taskId; - cur_segment_index < loop_time * batch_size * channel_size; - cur_segment_index += taskDim) { - int n_index = cur_segment_index / loop_time / channel_size; - int group_id = cur_segment_index / loop_time % channel_size / group_channel; - int t_shift = shifts[n_index * group_size + group_id]; - int index = n_index * time_size * channel_size * hw_size + - (cur_segment_index / loop_time % channel_size) * hw_size + - cur_segment_index % loop_time * segmentime_size * hw_size * - channel_size; - char *dst_gdram2nram = data_nram; - const T *src_gdram2nram = input + index; - int count_gdram2nram = -1; - int count_nram2gdram = -1; - int next_sequence_index = - index / hw_size / channel_size % time_size + segmentime_size; - int cur_sequence_index = index / hw_size / channel_size % time_size; - __bang_write_value(data_nram, MAX_NRAM_SIZE, (char)0); - __asm__ volatile("sync;"); - if (max_number_hw_per_core == 0) { - mluHwSplit(input, t_shift, time_size, hw_size, channel_size, index, - cur_sequence_index, max_length_per_core, output); - continue; - } - if (abs(t_shift) >= time_size) { - if ((cur_segment_index + 1) % loop_time == 0 && res_segment != 0) { - __memcpy(output + index, data_nram, hw_size * sizeof(T), NRAM2GDRAM, - channel_size * hw_size * sizeof(T), hw_size * sizeof(T), - res_segment - 1); - } else { - __memcpy(output + index, data_nram, hw_size * sizeof(T), NRAM2GDRAM, - channel_size * hw_size * sizeof(T), hw_size * sizeof(T), - segmentime_size - 1); - } - continue; - } - if (t_shift == 0) { - if ((cur_segment_index + 1) % loop_time == 0 && res_segment != 0) { - dst_gdram2nram = data_nram; - src_gdram2nram = input + index; - count_gdram2nram = res_segment - 1; - count_nram2gdram = res_segment - 1; - } else { - dst_gdram2nram = data_nram; - src_gdram2nram = input + index; - count_gdram2nram = segmentime_size 
- 1; - count_nram2gdram = segmentime_size - 1; - } - } else if (t_shift > 0) { - int first_index_cur_channel = - n_index * time_size * channel_size * hw_size + - (cur_segment_index / loop_time % channel_size) * hw_size; - if ((cur_segment_index + 1) % loop_time == 0 && res_segment != 0) { - dst_gdram2nram = data_nram; - src_gdram2nram = - input + - (index - t_shift * channel_size * hw_size < first_index_cur_channel - ? first_index_cur_channel - : index - t_shift * channel_size * hw_size); - count_gdram2nram = res_segment - 1; - count_nram2gdram = res_segment - 1; - if (cur_sequence_index < t_shift && t_shift < next_sequence_index) { - dst_gdram2nram = - data_nram + t_shift % segmentime_size * hw_size * sizeof(T); - count_gdram2nram = res_segment - (t_shift - cur_sequence_index) - 1; - } - } else { - if (t_shift >= next_sequence_index) { - __memcpy(output + index, data_nram, hw_size * sizeof(T), NRAM2GDRAM, - channel_size * hw_size * sizeof(T), hw_size * sizeof(T), - segmentime_size - 1); - continue; - } else if (cur_sequence_index < t_shift && - t_shift < next_sequence_index) { - dst_gdram2nram = - data_nram + t_shift % segmentime_size * hw_size * sizeof(T); - src_gdram2nram = input + first_index_cur_channel; - count_gdram2nram = segmentime_size - (t_shift % segmentime_size) - 1; - count_nram2gdram = segmentime_size - 1; - } else { - dst_gdram2nram = data_nram; - src_gdram2nram = input + index - t_shift * channel_size * hw_size; - count_gdram2nram = segmentime_size - 1; - count_nram2gdram = segmentime_size - 1; - } - } - } else { - int offset_index = time_size + t_shift; - if (cur_sequence_index >= offset_index) { - if ((cur_segment_index + 1) % loop_time == 0 && res_segment != 0) { - __memcpy(output + index, data_nram, hw_size * sizeof(T), NRAM2GDRAM, - channel_size * hw_size * sizeof(T), hw_size * sizeof(T), - res_segment - 1); - continue; - } else { - __memcpy(output + index, data_nram, hw_size * sizeof(T), NRAM2GDRAM, - channel_size * hw_size * sizeof(T), 
hw_size * sizeof(T), - segmentime_size - 1); - continue; - } - } else { - dst_gdram2nram = data_nram; - src_gdram2nram = input + index - t_shift * channel_size * hw_size; - if (cur_sequence_index - t_shift + segmentime_size < time_size) { - count_gdram2nram = segmentime_size - 1; - count_nram2gdram = segmentime_size - 1; - } else { - count_gdram2nram = time_size - (cur_sequence_index - t_shift) - 1; - count_nram2gdram = - (segmentime_size - 1) < (time_size - cur_sequence_index - 1) - ? (segmentime_size - 1) - : (time_size - cur_sequence_index - 1); - } - } - } - __memcpy(dst_gdram2nram, src_gdram2nram, hw_size * sizeof(T), GDRAM2NRAM, - hw_size * sizeof(T), channel_size * hw_size * sizeof(T), - count_gdram2nram); - __memcpy(output + index, data_nram, hw_size * sizeof(T), NRAM2GDRAM, - channel_size * hw_size * sizeof(T), hw_size * sizeof(T), - count_nram2gdram); - __asm__ volatile("sync;"); - } -} - -__mlu_entry__ void MLUUnion1KernelTinShift( - const void *input, const void *shifts, void *output, const int batch_size, - const int time_size, const int channel_size, const int hw_size, - const int group_size, const int group_channel, - const cnrtDataType_t data_dtype) { - // make sure that memcore is not used - if (coreId == 0x80) { - return; - } - switch (data_dtype) { - case CNRT_FLOAT16: { - mluMultiKernelTinShift((half *)input, (const int *)shifts, (half *)output, - batch_size, time_size, channel_size, hw_size, - group_size, group_channel); - }; break; - case CNRT_FLOAT32: { - mluMultiKernelTinShift((float *)input, (const int *)shifts, - (float *)output, batch_size, time_size, - channel_size, hw_size, group_size, group_channel); - }; break; - default: { return; } - } -} - -__mlu_entry__ void MLUUnion1KernelTinShiftSplitSequence( - const void *input, const void *shifts, void *output, const int batch_size, - const int time_size, const int channel_size, const int hw_size, - const int group_size, const int group_channel, - const int max_number_hw_per_core, const int 
max_length_per_core, - const cnrtDataType_t data_dtype) { - // make sure that memcore is not used - if (coreId == 0x80) { - return; - } - switch (data_dtype) { - case CNRT_FLOAT16: { - mluMultiKernelTinShiftSplitSequence( - (half *)input, (const int *)shifts, (half *)output, batch_size, - time_size, channel_size, hw_size, group_size, group_channel, - max_number_hw_per_core, max_length_per_core); - }; break; - case CNRT_FLOAT32: { - mluMultiKernelTinShiftSplitSequence( - (float *)input, (const int *)shifts, (float *)output, batch_size, - time_size, channel_size, hw_size, group_size, group_channel, - max_number_hw_per_core, max_length_per_core); - }; break; - default: { return; } - } -} - -void KernelTinShiftForward( - cnrtDim3_t k_dim, cnrtFunctionType_t k_type, cnrtQueue_t queue, - const void *input, const void *shifts, void *output, const int batch_size, - const int time_size, const int channel_size, const int hw_size, - const int group_size, const int group_channel, - const cnrtDataType_t data_dtype, const int channel_per_core, - const int max_number_hw_per_core, const int max_length_per_core) { - if (channel_per_core >= 1) { - MLUUnion1KernelTinShift<<>>( - input, shifts, output, batch_size, time_size, channel_size, hw_size, - group_size, group_channel, data_dtype); - } else { - MLUUnion1KernelTinShiftSplitSequence<<>>( - input, shifts, output, batch_size, time_size, channel_size, hw_size, - group_size, group_channel, max_number_hw_per_core, max_length_per_core, - data_dtype); - } -} - -void KernelTinShiftBackward( - cnrtDim3_t k_dim, cnrtFunctionType_t k_type, cnrtQueue_t queue, - const void *grad_output, const void *shifts, void *grad_input, - const int batch_size, const int time_size, const int channel_size, - const int hw_size, const int group_size, const int group_channel, - const cnrtDataType_t data_dtype, const int channel_per_core, - const int max_number_hw_per_core, const int max_length_per_core) { - if (channel_per_core >= 1) { - 
MLUUnion1KernelTinShift<<>>( - grad_output, shifts, grad_input, batch_size, time_size, channel_size, - hw_size, group_size, group_channel, data_dtype); - } else { - MLUUnion1KernelTinShiftSplitSequence<<>>( - grad_output, shifts, grad_input, batch_size, time_size, channel_size, - hw_size, group_size, group_channel, max_number_hw_per_core, - max_length_per_core, data_dtype); - } -} diff --git a/mmcv/ops/csrc/common/mps/MPSDevice.h b/mmcv/ops/csrc/common/mps/MPSDevice.h deleted file mode 100644 index e1d9d49..0000000 --- a/mmcv/ops/csrc/common/mps/MPSDevice.h +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright © 2022 Apple Inc. - -// This file is modify from: -// https://github.com/pytorch/pytorch/blob/a85d1f0bcdd02cf18d3b0517337458cb51a18cdb/aten/src/ATen/mps/MPSDevice.h - -#pragma once -#include -#include -#include - -#ifdef __OBJC__ -#include -#include -#include -typedef id MTLDevice_t; -#else -typedef void* MTLDevice; -typedef void* MTLDevice_t; -#endif - -using namespace std; - -namespace at { -namespace mps { - -//----------------------------------------------------------------- -// MPSDevice -// -// MPSDevice is a singleton class that returns the default device -//----------------------------------------------------------------- - -class TORCH_API MPSDevice { - public: - /** - * MPSDevice should not be cloneable. - */ - MPSDevice(MPSDevice& other) = delete; - /** - * MPSDevice should not be assignable. - */ - void operator=(const MPSDevice&) = delete; - /** - * Gets single instance of the Device. - */ - static MPSDevice* getInstance(); - /** - * Returns the single device. 
- */ - MTLDevice_t device() { return _mtl_device; } - - ~MPSDevice(); - - private: - static MPSDevice* _device; - MTLDevice_t _mtl_device; - MPSDevice(); -}; - -TORCH_API bool is_available(); - -TORCH_API at::Allocator* GetMPSAllocator(bool useSharedAllocator = false); - -} // namespace mps -} // namespace at diff --git a/mmcv/ops/csrc/common/mps/MPSLibrary.h b/mmcv/ops/csrc/common/mps/MPSLibrary.h deleted file mode 100644 index 41c33fb..0000000 --- a/mmcv/ops/csrc/common/mps/MPSLibrary.h +++ /dev/null @@ -1,61 +0,0 @@ -#ifndef _MPS_LIBRARY_H_ -#define _MPS_LIBRARY_H_ - -#include -#include - -#ifdef __OBJC__ -#include -#include -#include - -typedef id MTLComputePipelineState_t; -typedef id MTLLibrary_t; -#else -typedef void* MTLComputePipelineState; -typedef void* MTLComputePipelineState_t; -typedef void* MTLLibrary; -typedef void* MTLLibrary_t; -#endif - -class MPSLibrary { - public: - // disable constructor for singleton - static MPSLibrary* createFromUrl(const std::string& library_url); - static MPSLibrary* createFromSource(const std::string& source); - ~MPSLibrary(); - - MTLLibrary_t library() { return _library; } - - MTLComputePipelineState_t getComputePipelineState( - const std::string& function_name); - - private: - MTLLibrary_t _library; - std::unordered_map _pso_map; -}; - -class MPSLibraryManager { - public: - // disable constructor for singleton - MPSLibraryManager(const MPSLibraryManager&) = delete; - MPSLibraryManager& operator=(const MPSLibraryManager&) = delete; - MPSLibraryManager(MPSLibraryManager&&) = delete; - MPSLibraryManager& operator=(MPSLibraryManager&&) = delete; - - static MPSLibraryManager* getInstance(); - - bool hasLibrary(const std::string& name); - - MPSLibrary* getLibrary(const std::string& library_url); - - MPSLibrary* createLibraryFromSouce(const std::string& name, - const std::string& sources); - - ~MPSLibraryManager(); - - private: - MPSLibraryManager(); - std::unordered_map> _library_map; -}; -#endif diff --git 
a/mmcv/ops/csrc/common/mps/MPSLibrary.mm b/mmcv/ops/csrc/common/mps/MPSLibrary.mm deleted file mode 100644 index 99addc7..0000000 --- a/mmcv/ops/csrc/common/mps/MPSLibrary.mm +++ /dev/null @@ -1,107 +0,0 @@ -#include "MPSLibrary.h" -#include "MPSDevice.h" - -static std::unique_ptr mps_library_manager=nullptr; - -MPSLibraryManager* MPSLibraryManager::getInstance() { - if(!mps_library_manager) - mps_library_manager = std::unique_ptr(new MPSLibraryManager()); - return mps_library_manager.get(); -} - -MPSLibraryManager::~MPSLibraryManager() {} - -MPSLibraryManager::MPSLibraryManager() {} - -bool MPSLibraryManager::hasLibrary(const std::string& name) { - return _library_map.find(name) != _library_map.end(); -} - -MPSLibrary* MPSLibraryManager::getLibrary(const std::string& library_url) { - if (_library_map.find(library_url) != _library_map.end()) { - return _library_map[library_url].get(); - } - _library_map.emplace(std::make_pair( - library_url, std::unique_ptr(MPSLibrary::createFromUrl(library_url)))); - return _library_map[library_url].get(); -} - -MPSLibrary* MPSLibraryManager::createLibraryFromSouce(const std::string& name, - const std::string& source) { - NSString* ns_name = [NSString stringWithCString:name.c_str()]; - if (_library_map.find(name) != _library_map.end()) { - NSLog(@"Library %@ already exist.", ns_name); - return nullptr; - } - - _library_map.emplace( - std::make_pair(name, std::unique_ptr(MPSLibrary::createFromSource(source)))); - return _library_map[name].get(); -} - -MPSLibrary* MPSLibrary::createFromUrl(const std::string& library_url) { - MPSLibrary* library = new MPSLibrary(); - @autoreleasepool { - NSError* error = nil; - - // load library and func - NSString* utl_str = [NSString stringWithCString:library_url.c_str()]; - NSURL* metal_url = [NSURL fileURLWithPath:utl_str]; - library->_library = [at::mps::MPSDevice::getInstance()->device() newLibraryWithURL:metal_url - error:&error]; - if (library->_library == nil) { - NSLog(@"Failed to find 
library, error %@.", error); - exit(1); - } - } - - return library; -} - -MPSLibrary* MPSLibrary::createFromSource(const std::string& sources) { - MPSLibrary* library = new MPSLibrary(); - @autoreleasepool { - NSError* error = nil; - - // load library and func - NSString* code_str = [NSString stringWithCString:sources.c_str()]; - library->_library = [at::mps::MPSDevice::getInstance()->device() newLibraryWithSource:code_str - options:nil - error:&error]; - if (library->_library == nil) { - NSLog(@"Failed to find library, error %@.", error); - exit(1); - } - } - - return library; -} - -MPSLibrary::~MPSLibrary() { - [_library release]; - _library = nil; -} - -MTLComputePipelineState_t MPSLibrary::getComputePipelineState(const std::string& function_name) { - if (_pso_map.find(function_name) != _pso_map.end()) { - return _pso_map[function_name]; - } - - MTLComputePipelineState_t pso; - @autoreleasepool { - NSError* error = nil; - - // create function - NSString* function_name_str = [NSString stringWithCString:function_name.c_str()]; - id func = [_library newFunctionWithName:function_name_str]; - if (func == nil) { - NSLog(@"Failed to created pipeline state object, error %@.", error); - exit(1); - } - // create pipeline - pso = [at::mps::MPSDevice::getInstance()->device() newComputePipelineStateWithFunction:func - error:&error]; - _pso_map.emplace(std::make_pair(function_name, pso)); - } - return _pso_map[function_name]; -} diff --git a/mmcv/ops/csrc/common/mps/MPSStream.h b/mmcv/ops/csrc/common/mps/MPSStream.h deleted file mode 100644 index 54cd388..0000000 --- a/mmcv/ops/csrc/common/mps/MPSStream.h +++ /dev/null @@ -1,132 +0,0 @@ -// Copyright © 2022 Apple Inc. 
- -// This file is modify from: -// https://github.com/pytorch/pytorch/blob/a85d1f0bcdd02cf18d3b0517337458cb51a18cdb/aten/src/ATen/mps/MPSStream.h - -#pragma once - -#include -#include - -#include -#include -#include -#include "MPSDevice.h" - -#ifdef __OBJC__ -#include -#include -#include -#include -typedef id MTLCommandQueue_t; -typedef id MTLCommandBuffer_t; -typedef id MTLSharedEvent_t; -typedef id MTLDevice_t; -#else -typedef void* MTLCommandQueue_t; -typedef void* MTLCommandQueue; -typedef void* MTLCommandBuffer_t; -typedef void* MTLCommandBuffer; -typedef void* MTLSharedEvent_t; -typedef void* dispatch_queue_t; -typedef void* MTLDevice_t; -#define nil NULL; -#endif - -namespace at { -namespace mps { - -//----------------------------------------------------------------- -// MPSStream -//----------------------------------------------------------------- - -class TORCH_API MPSStream { - public: - enum Unchecked { UNCHECKED }; - /// Construct a MPSStream from a Stream. This construction is checked, - /// and will raise an error if the Stream is not, in fact, a MPS stream. - explicit MPSStream(Stream stream); - - ~MPSStream(); - MTLCommandQueue_t commandQueue() const { return _commandQueue; }; - dispatch_queue_t queue() const { return _serialQueue; } - - MTLCommandBuffer_t commandBuffer(); - void commit(bool flush); - void commitAndWait(); - void synchronize(); - - void flush(); - - /// Get the MPS device index that this stream is associated with. - c10::DeviceIndex device_index() const { return _stream.device_index(); } - - MTLCommandQueue_t stream() const { return _commandQueue; }; - - MTLDevice_t device() const { return [_commandQueue device]; } - - /// Explicit conversion to Stream. 
- Stream unwrap() const { return _stream; } - - private: - Stream _stream; - MTLCommandQueue_t _commandQueue = nil; - MTLCommandBuffer_t _commandBuffer = nil; - void _flush(bool commitAndWait) const; - - dispatch_queue_t _serialQueue = nullptr; -}; - -/** - * Get the current MPS stream - */ -TORCH_API MPSStream* getCurrentMPSStream(); - -/** - * Get the default MPS stream - */ -TORCH_API MPSStream* getDefaultMPSStream(); - -//----------------------------------------------------------------- -// MPSStreamImpl -//----------------------------------------------------------------- - -class TORCH_API MPSStreamImpl { - public: - /** - * Gets single instance of the MPSStream. - */ - static MPSStream* getInstance(); - - private: - static MPSStream* _stream; - MPSStreamImpl(); -}; - -//----------------------------------------------------------------- -// MPSEvent -//----------------------------------------------------------------- - -struct TORCH_API MPSEvent { - MPSEvent(); - // MPSEvent(id device); - - ~MPSEvent(); - MTLSharedEvent_t event() const { return _event; } - - void recordEvent(MPSStream* stream); - void waitForEvent(MPSStream* queue); // waits on the cpu - bool queryEvent(); - uint64_t getCurrentValue() { return _currentValue; } - void setCurrentValue(uint64_t currValue) { _currentValue = currValue; } - - private: - bool _isRecorded = false; - uint64_t _currentValue = 0; - MTLSharedEvent_t _event; -}; - -typedef MPSEvent* mpsEvent_t; - -} // namespace mps -} // namespace at diff --git a/mmcv/ops/csrc/common/mps/MPSUtils.h b/mmcv/ops/csrc/common/mps/MPSUtils.h deleted file mode 100644 index 2a4ce6d..0000000 --- a/mmcv/ops/csrc/common/mps/MPSUtils.h +++ /dev/null @@ -1,51 +0,0 @@ -#ifndef _MPS_UTILS_H_ -#define _MPS_UTILS_H_ -#include -#ifdef __OBJC__ -#include -#include -#include - -typedef id MTLBuffer_t; -typedef id MTLComputeCommandEncoder_t; -#else -typedef void* MTLBuffer; -typedef void* MTLBuffer_t; -typedef void* MTLComputeCommandEncoder; -typedef void* 
MTLComputeCommandEncoder_t; -#endif - -// utils -static inline MTLBuffer_t getMTLBufferStorage(const at::Tensor& tensor) { - return __builtin_bit_cast(MTLBuffer_t, tensor.storage().data()); -} - -template , at::Tensor>::value, bool> = true> -void setMTLArg(MTLComputeCommandEncoder_t encoder, int index, T&& t); - -template , at::Tensor>::value, bool> = true> -void setMTLArg(MTLComputeCommandEncoder_t encoder, int index, T&& t) { - [encoder setBuffer:getMTLBufferStorage(t) offset:0 atIndex:index]; -} - -template , at::Tensor>::value, bool>> -void setMTLArg(MTLComputeCommandEncoder_t encoder, int index, T&& t) { - [encoder setBytes:&t length:sizeof(t) atIndex:index]; -} - -inline void setMTLArgsImpl(MTLComputeCommandEncoder_t, int) {} - -template -void setMTLArgsImpl(MTLComputeCommandEncoder_t encoder, int index, T&& t, Args&&... args) { - setMTLArg(encoder, index, std::forward(t)); - setMTLArgsImpl(encoder, index + 1, std::forward(args)...); -} - -template -void setMTLArgs(MTLComputeCommandEncoder_t encoder, MTLComputePipelineState_t pso, Args&&... 
args) { - [encoder setComputePipelineState:pso]; - setMTLArgsImpl(encoder, 0, std::forward(args)...); -} -#endif diff --git a/mmcv/ops/csrc/common/pytorch_cpp_helper.hpp b/mmcv/ops/csrc/common/pytorch_cpp_helper.hpp index f68e874..c7f9f35 100644 --- a/mmcv/ops/csrc/common/pytorch_cpp_helper.hpp +++ b/mmcv/ops/csrc/common/pytorch_cpp_helper.hpp @@ -1,25 +1,22 @@ #ifndef PYTORCH_CPP_HELPER #define PYTORCH_CPP_HELPER -#include +#include #include using namespace at; +#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0)) + #define CHECK_CUDA(x) \ TORCH_CHECK(x.device().is_cuda(), #x " must be a CUDA tensor") -#define CHECK_MLU(x) \ - TORCH_CHECK(x.device().type() == at::kMLU, #x " must be a MLU tensor") #define CHECK_CPU(x) \ - TORCH_CHECK(x.device().type() == at::kCPU, #x " must be a CPU tensor") + TORCH_CHECK(!x.device().is_cuda(), #x " must be a CPU tensor") #define CHECK_CONTIGUOUS(x) \ TORCH_CHECK(x.is_contiguous(), #x " must be contiguous") #define CHECK_CUDA_INPUT(x) \ CHECK_CUDA(x); \ CHECK_CONTIGUOUS(x) -#define CHECK_MLU_INPUT(x) \ - CHECK_MLU(x); \ - CHECK_CONTIGUOUS(x) #define CHECK_CPU_INPUT(x) \ CHECK_CPU(x); \ CHECK_CONTIGUOUS(x) diff --git a/mmcv/ops/csrc/common/pytorch_cuda_helper.hpp b/mmcv/ops/csrc/common/pytorch_cuda_helper.hpp index 52e5126..9869b53 100644 --- a/mmcv/ops/csrc/common/pytorch_cuda_helper.hpp +++ b/mmcv/ops/csrc/common/pytorch_cuda_helper.hpp @@ -15,6 +15,5 @@ using at::Tensor; using phalf = at::Half; #define __PHALF(x) (x) -#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0)) #endif // PYTORCH_CUDA_HELPER diff --git a/mmcv/ops/csrc/common/pytorch_mlu_helper.hpp b/mmcv/ops/csrc/common/pytorch_mlu_helper.hpp deleted file mode 100644 index e49572c..0000000 --- a/mmcv/ops/csrc/common/pytorch_mlu_helper.hpp +++ /dev/null @@ -1,61 +0,0 @@ -/************************************************************************* - * Copyright (C) 2021 Cambricon. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - *************************************************************************/ -#ifndef PYTORCH_MLU_HELPER_HPP_ -#define PYTORCH_MLU_HELPER_HPP_ - -#ifdef MMCV_WITH_MLU -#include "aten.h" - -#define NFU_ALIGN_SIZE 128 - -#define PAD_UP(x, y) (((x) / (y) + (int)((x) % (y) > 0)) * (y)) - -#define PAD_DOWN(x, y) (((x) / (y)) * (y)) - -#define CEIL_DIV(x, y) (((x) + (y)-1) / (y)) - -#define CEIL_ALIGN(x, y) (((x) + (y)-1) / (y) * (y)) - -inline int32_t getJobLimitCapability() { - CNcontext drv_ctx; - TORCH_CHECK(CN_SUCCESS == cnCtxGetCurrent(&drv_ctx), "cnCtxGetCurrent fails"); - CNctxConfigParam ctx_conf_param; - TORCH_CHECK( - CN_SUCCESS == cnGetCtxConfigParam(drv_ctx, CN_CTX_CONFIG_UNION_LIMIT, - &ctx_conf_param), - "cnGetCtxConfigParam fails."); - return (int32_t)ctx_conf_param.unionLimit; -} - -inline int32_t getCoreNumOfJobLimitCapability() { - switch (getJobLimitCapability()) { - default: - return torch_mlu::getDeviceAttr(cnrtAttrMcorePerCluster) * - getJobLimitCapability(); - case CN_KERNEL_CLASS_BLOCK: - return 1; - case CN_KERNEL_CLASS_UNION: - return torch_mlu::getDeviceAttr(cnrtAttrMcorePerCluster); - case CN_KERNEL_CLASS_UNION2: - return torch_mlu::getDeviceAttr(cnrtAttrMcorePerCluster) * 2; - case CN_KERNEL_CLASS_UNION4: - return torch_mlu::getDeviceAttr(cnrtAttrMcorePerCluster) * 4; - case CN_KERNEL_CLASS_UNION8: - return torch_mlu::getDeviceAttr(cnrtAttrMcorePerCluster) * 8; - case CN_KERNEL_CLASS_UNION16: - return torch_mlu::getDeviceAttr(cnrtAttrMcorePerCluster) * 16; - 
} -} - -#endif // MMCV_WITH_MLU - -#endif // PYTORCH_MLU_HELPER_HPP_ diff --git a/mmcv/ops/csrc/common/pytorch_npu_helper.hpp b/mmcv/ops/csrc/common/pytorch_npu_helper.hpp deleted file mode 100644 index 88607d2..0000000 --- a/mmcv/ops/csrc/common/pytorch_npu_helper.hpp +++ /dev/null @@ -1,35 +0,0 @@ -/****************************************************************************** - * Copyright (c) 2022 Huawei Technologies Co., Ltd - * All rights reserved. - * - * Licensed under the BSD 3-Clause License (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://opensource.org/licenses/BSD-3-Clause - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - ******************************************************************************/ - -#ifndef PYTORCH_NPU_HELPER_HPP_ -#define PYTORCH_NPU_HELPER_HPP_ - -#include -#include -#include - -#include "pytorch_cpp_helper.hpp" -#include "pytorch_device_registry.hpp" - -#define NPU_NAME_SPACE at_npu::native - -#define REGISTER_NPU_IMPL(key, value) REGISTER_DEVICE_IMPL(key, XLA, value) - -#define CHECK_NPU(x) \ - TORCH_CHECK(x.device().type() == at::kXLA, #x " must be a NPU tensor") - -#endif // PYTORCH_NPU_HELPER_HPP_ diff --git a/mmcv/ops/csrc/common/utils/spconv/paramsgrid.h b/mmcv/ops/csrc/common/utils/spconv/paramsgrid.h deleted file mode 100644 index f23ff44..0000000 --- a/mmcv/ops/csrc/common/utils/spconv/paramsgrid.h +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright 2019 Yan Yan -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef PARAMS_GRID_H_ -#define PARAMS_GRID_H_ -#include -#include - -namespace detail { -template -int getTotalSize(std::vector arg) { - return arg.size(); -} - -template -int getTotalSize(std::vector arg, std::vector... args) { - return arg.size() * getTotalSize(args...); -} - -template -int getSize(std::vector arg) { - return arg.size(); -} - -template -void assigner(TT &src, std::vector counter, std::vector &arg) { - std::get(src) = arg[counter[Idx]]; -} - -template -void assigner(TT &src, std::vector counter, std::vector &arg, - std::vector &... args) { - std::get(src) = arg[counter[Idx]]; - assigner(src, counter, args...); -} -} // namespace detail - -template -std::vector> paramsGrid(std::vector... args) { - int length = detail::getTotalSize(args...); - std::vector sizes = {detail::getSize(args)...}; - int size = sizes.size(); - - std::vector> params(length); - std::vector counter(size); - for (int i = 0; i < length; ++i) { - detail::assigner<0>(params[i], counter, args...); - counter[size - 1] += 1; - for (int c = size - 1; c >= 0; --c) { - if (counter[c] == sizes[c] && c > 0) { - counter[c - 1] += 1; - counter[c] = 0; - } - } - } - return params; -} - -#endif diff --git a/mmcv/ops/csrc/common/utils/spconv/prettyprint.h b/mmcv/ops/csrc/common/utils/spconv/prettyprint.h deleted file mode 100644 index 0a6bdc3..0000000 --- a/mmcv/ops/csrc/common/utils/spconv/prettyprint.h +++ /dev/null @@ -1,493 +0,0 @@ -// Copyright Louis Delacroix 2010 - 2014. -// Distributed under the Boost Software License, Version 1.0. 
-// (See accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) -// -// A pretty printing library for C++ -// -// Usage: -// Include this header, and operator<< will "just work". - -#ifndef H_PRETTY_PRINT -#define H_PRETTY_PRINT - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace pretty_print { -namespace detail { -// SFINAE type trait to detect whether T::const_iterator exists. - -struct sfinae_base { - using yes = char; - using no = yes[2]; -}; - -template -struct has_const_iterator : private sfinae_base { - private: - template - static yes &test(typename C::const_iterator *); - template - static no &test(...); - - public: - static const bool value = sizeof(test(nullptr)) == sizeof(yes); - using type = T; -}; - -template -struct has_begin_end : private sfinae_base { - private: - template - static yes & - f(typename std::enable_if< - std::is_same(&C::begin)), - typename C::const_iterator (C::*)() const>::value>::type *); - - template - static no &f(...); - - template - static yes &g(typename std::enable_if< - std::is_same(&C::end)), - typename C::const_iterator (C::*)() const>::value, - void>::type *); - - template - static no &g(...); - - public: - static bool const beg_value = sizeof(f(nullptr)) == sizeof(yes); - static bool const end_value = sizeof(g(nullptr)) == sizeof(yes); -}; - -} // namespace detail - -// Holds the delimiter values for a specific character type - -template -struct delimiters_values { - using char_type = TChar; - const char_type *prefix; - const char_type *delimiter; - const char_type *postfix; -}; - -// Defines the delimiter values for a specific container and character type - -template -struct delimiters { - using type = delimiters_values; - static const type values; -}; - -// Functor to print containers. You can use this directly if you want -// to specify a non-default delimiters type. 
The printing logic can -// be customized by specializing the nested template. - -template , - typename TDelimiters = delimiters> -struct print_container_helper { - using delimiters_type = TDelimiters; - using ostream_type = std::basic_ostream; - - template - struct printer { - static void print_body(const U &c, ostream_type &stream) { - using std::begin; - using std::end; - - auto it = begin(c); - const auto the_end = end(c); - - if (it != the_end) { - for (;;) { - stream << *it; - - if (++it == the_end) break; - - if (delimiters_type::values.delimiter != NULL) - stream << delimiters_type::values.delimiter; - } - } - } - }; - - print_container_helper(const T &container) : container_(container) {} - - inline void operator()(ostream_type &stream) const { - if (delimiters_type::values.prefix != NULL) - stream << delimiters_type::values.prefix; - - printer::print_body(container_, stream); - - if (delimiters_type::values.postfix != NULL) - stream << delimiters_type::values.postfix; - } - - private: - const T &container_; -}; - -// Specialization for pairs - -template -template -struct print_container_helper::printer> { - using ostream_type = - typename print_container_helper::ostream_type; - - static void print_body(const std::pair &c, ostream_type &stream) { - stream << c.first; - if (print_container_helper::delimiters_type::values - .delimiter != NULL) - stream << print_container_helper::delimiters_type::values - .delimiter; - stream << c.second; - } -}; - -// Specialization for tuples - -template -template -struct print_container_helper::printer> { - using ostream_type = - typename print_container_helper::ostream_type; - using element_type = std::tuple; - - template - struct Int {}; - - static void print_body(const element_type &c, ostream_type &stream) { - tuple_print(c, stream, Int<0>()); - } - - static void tuple_print(const element_type &, ostream_type &, - Int) {} - - static void tuple_print( - const element_type &c, ostream_type &stream, - typename 
std::conditional, - std::nullptr_t>::type) { - stream << std::get<0>(c); - tuple_print(c, stream, Int<1>()); - } - - template - static void tuple_print(const element_type &c, ostream_type &stream, Int) { - if (print_container_helper::delimiters_type::values - .delimiter != NULL) - stream << print_container_helper::delimiters_type::values - .delimiter; - - stream << std::get(c); - - tuple_print(c, stream, Int()); - } -}; - -// Prints a print_container_helper to the specified stream. - -template -inline std::basic_ostream &operator<<( - std::basic_ostream &stream, - const print_container_helper &helper) { - helper(stream); - return stream; -} - -// Basic is_container template; specialize to derive from std::true_type for all -// desired container types - -template -struct is_container - : public std::integral_constant::value && - detail::has_begin_end::beg_value && - detail::has_begin_end::end_value> {}; - -template -struct is_container : std::true_type {}; - -template -struct is_container : std::false_type {}; - -template -struct is_container> : std::true_type {}; - -template -struct is_container> : std::true_type {}; - -template -struct is_container> : std::true_type {}; - -// Default delimiters - -template -struct delimiters { - static const delimiters_values values; -}; -template -const delimiters_values delimiters::values = {"[", ", ", "]"}; -template -struct delimiters { - static const delimiters_values values; -}; -template -const delimiters_values delimiters::values = {L"[", L", ", - L"]"}; - -// Delimiters for (multi)set and unordered_(multi)set - -template -struct delimiters<::std::set, char> { - static const delimiters_values values; -}; - -template -const delimiters_values - delimiters<::std::set, char>::values = {"{", ", ", - "}"}; - -template -struct delimiters<::std::set, wchar_t> { - static const delimiters_values values; -}; - -template -const delimiters_values - delimiters<::std::set, wchar_t>::values = { - L"{", L", ", L"}"}; - -template -struct 
delimiters<::std::multiset, char> { - static const delimiters_values values; -}; - -template -const delimiters_values - delimiters<::std::multiset, char>::values = { - "{", ", ", "}"}; - -template -struct delimiters<::std::multiset, wchar_t> { - static const delimiters_values values; -}; - -template -const delimiters_values - delimiters<::std::multiset, wchar_t>::values = { - L"{", L", ", L"}"}; - -template -struct delimiters<::std::unordered_set, char> { - static const delimiters_values values; -}; - -template -const delimiters_values delimiters< - ::std::unordered_set, char>::values = { - "{", ", ", "}"}; - -template -struct delimiters<::std::unordered_set, wchar_t> { - static const delimiters_values values; -}; - -template -const delimiters_values delimiters< - ::std::unordered_set, wchar_t>::values = { - L"{", L", ", L"}"}; - -template -struct delimiters<::std::unordered_multiset, - char> { - static const delimiters_values values; -}; - -template -const delimiters_values delimiters< - ::std::unordered_multiset, char>::values = { - "{", ", ", "}"}; - -template -struct delimiters<::std::unordered_multiset, - wchar_t> { - static const delimiters_values values; -}; - -template -const delimiters_values - delimiters<::std::unordered_multiset, - wchar_t>::values = {L"{", L", ", L"}"}; - -// Delimiters for pair and tuple - -template -struct delimiters, char> { - static const delimiters_values values; -}; -template -const delimiters_values delimiters, char>::values = { - "(", ", ", ")"}; -template -struct delimiters<::std::pair, wchar_t> { - static const delimiters_values values; -}; -template -const delimiters_values - delimiters<::std::pair, wchar_t>::values = {L"(", L", ", L")"}; - -template -struct delimiters, char> { - static const delimiters_values values; -}; -template -const delimiters_values delimiters, char>::values = { - "(", ", ", ")"}; -template -struct delimiters<::std::tuple, wchar_t> { - static const delimiters_values values; -}; -template -const 
delimiters_values - delimiters<::std::tuple, wchar_t>::values = {L"(", L", ", L")"}; - -// Type-erasing helper class for easy use of custom delimiters. -// Requires TCharTraits = std::char_traits and TChar = char or wchar_t, -// and MyDelims needs to be defined for TChar. Usage: "cout << -// pretty_print::custom_delims(x)". - -struct custom_delims_base { - virtual ~custom_delims_base() {} - virtual std::ostream &stream(::std::ostream &) = 0; - virtual std::wostream &stream(::std::wostream &) = 0; -}; - -template -struct custom_delims_wrapper : custom_delims_base { - custom_delims_wrapper(const T &t_) : t(t_) {} - - std::ostream &stream(std::ostream &s) { - return s << print_container_helper, Delims>( - t); - } - - std::wostream &stream(std::wostream &s) { - return s << print_container_helper, - Delims>(t); - } - - private: - const T &t; -}; - -template -struct custom_delims { - template - custom_delims(const Container &c) - : base(new custom_delims_wrapper(c)) {} - - std::unique_ptr base; -}; - -template -inline std::basic_ostream &operator<<( - std::basic_ostream &s, const custom_delims &p) { - return p.base->stream(s); -} - -// A wrapper for a C-style array given as pointer-plus-size. -// Usage: std::cout << pretty_print_array(arr, n) << std::endl; - -template -struct array_wrapper_n { - typedef const T *const_iterator; - typedef T value_type; - - array_wrapper_n(const T *const a, size_t n) : _array(a), _n(n) {} - inline const_iterator begin() const { return _array; } - inline const_iterator end() const { return _array + _n; } - - private: - const T *const _array; - size_t _n; -}; - -// A wrapper for hash-table based containers that offer local iterators to each -// bucket. Usage: std::cout << bucket_print(m, 4) << std::endl; (Prints bucket -// 5 of container m.) 
- -template -struct bucket_print_wrapper { - typedef typename T::const_local_iterator const_iterator; - typedef typename T::size_type size_type; - - const_iterator begin() const { return m_map.cbegin(n); } - - const_iterator end() const { return m_map.cend(n); } - - bucket_print_wrapper(const T &m, size_type bucket) : m_map(m), n(bucket) {} - - private: - const T &m_map; - const size_type n; -}; - -} // namespace pretty_print - -// Global accessor functions for the convenience wrappers - -template -inline pretty_print::array_wrapper_n pretty_print_array(const T *const a, - size_t n) { - return pretty_print::array_wrapper_n(a, n); -} - -template -pretty_print::bucket_print_wrapper bucket_print(const T &m, - typename T::size_type n) { - return pretty_print::bucket_print_wrapper(m, n); -} - -// Main magic entry point: An overload snuck into namespace std. -// Can we do better? - -namespace std { -// Prints a container to the stream using default delimiters - -template -inline typename enable_if<::pretty_print::is_container::value, - basic_ostream &>::type -operator<<(basic_ostream &stream, const T &container) { - return stream - << ::pretty_print::print_container_helper( - container); -} -} // namespace std - -#endif // H_PRETTY_PRINT diff --git a/mmcv/ops/csrc/common/utils/spconv/pybind11_utils.h b/mmcv/ops/csrc/common/utils/spconv/pybind11_utils.h deleted file mode 100644 index 026e35b..0000000 --- a/mmcv/ops/csrc/common/utils/spconv/pybind11_utils.h +++ /dev/null @@ -1,60 +0,0 @@ -// Copyright 2019 Yan Yan -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once -#include -#include -#include -#include -#include -#include - -#include -#include - -namespace py = pybind11; - -template -std::vector array2Vector(TPyObject arr) { - py::array arr_np = arr; - size_t size = arr.attr("size").template cast(); - py::array_t arr_cc = arr_np; - std::vector data(arr_cc.data(), arr_cc.data() + size); - return data; -} - -template -std::vector arrayT2Vector(py::array_t arr) { - std::vector data(arr.data(), arr.data() + arr.size()); - return data; -} - -template -tv::TensorView array2TensorView(TPyObject arr) { - py::array arr_np = arr; - py::array_t arr_cc = arr_np; - tv::Shape shape; - for (int i = 0; i < arr_cc.ndim(); ++i) { - shape.push_back(arr_cc.shape(i)); - } - return tv::TensorView(arr_cc.mutable_data(), shape); -} -template -tv::TensorView arrayT2TensorView(py::array_t arr) { - tv::Shape shape; - for (int i = 0; i < arr.ndim(); ++i) { - shape.push_back(arr.shape(i)); - } - return tv::TensorView(arr.mutable_data(), shape); -} diff --git a/mmcv/ops/csrc/common/utils/spconv/spconv/geometry.h b/mmcv/ops/csrc/common/utils/spconv/spconv/geometry.h deleted file mode 100644 index def6fe5..0000000 --- a/mmcv/ops/csrc/common/utils/spconv/spconv/geometry.h +++ /dev/null @@ -1,295 +0,0 @@ -// Copyright 2019 Yan Yan -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef SPCONV_GEOMETRY_H_ -#define SPCONV_GEOMETRY_H_ - -#include - -#include -#include - -template -TV_HOST_DEVICE Index getValidOutPos(const Index *input_pos, - const Index *kernelSize, - const Index *stride, const Index *padding, - const Index *dilation, - const Index *outSpatialShape, Index *out) { - Index lowers[NDim]; - Index uppers[NDim]; - Index counter[NDim]; - Index counterSize[NDim]; - Index pointCounter = 0; - Index val; - Index numPoints = 1; - Index m, offset; - bool valid = false; -#pragma unroll - for (unsigned i = 0; i < NDim; ++i) { - lowers[i] = (input_pos[i] - (kernelSize[i] - 1) * dilation[i] - 1 + - stride[i] + padding[i]) / - stride[i]; - uppers[i] = (input_pos[i] + padding[i]) / stride[i]; - } - -#pragma unroll - for (unsigned i = 0; i < NDim; ++i) { - counterSize[i] = ((uppers[i] - lowers[i]) / dilation[i] + 1); - numPoints *= counterSize[i]; - } - -#pragma unroll - for (unsigned i = 0; i < NDim; ++i) { - counter[i] = 0; - } - for (int i = 0; i < numPoints; ++i) { - valid = true; - m = 1; - offset = 0; -#pragma unroll - for (int j = NDim - 1; j >= 0; --j) { - val = uppers[j] - counter[j] * dilation[j]; - out[pointCounter * (NDim + 1) + j] = val; - if (val < 0 || (val > outSpatialShape[j] - 1)) { - valid = false; - // break; - } - offset += m * (input_pos[j] - val * stride[j] + padding[j]) / dilation[j]; - m *= kernelSize[j]; - } - - out[pointCounter * (NDim + 1) + NDim] = offset; - if (valid) ++pointCounter; - counter[NDim - 1] += 1; -#pragma unroll - for (int c = NDim - 1; c >= 0; --c) { - if (counter[c] == counterSize[c] && c > 0) { - counter[c - 1] += 1; - counter[c] = 0; - } - } - } - return pointCounter; -} - -template -TV_HOST_DEVICE Index getValidOutPosTranspose( - const Index *input_pos, const Index *kernelSize, const Index *stride, - const Index *padding, const Index *dilation, const Index *outSpatialShape, - Index *out) { - Index lowers[NDim]; - Index uppers[NDim]; - Index counter[NDim]; - Index counterSize[NDim]; - Index 
pointCounter = 0; - Index val; - Index numPoints = 1; - Index m, offset; - bool valid = false; -#pragma unroll - for (unsigned i = 0; i < NDim; ++i) { - lowers[i] = input_pos[i] * stride[i] - padding[i]; - uppers[i] = lowers[i] + (kernelSize[i] - 1) * dilation[i]; - } -#pragma unroll - for (unsigned i = 0; i < NDim; ++i) { - counterSize[i] = ((uppers[i] - lowers[i]) / dilation[i] + 1); - numPoints *= counterSize[i]; - } -#pragma unroll - for (unsigned i = 0; i < NDim; ++i) { - counter[i] = 0; - } - for (int i = 0; i < numPoints; ++i) { - valid = true; - m = 1; - offset = 0; -#pragma unroll - for (int j = NDim - 1; j >= 0; --j) { - val = uppers[j] - counter[j] * dilation[j]; - out[pointCounter * (NDim + 1) + j] = val; - if (val < 0 || (val > outSpatialShape[j] - 1)) { - valid = false; - } - offset += m * (val - lowers[j]) / dilation[j]; - m *= kernelSize[j]; - } - out[pointCounter * (NDim + 1) + NDim] = offset; - if (valid) ++pointCounter; - counter[NDim - 1] += 1; -#pragma unroll - for (int c = NDim - 1; c >= 0; --c) { - if (counter[c] == counterSize[c] && c > 0) { - counter[c - 1] += 1; - counter[c] = 0; - } - } - } - return pointCounter; -} - -template -Index getIndicePairsConv(tv::TensorView indicesIn, - tv::TensorView indicesOut, - tv::TensorView gridsOut, - tv::TensorView indicePairs, - tv::TensorView indiceNum, - const Index *kernelSize, const Index *stride, - const Index *padding, const Index *dilation, - const Index *outSpatialShape) { - // indicesOut: num_active * kernelVolume * (NDim + 1) - Index numAct = 0; - auto numActIn = indicesIn.dim(0); - Index batchIdx = 0; - Index spatialVolume = 1; -#pragma unroll - for (int i = 0; i < NDim; ++i) { - spatialVolume *= outSpatialShape[i]; - } - Index kernelVolume = 1; -#pragma unroll - for (int i = 0; i < NDim; ++i) { - kernelVolume *= kernelSize[i]; - } - Index numValidPoints = 0; - std::vector validPoints_(kernelVolume * (NDim + 1)); - Index *validPoints = validPoints_.data(); - Index *pointPtr = nullptr; - for 
(int j = 0; j < numActIn; ++j) { - batchIdx = indicesIn(j, 0); - numValidPoints = getValidOutPos( - indicesIn.data() + j * (NDim + 1) + 1, kernelSize, stride, padding, - dilation, outSpatialShape, validPoints); - for (Index i = 0; i < numValidPoints; ++i) { - pointPtr = validPoints + i * (NDim + 1); - auto offset = pointPtr[NDim]; - auto index = tv::rowArrayIdx(pointPtr, outSpatialShape) + - spatialVolume * batchIdx; - if (gridsOut[index] == -1) { - for (unsigned k = 1; k < NDim + 1; ++k) { - indicesOut(numAct, k) = pointPtr[k - 1]; - } - indicesOut(numAct, 0) = batchIdx; - gridsOut[index] = numAct++; - } - // indicePairs: [K, 2, L] - indicePairs(offset, 0, indiceNum[offset]) = j; - indicePairs(offset, 1, indiceNum[offset]++) = gridsOut[index]; - } - } - return numAct; -} - -template -Index getIndicePairsDeConv(tv::TensorView indicesIn, - tv::TensorView indicesOut, - tv::TensorView gridsOut, - tv::TensorView indicePairs, - tv::TensorView indiceNum, - const Index *kernelSize, const Index *stride, - const Index *padding, const Index *dilation, - const Index *outSpatialShape) { - Index numAct = 0; - auto numActIn = indicesIn.dim(0); - Index batchIdx = 0; - Index spatialVolume = 1; -#pragma unroll - for (int i = 0; i < NDim; ++i) { - spatialVolume *= outSpatialShape[i]; - } - Index kernelVolume = 1; -#pragma unroll - for (int i = 0; i < NDim; ++i) { - kernelVolume *= kernelSize[i]; - } - Index numValidPoints = 0; - std::vector validPoints_(kernelVolume * (NDim + 1)); - Index *validPoints = validPoints_.data(); - Index *pointPtr = nullptr; - for (int j = 0; j < numActIn; ++j) { - batchIdx = indicesIn(j, 0); - numValidPoints = getValidOutPosTranspose( - indicesIn.data() + j * (NDim + 1) + 1, kernelSize, stride, padding, - dilation, outSpatialShape, validPoints); - for (Index i = 0; i < numValidPoints; ++i) { - pointPtr = validPoints + i * (NDim + 1); - auto offset = pointPtr[NDim]; - auto index = tv::rowArrayIdx(pointPtr, outSpatialShape) + - spatialVolume * batchIdx; - 
if (gridsOut[index] == -1) { - for (unsigned k = 1; k < NDim + 1; ++k) { - indicesOut(numAct, k) = pointPtr[k - 1]; - } - indicesOut(numAct, 0) = batchIdx; - gridsOut[index] = numAct++; - } - // indicePairs: [K, 2, L] - indicePairs(offset, 0, indiceNum[offset]) = j; - indicePairs(offset, 1, indiceNum[offset]++) = gridsOut[index]; - } - } - return numAct; -} - -template -Index getIndicePairsSubM(tv::TensorView indicesIn, - tv::TensorView gridsOut, - tv::TensorView indicePairs, - tv::TensorView indiceNum, - const Index *const kernelSize, - const Index *const stride, const Index *const padding, - const Index *dilation, - const Index *const outSpatialShape) { - auto numActIn = indicesIn.dim(0); - Index spatialVolume = 1; -#pragma unroll - for (int i = 0; i < NDim; ++i) { - spatialVolume *= outSpatialShape[i]; - } - Index kernelVolume = 1; -#pragma unroll - for (int i = 0; i < NDim; ++i) { - kernelVolume *= kernelSize[i]; - } - Index numValidPoints = 0; - // Index validPoints[kernelVolume * (NDim + 1)]; - std::vector validPoints_(kernelVolume * (NDim + 1)); - Index *validPoints = validPoints_.data(); - Index *pointPtr = nullptr; - Index index = 0; - for (int j = 0; j < numActIn; ++j) { - index = tv::rowArrayIdx(indicesIn.data() + j * (NDim + 1) + 1, - outSpatialShape) + - spatialVolume * indicesIn(j, 0); - gridsOut[index] = j; - } - for (int j = 0; j < numActIn; ++j) { - numValidPoints = getValidOutPos( - indicesIn.data() + j * (NDim + 1) + 1, kernelSize, stride, padding, - dilation, outSpatialShape, validPoints); - for (Index i = 0; i < numValidPoints; ++i) { - pointPtr = validPoints + i * (NDim + 1); - auto offset = pointPtr[NDim]; - index = tv::rowArrayIdx(pointPtr, outSpatialShape) + - spatialVolume * indicesIn(j, 0); - if (gridsOut[index] > -1) { - indicePairs(offset, 0, indiceNum[offset]) = j; - indicePairs(offset, 1, indiceNum[offset]++) = gridsOut[index]; - } - } - } - return numActIn; -} - -#endif diff --git a/mmcv/ops/csrc/common/utils/spconv/spconv/indice.h 
b/mmcv/ops/csrc/common/utils/spconv/spconv/indice.h deleted file mode 100644 index 96ce34e..0000000 --- a/mmcv/ops/csrc/common/utils/spconv/spconv/indice.h +++ /dev/null @@ -1,78 +0,0 @@ -// Copyright 2019 Yan Yan -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef SPARSE_CONV_INDICE_FUNCTOR_H_ -#define SPARSE_CONV_INDICE_FUNCTOR_H_ -#include - -namespace functor { -template -struct CreateConvIndicePairFunctorP1 { - Index operator()(const Device& d, tv::TensorView indicesIn, - tv::TensorView indicesOut, - tv::TensorView gridsOut, - tv::TensorView indicePairs, - tv::TensorView indiceNum, - tv::TensorView indicePairUnique, - const tv::SimpleVector kernelSize, - const tv::SimpleVector stride, - const tv::SimpleVector padding, - const tv::SimpleVector dilation, - const tv::SimpleVector outSpatialShape, - bool transpose); -}; - -template -struct CreateConvIndicePairFunctorP2 { - Index operator()(const Device& d, tv::TensorView indicesIn, - tv::TensorView indicesOut, - tv::TensorView gridsOut, - tv::TensorView indicePairs, - tv::TensorView indiceNum, - tv::TensorView indicePairUnique, - const tv::SimpleVector outSpatialShape, - bool transpose, bool resetGrid = false); -}; - -template -struct CreateConvIndicePairFunctor { - Index operator()(const Device& d, tv::TensorView indicesIn, - tv::TensorView indicesOut, - tv::TensorView gridsOut, - tv::TensorView indicePairs, - tv::TensorView indiceNum, - const tv::SimpleVector kernelSize, - const tv::SimpleVector 
stride, - const tv::SimpleVector padding, - const tv::SimpleVector dilation, - const tv::SimpleVector outSpatialShape, - bool transpose, bool resetGrid = false); -}; - -template -struct CreateSubMIndicePairFunctor { - Index operator()(const Device& d, tv::TensorView indicesIn, - tv::TensorView gridsOut, - tv::TensorView indicePairs, - tv::TensorView indiceNum, - const tv::SimpleVector kernelSize, - const tv::SimpleVector stride, - const tv::SimpleVector padding, - const tv::SimpleVector dilation, - const tv::SimpleVector outSpatialShape, - bool transpose, bool resetGrid = false); -}; -} // namespace functor - -#endif diff --git a/mmcv/ops/csrc/common/utils/spconv/spconv/maxpool.h b/mmcv/ops/csrc/common/utils/spconv/spconv/maxpool.h deleted file mode 100644 index 78f32ed..0000000 --- a/mmcv/ops/csrc/common/utils/spconv/spconv/maxpool.h +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2019 Yan Yan -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#ifndef SPARSE_MAXPOOL_FUNCTOR_H_ -#define SPARSE_MAXPOOL_FUNCTOR_H_ -#include - -namespace functor { -template -struct SparseMaxPoolForwardFunctor { - void operator()(const Device& d, tv::TensorView outFeatures, - tv::TensorView inFeatures, - tv::TensorView indices, int size); -}; - -template -struct SparseMaxPoolBackwardFunctor { - void operator()(const Device& d, tv::TensorView outFeatures, - tv::TensorView inFeatures, - tv::TensorView fout, - tv::TensorView fin, - tv::TensorView indices, int size); -}; -} // namespace functor - -#endif diff --git a/mmcv/ops/csrc/common/utils/spconv/spconv/mp_helper.h b/mmcv/ops/csrc/common/utils/spconv/spconv/mp_helper.h deleted file mode 100644 index 8262b30..0000000 --- a/mmcv/ops/csrc/common/utils/spconv/spconv/mp_helper.h +++ /dev/null @@ -1,50 +0,0 @@ -#ifndef MP_HELPER_H_ -#define MP_HELPER_H_ -#include -#include - -template -struct mp_list {}; - -template -using mp_list_c = mp_list...>; - -namespace detail { - -template -constexpr F mp_for_each_impl(mp_list, F &&f) { - return std::initializer_list{(f(T()), 0)...}, std::forward(f); -} - -template -constexpr F mp_for_each_impl(mp_list<>, F &&f) { - return std::forward(f); -} - -} // namespace detail - -namespace detail { - -template class B> -struct mp_rename_impl { - // An error "no type named 'type'" here means that the first argument to - // mp_rename is not a list -}; - -template