Commit 546b4279 authored by limm's avatar limm
Browse files

add csrc and mmdeploy module

parent 502f4fb9
Pipeline #2810 canceled with stages
// Copyright (c) OpenMMLab. All rights reserved.
#include "merge_shape_concate.h"
#include <vector>
#include "utils.h"
namespace mmdeploy {
namespace torch_jit {
using c10::Symbol;
using torch::jit::Block;
using torch::jit::IValue;
using torch::jit::Node;
using torch::jit::TensorType;
using torch::jit::Value;
// Fuse the pattern `Shape -> Gather -> Unsqueeze -> Concat`, where every
// branch gathers (axis 0) from the shape of the SAME tensor, into a single
// `Shape -> Gather` with a constant index tensor.
// `node` is the terminating onnx::Concat; if any input does not match the
// pattern the graph is left untouched.
void MergeShapeConcate(Node* node) {
  auto inputs = node->inputs();

  std::vector<int64_t> gather_value;
  Value* shape_from = nullptr;

  std::vector<Node*> node_to_remove{node};

  // check pattern shape->gather->unsqueeze->concate
  for (auto input : inputs) {
    auto unsqueeze_node = input->node();
    if (!is_kind(unsqueeze_node, "onnx::Unsqueeze") || unsqueeze_node->output()->uses().size() != 1)
      return;

    // the unsqueeze must add exactly one leading dim, i.e. axes == [0]
    if (unsqueeze_node->hasAttribute(Symbol::attr("axes"))) {
      auto axes = unsqueeze_node->is(Symbol::attr("axes"));
      // bugfix: reject unless axes is exactly {0}; the previous `&&` let
      // multi-axis or non-zero-axis unsqueeze nodes slip through.
      if (axes.size() != 1 || axes[0] != 0) return;
    }

    auto gather_node = unsqueeze_node->input(0)->node();
    if (!is_kind(gather_node, "onnx::Gather") || gather_node->i(Symbol::attr("axis")) != 0 ||
        gather_node->output()->uses().size() != 1)
      return;

    auto gather_inputs = gather_node->inputs();
    auto gather_data = gather_inputs[0];
    auto gather_indices = gather_inputs[1];
    auto shape_node = gather_data->node();
    if (!is_kind(shape_node, "onnx::Shape") || shape_node->output()->uses().size() != 1) return;

    // every branch must read the shape of the same tensor
    auto current_shape_from = shape_node->input();
    if (!shape_from) {
      shape_from = current_shape_from;
    } else {
      if (shape_from != current_shape_from) return;
    }

    auto constant_node = gather_indices->node();
    if (!is_kind(constant_node, "onnx::Constant")) return;

    // collect the constant gather indices (scalar or 1-D tensor)
    auto gather_indices_val = constant_node->t(Symbol::attr("value"));
    int64_t* data_ptr = gather_indices_val.data_ptr<int64_t>();
    if (gather_indices_val.dim() == 0) {
      gather_value.push_back(data_ptr[0]);
    } else {
      // bugfix: iterate over the element count; element_size() returns the
      // byte width of one element (8 for int64), not the number of elements.
      const int64_t num_indices = gather_indices_val.numel();
      for (int64_t j = 0; j < num_indices; ++j) {
        gather_value.push_back(data_ptr[j]);
      }
    }

    node_to_remove.insert(node_to_remove.end(), {unsqueeze_node, gather_node, shape_node});
  }

  // create the merged constant index tensor
  auto graph = node->owningGraph();
  auto const_node = graph->create(Symbol::onnx("Constant"));
  const_node->t_(Symbol::attr("value"), at::tensor(gather_value));
  auto first_node = node->owningGraph()->block()->nodes().front();
  if (const_node != first_node) const_node->insertBefore(first_node);

  // recreate a single shape node reading from the shared source tensor
  auto shape_node = graph->create(Symbol::onnx("Shape"), {shape_from});
  shape_node->insertBefore(node);

  // create the replacement gather node and splice it in
  auto gather_node =
      graph->create(Symbol::onnx("Gather"), {shape_node->output(), const_node->output()});
  gather_node->insertAfter(node);
  node->output()->replaceAllUsesWith(gather_node->output());

  // drop the whole fused subgraph (including the original Concat)
  for (auto n : node_to_remove) {
    n->destroy();
  }
}
// Walk every node of `block`, recursing into sub-blocks first, and try to
// fuse each onnx::Concat found. Removed the unused `graph` local.
void MergeShapeConcate(Block* block) {
  auto it = block->nodes().begin();
  while (it != block->nodes().end()) {
    auto node = *it;
    ++it;  // advance first: MergeShapeConcate(node) may destroy `node`
    for (auto inner : node->blocks()) {
      MergeShapeConcate(inner);
    }
    if (is_kind(node, "onnx::Concat")) {
      MergeShapeConcate(node);
    }
  }
}
// Public entry point: run the shape-concat fusion over the whole graph.
void MergeShapeConcate(const std::shared_ptr<Graph>& graph) {
  MergeShapeConcate(graph->block());
}
} // namespace torch_jit
} // namespace mmdeploy
// Copyright (c) OpenMMLab. All rights reserved.
// bugfix: `_MERGE_SHAPE_CONCATE_H_` (leading underscore + capital) is an
// identifier reserved for the implementation; renamed the include guard.
#ifndef MMDEPLOY_MERGE_SHAPE_CONCATE_H_
#define MMDEPLOY_MERGE_SHAPE_CONCATE_H_

#include <torch/script.h>

namespace mmdeploy {
namespace torch_jit {
using torch::jit::Graph;

// Fuse `Shape -> Gather -> Unsqueeze -> Concat` chains in `graph` into a
// single `Shape -> Gather` with a constant index tensor.
void MergeShapeConcate(const std::shared_ptr<Graph>& graph);

}  // namespace torch_jit
}  // namespace mmdeploy

#endif  // MMDEPLOY_MERGE_SHAPE_CONCATE_H_
// Copyright (c) OpenMMLab. All rights reserved.
#include "onnx_peephole.h"
#include <torch/csrc/jit/passes/dead_code_elimination.h>
#include <vector>
#include "utils.h"
namespace mmdeploy {
namespace torch_jit {
using c10::Symbol;
using torch::jit::Block;
using torch::jit::IValue;
using torch::jit::Node;
using torch::jit::TensorType;
using torch::jit::Value;
// Collapse reshape->reshape chains: when every consumer of `node`'s output
// is another onnx::Reshape taking it as the data input (offset 0), this
// intermediate reshape is redundant and its own data input can feed the
// consumers directly.
void RemoveReshapeChain(Node* node) {
  auto output = node->output();
  if (!output->hasUses()) {
    return;
  }
  for (const auto& use : output->uses()) {
    const bool feeds_reshape_data = is_kind(use.user, "onnx::Reshape") && use.offset == 0;
    if (!feeds_reshape_data) {
      return;
    }
  }
  // bypass this node, then delete it
  output->replaceAllUsesWith(node->inputs()[0]);
  node->destroy();
}
// Drop a Cast whose producer already casts to the same dtype:
// Cast(t) -> Cast(t) collapses to a single Cast(t).
void RemoveRedundantCast(Node* node) {
  const auto target_type = node->i(Symbol::attr("to"));
  auto input = node->input();
  auto producer = input->node();
  if (!is_kind(producer, "onnx::Cast")) {
    return;
  }
  if (producer->i(Symbol::attr("to")) != target_type) {
    return;
  }
  node->output()->replaceAllUsesWith(input);
  node->destroy();
}
// Apply the peephole rewrites to every node in `block`, recursing into
// sub-blocks first. Removed the unused `graph` local.
void ONNXPeephole(Block* block) {
  auto it = block->nodes().begin();
  while (it != block->nodes().end()) {
    auto node = *it;
    ++it;  // advance first: the rewrites below may destroy `node`
    for (auto inner : node->blocks()) {
      ONNXPeephole(inner);
    }
    if (is_kind(node, "onnx::Reshape")) {
      RemoveReshapeChain(node);
    } else if (is_kind(node, "onnx::Cast")) {
      RemoveRedundantCast(node);
    }
  }
}
// Run the ONNX peephole optimizations (reshape-chain collapse, redundant
// cast removal) over the whole graph, then strip nodes left dead.
void ONNXPeephole(const std::shared_ptr<Graph>& graph) {
  ONNXPeephole(graph->block());
  // DCE cleans up any nodes orphaned by the rewrites above
  torch::jit::EliminateDeadCode(
      graph->block(), true,
      torch::jit::DCESideEffectPolicy::ALLOW_DELETING_NODES_WITH_SIDE_EFFECTS);
}
} // namespace torch_jit
} // namespace mmdeploy
// Copyright (c) OpenMMLab. All rights reserved.
// bugfix: `_ONNX_PEEPHOLE_H_` (leading underscore + capital) is an
// identifier reserved for the implementation; renamed the include guard.
#ifndef MMDEPLOY_ONNX_PEEPHOLE_H_
#define MMDEPLOY_ONNX_PEEPHOLE_H_

#include <torch/script.h>

namespace mmdeploy {
namespace torch_jit {
using torch::jit::Graph;

// Run the ONNX peephole optimizations (reshape-chain collapse, redundant
// cast removal) over `graph`, followed by dead-code elimination.
void ONNXPeephole(const std::shared_ptr<Graph>& graph);

}  // namespace torch_jit
}  // namespace mmdeploy

#endif  // MMDEPLOY_ONNX_PEEPHOLE_H_
// bugfix: `_PASSES_ONNX_UTILS_H_` (leading underscore + capital) is an
// identifier reserved for the implementation; renamed the include guard.
#ifndef MMDEPLOY_PASSES_ONNX_UTILS_H_
#define MMDEPLOY_PASSES_ONNX_UTILS_H_

#include <torch/script.h>

namespace mmdeploy {
namespace torch_jit {
using c10::Symbol;
using torch::jit::Node;

// True when `node`'s kind equals `symbol`.
inline bool is_kind(const Node* node, const Symbol& symbol) { return node->kind() == symbol; }

// Convenience overload taking a qualified name such as "onnx::Concat".
inline bool is_kind(const Node* node, const char* symbol_name) {
  return is_kind(node, Symbol::fromQualString(symbol_name));
}

}  // namespace torch_jit
}  // namespace mmdeploy

#endif  // MMDEPLOY_PASSES_ONNX_UTILS_H_
# Copyright (c) OpenMMLab. All rights reserved.
project(mmdeploy_codebase)

# Expand the special value "all" into every supported codebase; otherwise
# build exactly the codebases the user listed in MMDEPLOY_CODEBASES.
set(CODEBASES "")
if ("all" IN_LIST MMDEPLOY_CODEBASES)
list(APPEND CODEBASES "mmcls")
list(APPEND CODEBASES "mmdet")
list(APPEND CODEBASES "mmseg")
list(APPEND CODEBASES "mmocr")
list(APPEND CODEBASES "mmedit")
list(APPEND CODEBASES "mmpose")
list(APPEND CODEBASES "mmrotate")
list(APPEND CODEBASES "mmaction")
else ()
set(CODEBASES ${MMDEPLOY_CODEBASES})
endif ()

foreach (codebase IN LISTS CODEBASES)
message(STATUS "build codebase: ${codebase}")
# Renamed upstream projects map onto their legacy source directories:
# mmpretrain -> mmcls, mmyolo -> mmdet, mmagic -> mmedit.
if (codebase STREQUAL "mmpretrain")
set(subdir_name "mmcls")
elseif (codebase STREQUAL "mmyolo")
set(subdir_name "mmdet")
elseif (codebase STREQUAL "mmagic")
set(subdir_name "mmedit")
else()
set(subdir_name ${codebase})
endif()
add_subdirectory(${subdir_name})
endforeach ()
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_SRC_CODEBASE_COMMON_H_
#define MMDEPLOY_SRC_CODEBASE_COMMON_H_
#include "mmdeploy/core/device.h"
#include "mmdeploy/core/module.h"
#include "mmdeploy/core/registry.h"
#include "mmdeploy/core/utils/formatter.h"
#include "mmdeploy/experimental/module_adapter.h"
namespace mmdeploy {
using namespace framework;
// Holds the device/stream pair shared by the modules of one pipeline,
// extracted from the pipeline configuration.
class Context {
 public:
  // Reads "context.device" and "context.stream" from `config`.
  explicit Context(const Value& config) {
    MMDEPLOY_DEBUG("config: {}", config);
    device_ = config["context"]["device"].get<Device>();
    stream_ = config["context"]["stream"].get<Stream>();
  }

  // Mutable accessors so derived codebases can use or rebind them.
  Stream& stream() { return stream_; }
  Device& device() { return device_; }

 protected:
  Device device_;
  Stream stream_;
};
// Creator that instantiates a codebase post-processing component by name.
// `Tag` is a codebase tag class declared with MMDEPLOY_DECLARE_CODEBASE.
template <class Tag>
class CodebaseCreator : public Creator<Module> {
 public:
  std::string_view name() const noexcept override { return Tag::name; }

  // Build the component named by cfg["component"], looked up in the
  // per-codebase registry. Throws eInvalidArgument when the key is missing
  // or not a string, eEntryNotFound when the component is not registered.
  std::unique_ptr<Module> Create(const Value& cfg) override {
    constexpr auto key{"component"};
    if (!cfg.contains(key)) {
      MMDEPLOY_ERROR("no key '{}' in config {}", key, cfg);
      throw_exception(eInvalidArgument);
    }
    if (!cfg[key].is_string()) {
      MMDEPLOY_ERROR("key '{}' is not a string", key);
      throw_exception(eInvalidArgument);
    }
    auto postprocess_type = cfg[key].get<std::string>();
    auto creator = gRegistry<Tag>().Get(postprocess_type);
    if (creator == nullptr) {
      // fixed grammar in the error message: "Could not found" -> "Could not find"
      MMDEPLOY_ERROR("Could not find entry '{}' in {}. Available components: {}",
                     postprocess_type, Tag::name, gRegistry<Tag>().List());
      throw_exception(eEntryNotFound);
    }
    return creator->Create(cfg);
  }
};
// Declares a codebase tag class (deriving Context) plus the declaration of
// its component registry. `codebase_name` becomes the registry key string.
// NOTE: comments must stay outside the macros — a `//` inside a
// backslash-continued macro would swallow the continuation.
#define MMDEPLOY_DECLARE_CODEBASE(codebase_type, codebase_name) \
class codebase_type : public Context { \
public: \
static constexpr const auto name = #codebase_name; \
using type = std::unique_ptr<Module>; \
explicit codebase_type(const Value& config) : Context(config) {} \
}; \
MMDEPLOY_DECLARE_REGISTRY(codebase_type, std::unique_ptr<Module>(const Value& config));

// Registers the codebase's creator with the global Module registry and
// defines the storage of the codebase's own registry.
#define MMDEPLOY_REGISTER_CODEBASE(codebase) \
using codebase##_##Creator = CodebaseCreator<codebase>; \
MMDEPLOY_REGISTER_CREATOR(Module, codebase##_##Creator) \
MMDEPLOY_DEFINE_REGISTRY(codebase)

// Registers one post-processing component under its codebase registry,
// wrapped as a task module via CreateTask.
#define MMDEPLOY_REGISTER_CODEBASE_COMPONENT(codebase, component_type) \
MMDEPLOY_REGISTER_FACTORY_FUNC(codebase, (component_type, 0), [](const Value& config) { \
return CreateTask(component_type(config)); \
})
} // namespace mmdeploy
#endif // MMDEPLOY_SRC_CODEBASE_COMMON_H_
# Copyright (c) OpenMMLab. All rights reserved.
project(mmdeploy_mmaction)

# bugfix: glob the pattern inside the source dir; the previous two-argument
# form `file(GLOB SRCS ${DIR} "*.cpp")` also added the directory path itself
# to SRCS.
file(GLOB SRCS "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp")
mmdeploy_add_module(${PROJECT_NAME} "${SRCS}")
target_link_libraries(${PROJECT_NAME} PRIVATE
        mmdeploy_operation
        mmdeploy_transform
        mmdeploy_opencv_utils)
add_library(mmdeploy::mmaction ALIAS ${PROJECT_NAME})
# expose the task so the SDK build can enumerate it
set(MMDEPLOY_TASKS ${MMDEPLOY_TASKS} video_recognizer CACHE INTERNAL "")
// Copyright (c) OpenMMLab. All rights reserved.
#include <algorithm>
#include <numeric>
#include "mmdeploy/codebase/mmaction/mmaction.h"
#include "mmdeploy/core/tensor.h"
#include "mmdeploy/core/utils/device_utils.h"
namespace mmdeploy::mmaction {
// Generic action-recognition head: copies the float score tensor to host
// and returns the top-k labels.
class BaseHead : public MMAction {
 public:
  // Reads optional "params.topk" (default 1); rejects non-positive values.
  explicit BaseHead(const Value& cfg) : MMAction(cfg) {
    if (cfg.contains("params")) {
      topk_ = cfg["params"].value("topk", 1);
      if (topk_ <= 0) {
        MMDEPLOY_ERROR("'topk' should be greater than 0, but got '{}'", topk_);
        throw_exception(eInvalidArgument);
      }
    }
  }

  // Post-process entry: expects infer_res["output"] to be a float tensor
  // whose second dim is the class count.
  Result<Value> operator()(const Value& infer_res) {
    MMDEPLOY_DEBUG("infer_res: {}", infer_res);
    auto output = infer_res["output"].get<Tensor>();
    if (!(output.shape().size() >= 2 && output.data_type() == DataType::kFLOAT)) {
      MMDEPLOY_ERROR("unsupported `output` tensor, shape: {}, dtype: {}", output.shape(),
                     (int)output.data_type());
      return Status(eNotSupported);
    }
    auto class_num = (int)output.shape(1);
    OUTCOME_TRY(auto _scores, MakeAvailableOnDevice(output, kHost, stream()));
    OUTCOME_TRY(stream().Wait());
    return GetLabels(_scores, class_num);
  }

 private:
  // Returns the top-k {label_id, score} pairs, highest score first.
  Value GetLabels(const Tensor& scores, int class_num) const {
    auto scores_data = scores.data<float>();
    // bugfix: clamp k to class_num — partial_sort past end() is UB when
    // topk_ > class_num. Mirrors LinearClsHead in the mmcls codebase.
    auto topk = std::min(topk_, class_num);
    Labels output;
    output.reserve(topk);
    std::vector<int> idx(class_num);
    iota(begin(idx), end(idx), 0);
    partial_sort(begin(idx), begin(idx) + topk, end(idx),
                 [&](int i, int j) { return scores_data[i] > scores_data[j]; });
    for (int i = 0; i < topk; ++i) {
      auto label = Label{idx[i], scores_data[idx[i]]};
      MMDEPLOY_DEBUG("label_id: {}, score: {}", label.label_id, label.score);
      output.push_back(label);
    }
    return to_value(std::move(output));
  }

 private:
  static constexpr const auto kHost = Device{0};
  int topk_{1};
};
// Register the generic head, plus aliases matching the head names used in
// mmaction model configs (all share the BaseHead implementation).
MMDEPLOY_REGISTER_CODEBASE_COMPONENT(MMAction, BaseHead);
using SlowFastHead = BaseHead;
MMDEPLOY_REGISTER_CODEBASE_COMPONENT(MMAction, SlowFastHead);
using TSNHead = BaseHead;
MMDEPLOY_REGISTER_CODEBASE_COMPONENT(MMAction, TSNHead);
} // namespace mmdeploy::mmaction
// Copyright (c) OpenMMLab. All rights reserved.
#include "mmdeploy/codebase/mmaction/format_shape.h"
#include "mmdeploy/core/utils/device_utils.h"
#include "mmdeploy/core/utils/formatter.h"
using namespace std;
namespace mmdeploy::mmaction {
// Validates "input_format" (must be "NCHW" or "NCTHW") and creates the
// managed permute operation used by the formatting helpers.
FormatShape::FormatShape(const Value& args) {
  input_format_ = args.value("input_format", std::string(""));
  const bool supported = input_format_ == "NCHW" || input_format_ == "NCTHW";
  if (!supported) {
    MMDEPLOY_ERROR("'input_format' should be 'NCHW' or 'NCTHW'");
    throw_exception(eInvalidArgument);
  }
  permute_ = ::mmdeploy::operation::Managed<::mmdeploy::operation::Permute>::Create();
}
// Stack `images` into one contiguous (N, H, W, C) float tensor by copying
// each image back-to-back on the current stream.
// assumes every image shares images[0]'s (., H, W, C) shape — TODO confirm
Result<void> FormatShape::MergeInputs(const std::vector<Tensor>& images, Tensor& inputs) {
  // robustness: images[0] below would be UB on an empty input
  if (images.empty()) {
    MMDEPLOY_ERROR("MergeInputs expects at least one image");
    return Status(eInvalidArgument);
  }
  auto N = static_cast<int64_t>(images.size());
  auto H = images[0].shape(1);
  auto W = images[0].shape(2);
  auto C = images[0].shape(3);

  auto& device = operation::gContext().device();
  auto& stream = operation::gContext().stream();
  TensorDesc desc = {device, DataType::kFLOAT, {N, H, W, C}};
  inputs = Tensor(desc);

  auto offset = 0UL;
  auto n_item = H * W * C;
  auto copy_size = n_item * sizeof(float);
  for (int i = 0; i < N; i++) {
    auto src_buffer = images[i].buffer();
    auto dst_buffer = inputs.buffer();
    OUTCOME_TRY(stream.Copy(src_buffer, dst_buffer, copy_size, 0, offset));
    offset += copy_size;
  }
  return success();
}
// Merge the collected images into one tensor, permute into the configured
// layout, then prepend a leading batch dimension of 1.
Result<void> FormatShape::Format(const std::vector<Tensor>& images, Tensor& output, int clip_len,
                                 int num_clips) {
  Tensor merged;
  OUTCOME_TRY(MergeInputs(images, merged));
  if (input_format_ == "NCHW") {
    OUTCOME_TRY(FormatNCHW(merged, clip_len, num_clips, output));
  }
  if (input_format_ == "NCTHW") {
    OUTCOME_TRY(FormatNCTHW(merged, clip_len, num_clips, output));
  }
  // shape -> (1, ...): add the leading batch dimension
  TensorShape batched = output.shape();
  batched.insert(batched.begin(), 1);
  output.Reshape(batched);
  return success();
}
// NHWC -> NCHW. clip_len/num_clips are unused for this layout but kept so
// the signature mirrors FormatNCTHW.
Result<void> FormatShape::FormatNCHW(Tensor& src, int clip_len, int num_clips, Tensor& dst) {
  const vector<int> channel_first_axes = {0, 3, 1, 2};
  OUTCOME_TRY(permute_.Apply(src, dst, channel_first_axes));
  return success();
}
// NHWC with N a multiple of clip_len -> NCTHW: reshape to (M, T, H, W, C)
// where T = clip_len and M = N / T, then move channels forward.
Result<void> FormatShape::FormatNCTHW(Tensor& src, int clip_len, int num_clips, Tensor& dst) {
  auto N = src.shape(0);
  auto H = src.shape(1);
  auto W = src.shape(2);
  auto C = src.shape(3);
  const int T = clip_len;
  if (N % T != 0) {
    return Status(eInvalidArgument);
  }
  const int M = N / T;
  src.Reshape({M, T, H, W, C});

  const vector<int> ncthw_axes = {0, 4, 1, 2, 3};
  OUTCOME_TRY(permute_.Apply(src, dst, ncthw_axes));
  return success();
}
// Transform entry point: collects the "imgs"/"img" tensors from every
// sample of `data`, formats them into one batched tensor, and rewrites
// `data` as {"img": tensor}.
Result<void> FormatShape::Apply(Value& data) {
  MMDEPLOY_DEBUG("input: {}", data);
  if (!data.is_array()) {
    MMDEPLOY_ERROR("input of format shape should be array");
    return Status(eInvalidArgument);
  }
  if (!(data[0].contains("imgs") || data[0].contains("img"))) {
    MMDEPLOY_ERROR("input should contains imgs or img");
    return Status(eInvalidArgument);
  }

  const int num_samples = data.size();
  const int clip_len = data[0]["clip_len"].get<int>();
  const int num_clips = data[0]["num_clips"].get<int>();

  // gather tensors crop-major: all samples of crop 0, then crop 1, ...
  std::vector<Tensor> images;
  if (data[0].contains("imgs")) {
    const int num_crops = data[0]["imgs"].size();
    images.reserve(num_samples * num_crops);
    for (int crop = 0; crop < num_crops; ++crop) {
      for (int sample = 0; sample < num_samples; ++sample) {
        images.push_back(data[sample]["imgs"][crop].get<Tensor>());
      }
    }
  } else if (data[0].contains("img")) {
    images.reserve(num_samples);
    for (int sample = 0; sample < num_samples; ++sample) {
      images.push_back(data[sample]["img"].get<Tensor>());
    }
  }

  Tensor batched;
  data = Value{};  // release the inputs before building the output
  OUTCOME_TRY(Format(images, batched, clip_len, num_clips));
  data["img"] = std::move(batched);
  return success();
}
// Expose FormatShape to the transform registry so pipelines can name it.
MMDEPLOY_REGISTER_TRANSFORM(FormatShape);
} // namespace mmdeploy::mmaction
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_CODEBASE_MMACTION_FORMAT_SHAPE_H_
#define MMDEPLOY_CODEBASE_MMACTION_FORMAT_SHAPE_H_
#include <array>
#include <string>
#include <vector>
#include "mmdeploy/core/tensor.h"
#include "mmdeploy/operation/managed.h"
#include "mmdeploy/operation/vision.h"
#include "mmdeploy/preprocess/transform/transform.h"
namespace mmdeploy::mmaction {
// Transform that batches per-sample image tensors and permutes them into
// the "NCHW" or "NCTHW" layout expected by action-recognition models.
class FormatShape : public Transform {
 public:
  // Reads "input_format" from `args`; throws on unsupported values.
  explicit FormatShape(const Value& args);

  // Transform entry point: consumes "imgs"/"img" from `data`, writes "img".
  Result<void> Apply(Value& data) override;

  // Batch, permute, and add a leading batch dim of 1.
  Result<void> Format(const std::vector<Tensor>& images, Tensor& output, int clip_len,
                      int num_clips);
  // NHWC -> NCHW permute.
  Result<void> FormatNCHW(Tensor& src, int clip_len, int num_clips, Tensor& dst);
  // NHWC -> NCTHW reshape + permute.
  Result<void> FormatNCTHW(Tensor& src, int clip_len, int num_clips, Tensor& dst);
  // Stack images into one contiguous (N, H, W, C) tensor.
  Result<void> MergeInputs(const std::vector<Tensor>& images, Tensor& inputs);

 private:
  std::string input_format_;  // "NCHW" or "NCTHW"
  operation::Managed<operation::Permute> permute_;
};
} // namespace mmdeploy::mmaction
#endif
// Copyright (c) OpenMMLab. All rights reserved.
#include "mmdeploy/codebase/mmaction/mmaction.h"
namespace mmdeploy::mmaction {
// Instantiate the MMAction codebase registry and its Module creator.
MMDEPLOY_REGISTER_CODEBASE(MMAction);
}  // namespace mmdeploy::mmaction
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_CODEBASE_MMACTION_MMACTION_H_
#define MMDEPLOY_CODEBASE_MMACTION_MMACTION_H_
#include "mmdeploy/codebase/common.h"
#include "mmdeploy/core/device.h"
#include "mmdeploy/core/module.h"
#include "mmdeploy/core/serialization.h"
namespace mmdeploy::mmaction {
// One classification result: class index plus its score.
struct Label {
  int label_id;
  float score;
  MMDEPLOY_ARCHIVE_MEMBERS(label_id, score);
};
using Labels = std::vector<Label>;
// Declares the MMAction codebase tag and its component registry.
MMDEPLOY_DECLARE_CODEBASE(MMAction, mmaction);
}  // namespace mmdeploy::mmaction
#endif  // MMDEPLOY_CODEBASE_MMACTION_MMACTION_H_
# Copyright (c) OpenMMLab. All rights reserved.
project(mmdeploy_mmcls)

# bugfix: glob the pattern inside the source dir; the previous two-argument
# form `file(GLOB_RECURSE SRCS ${DIR} "*.cpp")` also added the directory
# path itself to SRCS.
file(GLOB_RECURSE SRCS "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp")
mmdeploy_add_module(${PROJECT_NAME} "${SRCS}")
target_link_libraries(${PROJECT_NAME}
        PRIVATE mmdeploy_opencv_utils)
add_library(mmdeploy::mmcls ALIAS ${PROJECT_NAME})
# expose the task so the SDK build can enumerate it
set(MMDEPLOY_TASKS ${MMDEPLOY_TASKS} classifier CACHE INTERNAL "")
// Copyright (c) OpenMMLab. All rights reserved.
#include <algorithm>
#include <numeric>
#include "mmdeploy/codebase/mmcls/mmcls.h"
#include "mmdeploy/core/tensor.h"
#include "mmdeploy/core/utils/device_utils.h"
#include "mmdeploy/core/utils/formatter.h"
#include "mmdeploy/experimental/module_adapter.h"
#include "opencv2/core/core.hpp"
using std::vector;
namespace mmdeploy::mmcls {
// Classification head: copies the float score tensor to host and returns
// the top-k labels, optionally applying softmax to the raw scores.
// NOTE: code left byte-identical — the softmax accumulation order is
// float-sensitive, so only documentation is added here.
class LinearClsHead : public MMClassification {
 public:
  // Reads optional "params.softmax" (default false) and "params.topk"
  // (default 1, must be > 0).
  explicit LinearClsHead(const Value& cfg) : MMClassification(cfg) {
    if (cfg.contains("params")) {
      softmax_ = cfg["params"].value("softmax", false);
      topk_ = cfg["params"].value("topk", 1);
      if (topk_ <= 0) {
        MMDEPLOY_ERROR("'topk' should be greater than 0, but got '{}'", topk_);
        throw_exception(eInvalidArgument);
      }
    }
  }

  // Post-process entry: expects infer_res["output"] to be a float tensor
  // whose second dim is the class count.
  Result<Value> operator()(const Value& infer_res) {
    MMDEPLOY_DEBUG("infer_res: {}", infer_res);
    auto output = infer_res["output"].get<Tensor>();
    if (!(output.shape().size() >= 2 && output.data_type() == DataType::kFLOAT)) {
      MMDEPLOY_ERROR("unsupported `output` tensor, shape: {}, dtype: {}", output.shape(),
                     (int)output.data_type());
      return Status(eNotSupported);
    }
    auto class_num = (int)output.shape(1);
    OUTCOME_TRY(auto _scores, MakeAvailableOnDevice(output, kHost, stream()));
    OUTCOME_TRY(stream().Wait());
    return GetLabels(_scores, class_num);
  }

 private:
  // Returns the top-k {label_id, score} pairs, highest score first.
  Value GetLabels(const Tensor& scores, int class_num) const {
    auto scores_data = scores.data<float>();
    // clamp k so partial_sort never runs past end()
    auto topk = std::min(topk_, class_num);
    Labels output;
    output.reserve(topk);
    std::vector<int> idx(class_num);
    iota(begin(idx), end(idx), 0);
    partial_sort(begin(idx), begin(idx) + topk, end(idx),
                 [&](int i, int j) { return scores_data[i] > scores_data[j]; });
    auto sum_exp = 0.f;
    std::vector<float> exp_scores;
    if (softmax_) {
      exp_scores.reserve(class_num);
      // idx[0] is the argmax after partial_sort (topk >= 1); subtracting it
      // before exp() keeps the softmax numerically stable
      auto max_val = scores_data[idx[0]];
      for (int i = 0; i < class_num; ++i) {
        sum_exp += exp_scores.emplace_back(std::exp(scores_data[i] - max_val));
      }
    }
    for (int i = 0; i < topk; ++i) {
      float score = 0.f;
      if (softmax_) {
        score = exp_scores[idx[i]] / sum_exp;
      } else {
        score = scores_data[idx[i]];
      }
      output.push_back({idx[i], score});
    }
    return to_value(std::move(output));
  }

 private:
  static constexpr const auto kHost = Device{0};
  bool softmax_{false};
  int topk_{1};
};
// Register the head, plus an alias matching the ConformerHead config name
// (same implementation as LinearClsHead).
MMDEPLOY_REGISTER_CODEBASE_COMPONENT(MMClassification, LinearClsHead);
using ConformerHead = LinearClsHead;
MMDEPLOY_REGISTER_CODEBASE_COMPONENT(MMClassification, ConformerHead);
// Module that crops a detection bbox out of the original image, producing a
// new "ori_img" for downstream classification.
class CropBox {
 public:
  // When `dets` carries a "bbox" ([x1, y1, x2, y2, ...]), crop that region
  // from img["ori_img"]; otherwise pass the image through unchanged.
  Result<Value> operator()(const Value& img, const Value& dets) {
    auto patch = img["ori_img"].get<Mat>();
    if (dets.is_object() && dets.contains("bbox")) {
      auto _box = from_value<std::vector<float>>(dets["bbox"]);
      cv::Rect rect(cv::Rect_<float>(cv::Point2f(_box[0], _box[1]), cv::Point2f(_box[2], _box[3])));
      patch = crop(patch, rect);
    }
    return Value{{"ori_img", patch}};
  }

 private:
  // Clone the clipped ROI into a fresh buffer and wrap it as an mmdeploy
  // Mat sharing the source's pixel format and type.
  static Mat crop(const Mat& img, cv::Rect rect) {
    cv::Mat mat(img.height(), img.width(), CV_8UC(img.channel()), img.data<void>());
    // clip the rect to the image bounds before cropping
    rect &= cv::Rect(cv::Point(0, 0), mat.size());
    mat = mat(rect).clone();
    // the deleter captures `mat` by copy, keeping the cv::Mat refcount (and
    // thus the pixel buffer) alive as long as the returned Mat's data is
    std::shared_ptr<void> data(mat.data, [mat = mat](void*) {});
    return Mat{mat.rows, mat.cols, img.pixel_format(), img.type(), std::move(data)};
  }
};
// Register CropBox as a stateless Module task.
MMDEPLOY_REGISTER_FACTORY_FUNC(Module, (CropBox, 0),
                               [](const Value&) { return CreateTask(CropBox{}); });
} // namespace mmdeploy::mmcls
// Copyright (c) OpenMMLab. All rights reserved.
#include "mmdeploy/codebase/mmcls/mmcls.h"
namespace mmdeploy::mmcls {
// Instantiate the MMClassification codebase registry and its Module creator.
MMDEPLOY_REGISTER_CODEBASE(MMClassification);
}  // namespace mmdeploy::mmcls
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_SRC_CODEBASE_MMCLS_MMCLS_H_
#define MMDEPLOY_SRC_CODEBASE_MMCLS_MMCLS_H_
#include "mmdeploy/codebase/common.h"
#include "mmdeploy/core/device.h"
#include "mmdeploy/core/module.h"
#include "mmdeploy/core/serialization.h"
namespace mmdeploy::mmcls {
// One classification result: class index plus its score.
struct Label {
  int label_id;
  float score;
  MMDEPLOY_ARCHIVE_MEMBERS(label_id, score);
};
using Labels = std::vector<Label>;
// Declares the MMClassification codebase tag and its component registry.
MMDEPLOY_DECLARE_CODEBASE(MMClassification, mmcls);
}  // namespace mmdeploy::mmcls
#endif  // MMDEPLOY_SRC_CODEBASE_MMCLS_MMCLS_H_
// Copyright (c) OpenMMLab. All rights reserved.
#include <algorithm>
#include <numeric>
#include "mmdeploy/codebase/mmcls/mmcls.h"
#include "mmdeploy/core/tensor.h"
#include "mmdeploy/core/utils/device_utils.h"
#include "mmdeploy/core/utils/formatter.h"
#include "mmdeploy/experimental/module_adapter.h"
using std::vector;
namespace mmdeploy::mmcls {
// Multi-label classification head: returns the raw score of EVERY class
// rather than a top-k selection.
class MultiLabelLinearClsHead : public MMClassification {
 public:
  explicit MultiLabelLinearClsHead(const Value& cfg) : MMClassification(cfg) {}

  // Post-process entry: expects infer_res["output"] to be a float tensor
  // whose second dim is the class count.
  Result<Value> operator()(const Value& infer_res) {
    MMDEPLOY_DEBUG("infer_res: {}", infer_res);
    auto output = infer_res["output"].get<Tensor>();
    const bool supported = output.shape().size() >= 2 && output.data_type() == DataType::kFLOAT;
    if (!supported) {
      MMDEPLOY_ERROR("unsupported `output` tensor, shape: {}, dtype: {}", output.shape(),
                     (int)output.data_type());
      return Status(eNotSupported);
    }
    auto class_num = (int)output.shape(1);
    OUTCOME_TRY(auto host_scores, MakeAvailableOnDevice(output, kHost, stream()));
    OUTCOME_TRY(stream().Wait());
    return GetLabels(host_scores, class_num);
  }

 private:
  // Build one {class_id, score} entry per class.
  Value GetLabels(const Tensor& scores, int class_num) const {
    auto scores_data = scores.data<float>();
    Labels output;
    for (int class_id = 0; class_id < class_num; ++class_id) {
      Label label{class_id, scores_data[class_id]};
      MMDEPLOY_DEBUG("label_id: {}, score: {}", label.label_id, label.score);
      output.push_back(label);
    }
    return to_value(std::move(output));
  }

 private:
  static constexpr const auto kHost = Device{0};
};
// Register the multi-label head under the MMClassification registry.
MMDEPLOY_REGISTER_CODEBASE_COMPONENT(MMClassification, MultiLabelLinearClsHead);
} // namespace mmdeploy::mmcls
# Copyright (c) OpenMMLab. All rights reserved.
project(mmdeploy_mmdet)

# bugfix: glob the pattern inside the source dir; the previous two-argument
# form `file(GLOB_RECURSE SRCS ${DIR} "*.cpp")` also added the directory
# path itself to SRCS.
file(GLOB_RECURSE SRCS "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp")
mmdeploy_add_module(${PROJECT_NAME} "${SRCS}")
target_link_libraries(${PROJECT_NAME}
        PRIVATE mmdeploy_opencv_utils mmdeploy_operation)
add_library(mmdeploy::mmdet ALIAS ${PROJECT_NAME})
# expose the task so the SDK build can enumerate it
set(MMDEPLOY_TASKS ${MMDEPLOY_TASKS} detector CACHE INTERNAL "")
// Copyright (c) OpenMMLab. All rights reserved.
#include "base_dense_head.h"
#include <numeric>
#include "mmdeploy/core/model.h"
#include "mmdeploy/core/utils/device_utils.h"
#include "mmdeploy/core/utils/formatter.h"
#include "utils.h"
namespace mmdeploy::mmdet {
// Reads NMS/thresholding parameters from cfg["params"]; `init().value()`
// rethrows any failure as an exception during construction.
BaseDenseHead::BaseDenseHead(const Value& cfg) : MMDetection(cfg) {
  auto init = [&]() -> Result<void> {
    // `model` is otherwise unused; presumably the get<Model>() call is kept
    // to fail fast when the context lacks a model — TODO confirm
    auto model = cfg["context"]["model"].get<Model>();
    if (cfg.contains("params")) {
      nms_pre_ = cfg["params"].value("nms_pre", -1);
      score_thr_ = cfg["params"].value("score_thr", 0.02f);
      min_bbox_size_ = cfg["params"].value("min_bbox_size", 0);
      // nms.iou_threshold defaults to 0.45 whether or not "nms" is present
      iou_threshold_ = cfg["params"].contains("nms")
                           ? cfg["params"]["nms"].value("iou_threshold", 0.45f)
                           : 0.45f;
    }
    return success();
  };
  init().value();
}
// Post-process entry: copy the raw head outputs to host, then decode and
// filter them into final detections. Any exception maps to eFail.
Result<Value> BaseDenseHead::operator()(const Value& prep_res, const Value& infer_res) {
  MMDEPLOY_DEBUG("prep_res: {}\ninfer_res: {}", prep_res, infer_res);
  try {
    auto dets = infer_res["dets"].get<Tensor>();
    // NOTE(review): the "labels" output is treated as per-anchor scores
    // throughout GetBBoxes — confirm against the exported model's outputs
    auto scores = infer_res["labels"].get<Tensor>();
    const Device kHost{0, 0};
    OUTCOME_TRY(auto _dets, MakeAvailableOnDevice(dets, kHost, stream()));
    OUTCOME_TRY(auto _scores, MakeAvailableOnDevice(scores, kHost, stream()));
    OUTCOME_TRY(stream().Wait());
    OUTCOME_TRY(auto result, GetBBoxes(prep_res["img_metas"], _dets, _scores));
    return to_value(result);
  } catch (...) {
    return Status(eFail);
  }
}
// Decode host-side dets/scores into final detections: score-filter and
// top-k, sort, NMS, map boxes back to the original image, and drop boxes
// smaller than min_bbox_size_.
Result<Detections> BaseDenseHead::GetBBoxes(const Value& prep_res, const Tensor& dets,
                                            const Tensor& scores) const {
  MMDEPLOY_DEBUG("dets: {}, {}", dets.shape(), dets.data_type());
  MMDEPLOY_DEBUG("scores: {}, {}", scores.shape(), scores.data_type());

  std::vector<float> probs;
  std::vector<int> label_ids;
  std::vector<int> anchor_idxs;
  FilterScoresAndTopk(scores, score_thr_, nms_pre_, probs, label_ids, anchor_idxs);
  Sort(probs, label_ids, anchor_idxs);
  // NMS marks suppressed entries by setting their anchor index to -1
  NMS(dets, iou_threshold_, anchor_idxs);

  Detections objs;
  std::vector<float> scale_factor;
  if (prep_res.contains("scale_factor")) {
    from_value(prep_res["scale_factor"], scale_factor);
  } else {
    scale_factor = {1.f, 1.f, 1.f, 1.f};
  }
  int ori_width = prep_res["ori_shape"][2].get<int>();
  int ori_height = prep_res["ori_shape"][1].get<int>();

  auto det_ptr = dets.data<float>();
  // fixed signed/unsigned mismatch: the index is size_t, matching .size()
  for (size_t i = 0; i < anchor_idxs.size(); ++i) {
    if (anchor_idxs[i] == -1) {
      continue;  // suppressed by NMS
    }
    int j = anchor_idxs[i];
    auto x1 = det_ptr[j * 4 + 0];
    auto y1 = det_ptr[j * 4 + 1];
    auto x2 = det_ptr[j * 4 + 2];
    auto y2 = det_ptr[j * 4 + 3];
    int label_id = label_ids[i];
    float score = probs[i];

    MMDEPLOY_DEBUG("{}-th box: ({}, {}, {}, {}), {}, {}", i, x1, y1, x2, y2, label_id, score);

    // undo the preprocessing scale and clip to the original image
    auto rect =
        MapToOriginImage(x1, y1, x2, y2, scale_factor.data(), 0, 0, ori_width, ori_height, 0, 0);
    if (rect[2] - rect[0] < min_bbox_size_ || rect[3] - rect[1] < min_bbox_size_) {
      MMDEPLOY_DEBUG("ignore small bbox with width '{}' and height '{}", rect[2] - rect[0],
                     rect[3] - rect[1]);
      continue;
    }
    Detection det{};
    det.index = static_cast<int>(i);
    det.label_id = label_id;
    det.score = score;
    det.bbox = rect;
    objs.push_back(std::move(det));
  }
  return objs;
}
// Register the dense head under the MMDetection registry.
MMDEPLOY_REGISTER_CODEBASE_COMPONENT(MMDetection, BaseDenseHead);
} // namespace mmdeploy::mmdet
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment