// nms_kernel.cpp
#include "../nms.h"

#include <ATen/autocast_mode.h>
#include <torch/library.h>
#include <torch/types.h>

namespace vision {
namespace ops {

namespace {

12
template <c10::DispatchKey autocast_key, c10::DeviceType device_type>
13
14
15
16
at::Tensor nms_autocast(
    const at::Tensor& dets,
    const at::Tensor& scores,
    double iou_threshold) {
17
18
  c10::impl::ExcludeDispatchKeyGuard no_autocast(autocast_key);

19
  return nms(
20
21
      at::autocast::cached_cast(at::kFloat, dets, device_type),
      at::autocast::cached_cast(at::kFloat, scores, device_type),
22
23
24
25
26
27
      iou_threshold);
}

} // namespace

// Registers the autocast wrapper as the implementation of `torchvision::nms`
// under the Autocast dispatch key, instantiated for DeviceType::CUDA.
TORCH_LIBRARY_IMPL(torchvision, Autocast, m) {
  m.impl(
      TORCH_SELECTIVE_NAME("torchvision::nms"),
      TORCH_FN(
          (nms_autocast<c10::DispatchKey::Autocast, c10::DeviceType::CUDA>)));
}

// Registers the autocast wrapper as the implementation of `torchvision::nms`
// under the AutocastCPU dispatch key, instantiated for DeviceType::CPU.
TORCH_LIBRARY_IMPL(torchvision, AutocastCPU, m) {
  m.impl(
      TORCH_SELECTIVE_NAME("torchvision::nms"),
      TORCH_FN(
          (nms_autocast<c10::DispatchKey::AutocastCPU, c10::DeviceType::CPU>)));
}

} // namespace ops
} // namespace vision