nms.h 903 Bytes
Newer Older
1
#pragma once
2
#include "cpu/vision_cpu.h"
3
4

#ifdef WITH_CUDA
5
#include "autocast.h"
6
#include "cuda/vision_cuda.h"
7
#endif
8
#ifdef WITH_HIP
9
#include "autocast.h"
10
11
#include "hip/vision_cuda.h"
#endif
12

13
// nms dispatch nexus
14
15
16
/// Dispatcher entry point for the `torchvision::nms` operator.
///
/// Looks up the schema registered under the name "torchvision::nms" (empty
/// overload name) in the c10 dispatcher singleton, obtains a handle typed
/// with this function's own signature, and forwards the arguments to it.
/// The handle lives in a function-local static, so the schema lookup is
/// performed only the first time this function is called.
///
/// Declared `inline` because the definition lives in a header: without it,
/// including this header from more than one translation unit produces
/// multiple-definition link errors.
///
/// @param dets           Forwarded unchanged to the dispatched operator
///                       (presumably the candidate boxes -- confirm against
///                       the registered kernels).
/// @param scores         Forwarded unchanged to the dispatched operator
///                       (presumably one score per box -- confirm).
/// @param iou_threshold  Forwarded unchanged to the dispatched operator.
/// @return Whatever tensor the dispatched operator returns.
/// @note `findSchemaOrThrow` is expected to throw if no such schema is
///       registered (per its name; verify against the dispatcher docs).
inline at::Tensor nms(
    const at::Tensor& dets,
    const at::Tensor& scores,
    const double iou_threshold) {
  // decltype(nms) gives the typed handle the exact signature of this function.
  static auto op = c10::Dispatcher::singleton()
                       .findSchemaOrThrow("torchvision::nms", "")
                       .typed<decltype(nms)>();
  return op.call(dets, scores, iou_threshold);
}
23

24
#if defined(WITH_CUDA) || defined(WITH_HIP)
25
26
27
28
29
30
31
32
33
/// Autocast wrapper around `nms`.
///
/// Excludes the Autocast dispatch key for the duration of the call, passes
/// both tensor arguments through `autocast::_cast` with `at::kFloat` as the
/// target type, and then re-enters `nms` with the results.
///
/// Declared `inline` because the definition lives in a header: without it,
/// including this header from more than one translation unit produces
/// multiple-definition link errors.
///
/// @param dets           Passed through `autocast::_cast(at::kFloat, ...)`
///                       before being handed to `nms`.
/// @param scores         Passed through `autocast::_cast(at::kFloat, ...)`
///                       before being handed to `nms`.
/// @param iou_threshold  Forwarded unchanged to `nms`.
/// @return The tensor returned by `nms`.
inline at::Tensor nms_autocast(
    const at::Tensor& dets,
    const at::Tensor& scores,
    const double iou_threshold) {
  // RAII guard: the Autocast key stays excluded until this scope exits, so
  // the nested nms() call below is dispatched without the Autocast key.
  c10::impl::ExcludeDispatchKeyGuard no_autocast(c10::DispatchKey::Autocast);
  return nms(
      autocast::_cast(at::kFloat, dets),
      autocast::_cast(at::kFloat, scores),
      iou_threshold);
}
35
#endif