Commit 804235b0 authored by Dave Schnizlein's avatar Dave Schnizlein Committed by Facebook GitHub Bot
Browse files

Remove point mesh edge kernels

Summary:
Removes the now-unnecessary kernels from point mesh edge file

Migrates all point mesh functionality into one file.

Reviewed By: gkioxari

Differential Revision: D24550086

fbshipit-source-id: f924996cd38a7c2c1cf189d8a01611de4506cfa3
parent 8dcfe30f
......@@ -15,8 +15,7 @@
#include "interp_face_attrs/interp_face_attrs.h"
#include "knn/knn.h"
#include "packed_to_padded_tensor/packed_to_padded_tensor.h"
#include "point_mesh/point_mesh_edge.h"
#include "point_mesh/point_mesh_face.h"
#include "point_mesh/point_mesh_cuda.h"
#include "rasterize_meshes/rasterize_meshes.h"
#include "rasterize_points/rasterize_points.h"
......
......@@ -12,7 +12,7 @@
#include "utils/warp_reduce.cuh"
// ****************************************************************************
// * PointFaceDistance *
// * Generic Forward/Backward Kernels *
// ****************************************************************************
__global__ void DistanceForwardKernel(
......@@ -202,16 +202,6 @@ std::tuple<at::Tensor, at::Tensor> DistanceForwardCuda(
return std::make_tuple(dists, idxs);
}
std::tuple<at::Tensor, at::Tensor> PointFaceDistanceForwardCuda(
const at::Tensor& points,
const at::Tensor& points_first_idx,
const at::Tensor& tris,
const at::Tensor& tris_first_idx,
const int64_t max_points) {
return DistanceForwardCuda(
points, 1, points_first_idx, tris, 3, tris_first_idx, max_points);
}
__global__ void DistanceBackwardKernel(
const float* __restrict__ objects, // (O * oD * 3)
const size_t objects_size, // O
......@@ -365,6 +355,20 @@ std::tuple<at::Tensor, at::Tensor> DistanceBackwardCuda(
return std::make_tuple(grad_points, grad_tris);
}
// ****************************************************************************
// * PointFaceDistance *
// ****************************************************************************
std::tuple<at::Tensor, at::Tensor> PointFaceDistanceForwardCuda(
const at::Tensor& points,
const at::Tensor& points_first_idx,
const at::Tensor& tris,
const at::Tensor& tris_first_idx,
const int64_t max_points) {
return DistanceForwardCuda(
points, 1, points_first_idx, tris, 3, tris_first_idx, max_points);
}
std::tuple<at::Tensor, at::Tensor> PointFaceDistanceBackwardCuda(
const at::Tensor& points,
const at::Tensor& tris,
......@@ -395,9 +399,54 @@ std::tuple<at::Tensor, at::Tensor> FacePointDistanceBackwardCuda(
return DistanceBackwardCuda(tris, 3, points, 1, idx_tris, grad_dists);
}
// ****************************************************************************
// * PointEdgeDistance *
// ****************************************************************************
std::tuple<at::Tensor, at::Tensor> PointEdgeDistanceForwardCuda(
const at::Tensor& points,
const at::Tensor& points_first_idx,
const at::Tensor& segms,
const at::Tensor& segms_first_idx,
const int64_t max_points) {
return DistanceForwardCuda(
points, 1, points_first_idx, segms, 2, segms_first_idx, max_points);
}
std::tuple<at::Tensor, at::Tensor> PointEdgeDistanceBackwardCuda(
const at::Tensor& points,
const at::Tensor& segms,
const at::Tensor& idx_points,
const at::Tensor& grad_dists) {
return DistanceBackwardCuda(points, 1, segms, 2, idx_points, grad_dists);
}
// ****************************************************************************
// * EdgePointDistance *
// ****************************************************************************
std::tuple<at::Tensor, at::Tensor> EdgePointDistanceForwardCuda(
const at::Tensor& points,
const at::Tensor& points_first_idx,
const at::Tensor& segms,
const at::Tensor& segms_first_idx,
const int64_t max_segms) {
return DistanceForwardCuda(
segms, 2, segms_first_idx, points, 1, points_first_idx, max_segms);
}
std::tuple<at::Tensor, at::Tensor> EdgePointDistanceBackwardCuda(
const at::Tensor& points,
const at::Tensor& segms,
const at::Tensor& idx_segms,
const at::Tensor& grad_dists) {
return DistanceBackwardCuda(segms, 2, points, 1, idx_segms, grad_dists);
}
// ****************************************************************************
// * PointFaceArrayDistance *
// ****************************************************************************
// TODO: Create wrapper function and merge kernel with other array kernel
__global__ void PointFaceArrayForwardKernel(
const float* __restrict__ points, // (P, 3)
......@@ -565,3 +614,164 @@ std::tuple<at::Tensor, at::Tensor> PointFaceArrayDistanceBackwardCuda(
AT_CUDA_CHECK(cudaGetLastError());
return std::make_tuple(grad_points, grad_tris);
}
// ****************************************************************************
// * PointEdgeArrayDistance *
// ****************************************************************************
// TODO: Create wrapper function and merge kernel with other array kernel
__global__ void PointEdgeArrayForwardKernel(
const float* __restrict__ points, // (P, 3)
const float* __restrict__ segms, // (S, 2, 3)
float* __restrict__ dists, // (P, S)
const size_t P,
const size_t S) {
float3* points_f3 = (float3*)points;
float3* segms_f3 = (float3*)segms;
// Parallelize over P * S computations
const int num_threads = gridDim.x * blockDim.x;
const int tid = blockIdx.x * blockDim.x + threadIdx.x;
for (int t_i = tid; t_i < P * S; t_i += num_threads) {
const int s = t_i / P; // segment index.
const int p = t_i % P; // point index
float3 a = segms_f3[s * 2 + 0];
float3 b = segms_f3[s * 2 + 1];
float3 point = points_f3[p];
float dist = PointLine3DistanceForward(point, a, b);
dists[p * S + s] = dist;
}
}
at::Tensor PointEdgeArrayDistanceForwardCuda(
const at::Tensor& points,
const at::Tensor& segms) {
// Check inputs are on the same device
at::TensorArg points_t{points, "points", 1}, segms_t{segms, "segms", 2};
at::CheckedFrom c = "PointEdgeArrayDistanceForwardCuda";
at::checkAllSameGPU(c, {points_t, segms_t});
at::checkAllSameType(c, {points_t, segms_t});
// Set the device for the kernel launch based on the device of the input
at::cuda::CUDAGuard device_guard(points.device());
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
const int64_t P = points.size(0);
const int64_t S = segms.size(0);
TORCH_CHECK(points.size(1) == 3, "points must be of shape Px3");
TORCH_CHECK(
(segms.size(1) == 2) && (segms.size(2) == 3),
"segms must be of shape Sx2x3");
at::Tensor dists = at::zeros({P, S}, points.options());
if (dists.numel() == 0) {
AT_CUDA_CHECK(cudaGetLastError());
return dists;
}
const size_t blocks = 1024;
const size_t threads = 64;
PointEdgeArrayForwardKernel<<<blocks, threads, 0, stream>>>(
points.contiguous().data_ptr<float>(),
segms.contiguous().data_ptr<float>(),
dists.data_ptr<float>(),
P,
S);
AT_CUDA_CHECK(cudaGetLastError());
return dists;
}
__global__ void PointEdgeArrayBackwardKernel(
const float* __restrict__ points, // (P, 3)
const float* __restrict__ segms, // (S, 2, 3)
const float* __restrict__ grad_dists, // (P, S)
float* __restrict__ grad_points, // (P, 3)
float* __restrict__ grad_segms, // (S, 2, 3)
const size_t P,
const size_t S) {
float3* points_f3 = (float3*)points;
float3* segms_f3 = (float3*)segms;
// Parallelize over P * S computations
const int num_threads = gridDim.x * blockDim.x;
const int tid = blockIdx.x * blockDim.x + threadIdx.x;
for (int t_i = tid; t_i < P * S; t_i += num_threads) {
const int s = t_i / P; // segment index.
const int p = t_i % P; // point index
const float3 a = segms_f3[s * 2 + 0];
const float3 b = segms_f3[s * 2 + 1];
const float3 point = points_f3[p];
const float grad_dist = grad_dists[p * S + s];
const auto grads = PointLine3DistanceBackward(point, a, b, grad_dist);
const float3 grad_point = thrust::get<0>(grads);
const float3 grad_a = thrust::get<1>(grads);
const float3 grad_b = thrust::get<2>(grads);
atomicAdd(grad_points + p * 3 + 0, grad_point.x);
atomicAdd(grad_points + p * 3 + 1, grad_point.y);
atomicAdd(grad_points + p * 3 + 2, grad_point.z);
atomicAdd(grad_segms + s * 2 * 3 + 0 * 3 + 0, grad_a.x);
atomicAdd(grad_segms + s * 2 * 3 + 0 * 3 + 1, grad_a.y);
atomicAdd(grad_segms + s * 2 * 3 + 0 * 3 + 2, grad_a.z);
atomicAdd(grad_segms + s * 2 * 3 + 1 * 3 + 0, grad_b.x);
atomicAdd(grad_segms + s * 2 * 3 + 1 * 3 + 1, grad_b.y);
atomicAdd(grad_segms + s * 2 * 3 + 1 * 3 + 2, grad_b.z);
}
}
std::tuple<at::Tensor, at::Tensor> PointEdgeArrayDistanceBackwardCuda(
const at::Tensor& points,
const at::Tensor& segms,
const at::Tensor& grad_dists) {
// Check inputs are on the same device
at::TensorArg points_t{points, "points", 1}, segms_t{segms, "segms", 2},
grad_dists_t{grad_dists, "grad_dists", 3};
at::CheckedFrom c = "PointEdgeArrayDistanceBackwardCuda";
at::checkAllSameGPU(c, {points_t, segms_t, grad_dists_t});
at::checkAllSameType(c, {points_t, segms_t, grad_dists_t});
// Set the device for the kernel launch based on the device of the input
at::cuda::CUDAGuard device_guard(points.device());
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
const int64_t P = points.size(0);
const int64_t S = segms.size(0);
TORCH_CHECK(points.size(1) == 3, "points must be of shape Px3");
TORCH_CHECK(
(segms.size(1) == 2) && (segms.size(2) == 3),
"segms must be of shape Sx2x3");
TORCH_CHECK((grad_dists.size(0) == P) && (grad_dists.size(1) == S));
at::Tensor grad_points = at::zeros({P, 3}, points.options());
at::Tensor grad_segms = at::zeros({S, 2, 3}, segms.options());
if (grad_points.numel() == 0 || grad_segms.numel() == 0) {
AT_CUDA_CHECK(cudaGetLastError());
return std::make_tuple(grad_points, grad_segms);
}
const size_t blocks = 1024;
const size_t threads = 64;
PointEdgeArrayBackwardKernel<<<blocks, threads, 0, stream>>>(
points.contiguous().data_ptr<float>(),
segms.contiguous().data_ptr<float>(),
grad_dists.contiguous().data_ptr<float>(),
grad_points.data_ptr<float>(),
grad_segms.data_ptr<float>(),
P,
S);
AT_CUDA_CHECK(cudaGetLastError());
return std::make_tuple(grad_points, grad_segms);
}
......@@ -241,6 +241,242 @@ std::tuple<torch::Tensor, torch::Tensor> FacePointDistanceBackward(
return FacePointDistanceBackwardCpu(points, tris, idx_tris, grad_dists);
}
// ****************************************************************************
// * PointEdgeDistance *
// ****************************************************************************
// Computes the squared euclidean distance of each p in points to the closest
// mesh edge belonging to the corresponding example in the batch of size N.
//
// Args:
// points: FloatTensor of shape (P, 3)
// points_first_idx: LongTensor of shape (N,) indicating the first point
// index for each example in the batch
// segms: FloatTensor of shape (S, 2, 3) of edge segments. The s-th edge
// segment is spanned by (segms[s, 0], segms[s, 1])
// segms_first_idx: LongTensor of shape (N,) indicating the first edge
// index for each example in the batch
// max_points: Scalar equal to max(P_i) for i in [0, N - 1] containing
// the maximum number of points in the batch and is used to set
// the grid dimensions in the CUDA implementation.
//
// Returns:
// dists: FloatTensor of shape (P,), where dists[p] is the squared euclidean
// distance of points[p] to the closest edge in the same example in the
// batch.
// idxs: LongTensor of shape (P,), where idxs[p] is the index of the closest
// edge in the batch.
// So, dists[p] = d(points[p], segms[idxs[p], 0], segms[idxs[p], 1]),
// where d(u, v0, v1) is the distance of u from the segment spanned by
// (v0, v1).
//
#ifdef WITH_CUDA
std::tuple<torch::Tensor, torch::Tensor> PointEdgeDistanceForwardCuda(
const torch::Tensor& points,
const torch::Tensor& points_first_idx,
const torch::Tensor& segms,
const torch::Tensor& segms_first_idx,
const int64_t max_points);
#endif
std::tuple<torch::Tensor, torch::Tensor> PointEdgeDistanceForwardCpu(
const torch::Tensor& points,
const torch::Tensor& points_first_idx,
const torch::Tensor& segms,
const torch::Tensor& segms_first_idx,
const int64_t max_points);
std::tuple<torch::Tensor, torch::Tensor> PointEdgeDistanceForward(
const torch::Tensor& points,
const torch::Tensor& points_first_idx,
const torch::Tensor& segms,
const torch::Tensor& segms_first_idx,
const int64_t max_points) {
if (points.is_cuda()) {
#ifdef WITH_CUDA
CHECK_CUDA(points);
CHECK_CUDA(points_first_idx);
CHECK_CUDA(segms);
CHECK_CUDA(segms_first_idx);
return PointEdgeDistanceForwardCuda(
points, points_first_idx, segms, segms_first_idx, max_points);
#else
AT_ERROR("Not compiled with GPU support.");
#endif
}
return PointEdgeDistanceForwardCpu(
points, points_first_idx, segms, segms_first_idx, max_points);
}
// Backward pass for PointEdgeDistance.
//
// Args:
// points: FloatTensor of shape (P, 3)
// segms: FloatTensor of shape (S, 2, 3)
// idx_points: LongTensor of shape (P,) containing the indices
// of the closest edge in the example in the batch.
// This is computed by the forward pass.
// grad_dists: FloatTensor of shape (P,)
//
// Returns:
// grad_points: FloatTensor of shape (P, 3)
// grad_segms: FloatTensor of shape (S, 2, 3)
//
#ifdef WITH_CUDA
std::tuple<torch::Tensor, torch::Tensor> PointEdgeDistanceBackwardCuda(
const torch::Tensor& points,
const torch::Tensor& segms,
const torch::Tensor& idx_points,
const torch::Tensor& grad_dists);
#endif
std::tuple<torch::Tensor, torch::Tensor> PointEdgeDistanceBackwardCpu(
const torch::Tensor& points,
const torch::Tensor& segms,
const torch::Tensor& idx_points,
const torch::Tensor& grad_dists);
std::tuple<torch::Tensor, torch::Tensor> PointEdgeDistanceBackward(
const torch::Tensor& points,
const torch::Tensor& segms,
const torch::Tensor& idx_points,
const torch::Tensor& grad_dists) {
if (points.is_cuda()) {
#ifdef WITH_CUDA
CHECK_CUDA(points);
CHECK_CUDA(segms);
CHECK_CUDA(idx_points);
CHECK_CUDA(grad_dists);
return PointEdgeDistanceBackwardCuda(points, segms, idx_points, grad_dists);
#else
AT_ERROR("Not compiled with GPU support.");
#endif
}
return PointEdgeDistanceBackwardCpu(points, segms, idx_points, grad_dists);
}
// ****************************************************************************
// * EdgePointDistance *
// ****************************************************************************
// Computes the squared euclidean distance of each edge segment to the closest
// point belonging to the corresponding example in the batch of size N.
//
// Args:
// points: FloatTensor of shape (P, 3)
// points_first_idx: LongTensor of shape (N,) indicating the first point
// index for each example in the batch
// segms: FloatTensor of shape (S, 2, 3) of edge segments. The s-th edge
// segment is spanned by (segms[s, 0], segms[s, 1])
// segms_first_idx: LongTensor of shape (N,) indicating the first edge
// index for each example in the batch
// max_segms: Scalar equal to max(S_i) for i in [0, N - 1] containing
// the maximum number of edges in the batch and is used to set
// the block dimensions in the CUDA implementation.
//
// Returns:
// dists: FloatTensor of shape (S,), where dists[s] is the squared
// euclidean distance of s-th edge to the closest point in the
// corresponding example in the batch.
// idxs: LongTensor of shape (S,), where idxs[s] is the index of the closest
// point in the example in the batch.
// So, dists[s] = d(points[idxs[s]], segms[s, 0], segms[s, 1]), where
// d(u, v0, v1) is the distance of u from the segment spanned by (v0, v1)
//
//
#ifdef WITH_CUDA
std::tuple<torch::Tensor, torch::Tensor> EdgePointDistanceForwardCuda(
const torch::Tensor& points,
const torch::Tensor& points_first_idx,
const torch::Tensor& segms,
const torch::Tensor& segms_first_idx,
const int64_t max_segms);
#endif
std::tuple<torch::Tensor, torch::Tensor> EdgePointDistanceForwardCpu(
const torch::Tensor& points,
const torch::Tensor& points_first_idx,
const torch::Tensor& segms,
const torch::Tensor& segms_first_idx,
const int64_t max_segms);
std::tuple<torch::Tensor, torch::Tensor> EdgePointDistanceForward(
const torch::Tensor& points,
const torch::Tensor& points_first_idx,
const torch::Tensor& segms,
const torch::Tensor& segms_first_idx,
const int64_t max_segms) {
if (points.is_cuda()) {
#ifdef WITH_CUDA
CHECK_CUDA(points);
CHECK_CUDA(points_first_idx);
CHECK_CUDA(segms);
CHECK_CUDA(segms_first_idx);
return EdgePointDistanceForwardCuda(
points, points_first_idx, segms, segms_first_idx, max_segms);
#else
AT_ERROR("Not compiled with GPU support.");
#endif
}
return EdgePointDistanceForwardCpu(
points, points_first_idx, segms, segms_first_idx, max_segms);
}
// Backward pass for EdgePointDistance.
//
// Args:
// points: FloatTensor of shape (P, 3)
// segms: FloatTensor of shape (S, 2, 3)
// idx_segms: LongTensor of shape (S,) containing the indices
// of the closest point in the example in the batch.
// This is computed by the forward pass
// grad_dists: FloatTensor of shape (S,)
//
// Returns:
// grad_points: FloatTensor of shape (P, 3)
// grad_segms: FloatTensor of shape (S, 2, 3)
//
#ifdef WITH_CUDA
std::tuple<torch::Tensor, torch::Tensor> EdgePointDistanceBackwardCuda(
const torch::Tensor& points,
const torch::Tensor& segms,
const torch::Tensor& idx_segms,
const torch::Tensor& grad_dists);
#endif
std::tuple<torch::Tensor, torch::Tensor> EdgePointDistanceBackwardCpu(
const torch::Tensor& points,
const torch::Tensor& segms,
const torch::Tensor& idx_segms,
const torch::Tensor& grad_dists);
std::tuple<torch::Tensor, torch::Tensor> EdgePointDistanceBackward(
const torch::Tensor& points,
const torch::Tensor& segms,
const torch::Tensor& idx_segms,
const torch::Tensor& grad_dists) {
if (points.is_cuda()) {
#ifdef WITH_CUDA
CHECK_CUDA(points);
CHECK_CUDA(segms);
CHECK_CUDA(idx_segms);
CHECK_CUDA(grad_dists);
return EdgePointDistanceBackwardCuda(points, segms, idx_segms, grad_dists);
#else
AT_ERROR("Not compiled with GPU support.");
#endif
}
return EdgePointDistanceBackwardCpu(points, segms, idx_segms, grad_dists);
}
// ****************************************************************************
// * PointFaceArrayDistance *
// ****************************************************************************
......@@ -328,3 +564,92 @@ std::tuple<torch::Tensor, torch::Tensor> PointFaceArrayDistanceBackward(
}
return PointFaceArrayDistanceBackwardCpu(points, tris, grad_dists);
}
// ****************************************************************************
// * PointEdgeArrayDistance *
// ****************************************************************************
// Computes the squared euclidean distance of each p in points to each edge
// segment in segms.
//
// Args:
// points: FloatTensor of shape (P, 3)
// segms: FloatTensor of shape (S, 2, 3) of edge segments. The s-th
// edge segment is spanned by (segms[s, 0], segms[s, 1])
//
// Returns:
// dists: FloatTensor of shape (P, S), where dists[p, s] is the squared
// euclidean distance of points[p] to the segment spanned by
// (segms[s, 0], segms[s, 1])
//
// For pointcloud and meshes of batch size N, this function requires N
// computations. The memory occupied is O(NPS) which can become quite large.
// For example, a medium sized batch with N = 32 with P = 10000 and S = 5000
// will require for the forward pass 5.8G of memory to store dists.
#ifdef WITH_CUDA
torch::Tensor PointEdgeArrayDistanceForwardCuda(
const torch::Tensor& points,
const torch::Tensor& segms);
#endif
torch::Tensor PointEdgeArrayDistanceForwardCpu(
const torch::Tensor& points,
const torch::Tensor& segms);
torch::Tensor PointEdgeArrayDistanceForward(
const torch::Tensor& points,
const torch::Tensor& segms) {
if (points.is_cuda()) {
#ifdef WITH_CUDA
CHECK_CUDA(points);
CHECK_CUDA(segms);
return PointEdgeArrayDistanceForwardCuda(points, segms);
#else
AT_ERROR("Not compiled with GPU support.");
#endif
}
return PointEdgeArrayDistanceForwardCpu(points, segms);
}
// Backward pass for PointEdgeArrayDistance.
//
// Args:
// points: FloatTensor of shape (P, 3)
// segms: FloatTensor of shape (S, 2, 3)
// grad_dists: FloatTensor of shape (P, S)
//
// Returns:
// grad_points: FloatTensor of shape (P, 3)
// grad_segms: FloatTensor of shape (S, 2, 3)
//
#ifdef WITH_CUDA
std::tuple<torch::Tensor, torch::Tensor> PointEdgeArrayDistanceBackwardCuda(
const torch::Tensor& points,
const torch::Tensor& segms,
const torch::Tensor& grad_dists);
#endif
std::tuple<torch::Tensor, torch::Tensor> PointEdgeArrayDistanceBackwardCpu(
const torch::Tensor& points,
const torch::Tensor& segms,
const torch::Tensor& grad_dists);
std::tuple<torch::Tensor, torch::Tensor> PointEdgeArrayDistanceBackward(
const torch::Tensor& points,
const torch::Tensor& segms,
const torch::Tensor& grad_dists) {
if (points.is_cuda()) {
#ifdef WITH_CUDA
CHECK_CUDA(points);
CHECK_CUDA(segms);
CHECK_CUDA(grad_dists);
return PointEdgeArrayDistanceBackwardCuda(points, segms, grad_dists);
#else
AT_ERROR("Not compiled with GPU support.");
#endif
}
return PointEdgeArrayDistanceBackwardCpu(points, segms, grad_dists);
}
This diff is collapsed.
// Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
#pragma once
#include <torch/extension.h>
#include <cstdio>
#include <tuple>
#include "utils/pytorch3d_cutils.h"
// ****************************************************************************
// * PointEdgeDistance *
// ****************************************************************************
// Computes the squared euclidean distance of each p in points to the closest
// mesh edge belonging to the corresponding example in the batch of size N.
//
// Args:
// points: FloatTensor of shape (P, 3)
// points_first_idx: LongTensor of shape (N,) indicating the first point
// index for each example in the batch
// segms: FloatTensor of shape (S, 2, 3) of edge segments. The s-th edge
// segment is spanned by (segms[s, 0], segms[s, 1])
// segms_first_idx: LongTensor of shape (N,) indicating the first edge
// index for each example in the batch
// max_points: Scalar equal to max(P_i) for i in [0, N - 1] containing
// the maximum number of points in the batch and is used to set
// the grid dimensions in the CUDA implementation.
//
// Returns:
// dists: FloatTensor of shape (P,), where dists[p] is the squared euclidean
// distance of points[p] to the closest edge in the same example in the
// batch.
// idxs: LongTensor of shape (P,), where idxs[p] is the index of the closest
// edge in the batch.
// So, dists[p] = d(points[p], segms[idxs[p], 0], segms[idxs[p], 1]),
// where d(u, v0, v1) is the distance of u from the segment spanned by
// (v0, v1).
//
#ifdef WITH_CUDA
std::tuple<torch::Tensor, torch::Tensor> PointEdgeDistanceForwardCuda(
const torch::Tensor& points,
const torch::Tensor& points_first_idx,
const torch::Tensor& segms,
const torch::Tensor& segms_first_idx,
const int64_t max_points);
#endif
std::tuple<torch::Tensor, torch::Tensor> PointEdgeDistanceForwardCpu(
const torch::Tensor& points,
const torch::Tensor& points_first_idx,
const torch::Tensor& segms,
const torch::Tensor& segms_first_idx,
const int64_t max_points);
std::tuple<torch::Tensor, torch::Tensor> PointEdgeDistanceForward(
const torch::Tensor& points,
const torch::Tensor& points_first_idx,
const torch::Tensor& segms,
const torch::Tensor& segms_first_idx,
const int64_t max_points) {
if (points.is_cuda()) {
#ifdef WITH_CUDA
CHECK_CUDA(points);
CHECK_CUDA(points_first_idx);
CHECK_CUDA(segms);
CHECK_CUDA(segms_first_idx);
return PointEdgeDistanceForwardCuda(
points, points_first_idx, segms, segms_first_idx, max_points);
#else
AT_ERROR("Not compiled with GPU support.");
#endif
}
return PointEdgeDistanceForwardCpu(
points, points_first_idx, segms, segms_first_idx, max_points);
}
// Backward pass for PointEdgeDistance.
//
// Args:
// points: FloatTensor of shape (P, 3)
// segms: FloatTensor of shape (S, 2, 3)
// idx_points: LongTensor of shape (P,) containing the indices
// of the closest edge in the example in the batch.
// This is computed by the forward pass.
// grad_dists: FloatTensor of shape (P,)
//
// Returns:
// grad_points: FloatTensor of shape (P, 3)
// grad_segms: FloatTensor of shape (S, 2, 3)
//
#ifdef WITH_CUDA
std::tuple<torch::Tensor, torch::Tensor> PointEdgeDistanceBackwardCuda(
const torch::Tensor& points,
const torch::Tensor& segms,
const torch::Tensor& idx_points,
const torch::Tensor& grad_dists);
#endif
std::tuple<torch::Tensor, torch::Tensor> PointEdgeDistanceBackwardCpu(
const torch::Tensor& points,
const torch::Tensor& segms,
const torch::Tensor& idx_points,
const torch::Tensor& grad_dists);
std::tuple<torch::Tensor, torch::Tensor> PointEdgeDistanceBackward(
const torch::Tensor& points,
const torch::Tensor& segms,
const torch::Tensor& idx_points,
const torch::Tensor& grad_dists) {
if (points.is_cuda()) {
#ifdef WITH_CUDA
CHECK_CUDA(points);
CHECK_CUDA(segms);
CHECK_CUDA(idx_points);
CHECK_CUDA(grad_dists);
return PointEdgeDistanceBackwardCuda(points, segms, idx_points, grad_dists);
#else
AT_ERROR("Not compiled with GPU support.");
#endif
}
return PointEdgeDistanceBackwardCpu(points, segms, idx_points, grad_dists);
}
// ****************************************************************************
// * EdgePointDistance *
// ****************************************************************************
// Computes the squared euclidean distance of each edge segment to the closest
// point belonging to the corresponding example in the batch of size N.
//
// Args:
// points: FloatTensor of shape (P, 3)
// points_first_idx: LongTensor of shape (N,) indicating the first point
// index for each example in the batch
// segms: FloatTensor of shape (S, 2, 3) of edge segments. The s-th edge
// segment is spanned by (segms[s, 0], segms[s, 1])
// segms_first_idx: LongTensor of shape (N,) indicating the first edge
// index for each example in the batch
// max_segms: Scalar equal to max(S_i) for i in [0, N - 1] containing
// the maximum number of edges in the batch and is used to set
// the block dimensions in the CUDA implementation.
//
// Returns:
// dists: FloatTensor of shape (S,), where dists[s] is the squared
// euclidean distance of s-th edge to the closest point in the
// corresponding example in the batch.
// idxs: LongTensor of shape (S,), where idxs[s] is the index of the closest
// point in the example in the batch.
// So, dists[s] = d(points[idxs[s]], segms[s, 0], segms[s, 1]), where
// d(u, v0, v1) is the distance of u from the segment spanned by (v0, v1)
//
//
#ifdef WITH_CUDA
std::tuple<torch::Tensor, torch::Tensor> EdgePointDistanceForwardCuda(
const torch::Tensor& points,
const torch::Tensor& points_first_idx,
const torch::Tensor& segms,
const torch::Tensor& segms_first_idx,
const int64_t max_segms);
#endif
std::tuple<torch::Tensor, torch::Tensor> EdgePointDistanceForwardCpu(
const torch::Tensor& points,
const torch::Tensor& points_first_idx,
const torch::Tensor& segms,
const torch::Tensor& segms_first_idx,
const int64_t max_segms);
std::tuple<torch::Tensor, torch::Tensor> EdgePointDistanceForward(
const torch::Tensor& points,
const torch::Tensor& points_first_idx,
const torch::Tensor& segms,
const torch::Tensor& segms_first_idx,
const int64_t max_segms) {
if (points.is_cuda()) {
#ifdef WITH_CUDA
CHECK_CUDA(points);
CHECK_CUDA(points_first_idx);
CHECK_CUDA(segms);
CHECK_CUDA(segms_first_idx);
return EdgePointDistanceForwardCuda(
points, points_first_idx, segms, segms_first_idx, max_segms);
#else
AT_ERROR("Not compiled with GPU support.");
#endif
}
return EdgePointDistanceForwardCpu(
points, points_first_idx, segms, segms_first_idx, max_segms);
}
// Backward pass for EdgePointDistance.
//
// Args:
// points: FloatTensor of shape (P, 3)
// segms: FloatTensor of shape (S, 2, 3)
// idx_segms: LongTensor of shape (S,) containing the indices
// of the closest point in the example in the batch.
// This is computed by the forward pass
// grad_dists: FloatTensor of shape (S,)
//
// Returns:
// grad_points: FloatTensor of shape (P, 3)
// grad_segms: FloatTensor of shape (S, 2, 3)
//
#ifdef WITH_CUDA
std::tuple<torch::Tensor, torch::Tensor> EdgePointDistanceBackwardCuda(
const torch::Tensor& points,
const torch::Tensor& segms,
const torch::Tensor& idx_segms,
const torch::Tensor& grad_dists);
#endif
std::tuple<torch::Tensor, torch::Tensor> EdgePointDistanceBackwardCpu(
const torch::Tensor& points,
const torch::Tensor& segms,
const torch::Tensor& idx_segms,
const torch::Tensor& grad_dists);
std::tuple<torch::Tensor, torch::Tensor> EdgePointDistanceBackward(
const torch::Tensor& points,
const torch::Tensor& segms,
const torch::Tensor& idx_segms,
const torch::Tensor& grad_dists) {
if (points.is_cuda()) {
#ifdef WITH_CUDA
CHECK_CUDA(points);
CHECK_CUDA(segms);
CHECK_CUDA(idx_segms);
CHECK_CUDA(grad_dists);
return EdgePointDistanceBackwardCuda(points, segms, idx_segms, grad_dists);
#else
AT_ERROR("Not compiled with GPU support.");
#endif
}
return EdgePointDistanceBackwardCpu(points, segms, idx_segms, grad_dists);
}
// ****************************************************************************
// * PointEdgeArrayDistance *
// ****************************************************************************
// Computes the squared euclidean distance of each p in points to each edge
// segment in segms.
//
// Args:
// points: FloatTensor of shape (P, 3)
// segms: FloatTensor of shape (S, 2, 3) of edge segments. The s-th
// edge segment is spanned by (segms[s, 0], segms[s, 1])
//
// Returns:
// dists: FloatTensor of shape (P, S), where dists[p, s] is the squared
// euclidean distance of points[p] to the segment spanned by
// (segms[s, 0], segms[s, 1])
//
// For pointcloud and meshes of batch size N, this function requires N
// computations. The memory occupied is O(NPS) which can become quite large.
// For example, a medium sized batch with N = 32 with P = 10000 and S = 5000
// will require for the forward pass 5.8G of memory to store dists.
#ifdef WITH_CUDA
torch::Tensor PointEdgeArrayDistanceForwardCuda(
const torch::Tensor& points,
const torch::Tensor& segms);
#endif
torch::Tensor PointEdgeArrayDistanceForwardCpu(
const torch::Tensor& points,
const torch::Tensor& segms);
torch::Tensor PointEdgeArrayDistanceForward(
const torch::Tensor& points,
const torch::Tensor& segms) {
if (points.is_cuda()) {
#ifdef WITH_CUDA
CHECK_CUDA(points);
CHECK_CUDA(segms);
return PointEdgeArrayDistanceForwardCuda(points, segms);
#else
AT_ERROR("Not compiled with GPU support.");
#endif
}
return PointEdgeArrayDistanceForwardCpu(points, segms);
}
// Backward pass for PointEdgeArrayDistance.
//
// Args:
// points: FloatTensor of shape (P, 3)
// segms: FloatTensor of shape (S, 2, 3)
// grad_dists: FloatTensor of shape (P, S)
//
// Returns:
// grad_points: FloatTensor of shape (P, 3)
// grad_segms: FloatTensor of shape (S, 2, 3)
//
#ifdef WITH_CUDA
std::tuple<torch::Tensor, torch::Tensor> PointEdgeArrayDistanceBackwardCuda(
const torch::Tensor& points,
const torch::Tensor& segms,
const torch::Tensor& grad_dists);
#endif
std::tuple<torch::Tensor, torch::Tensor> PointEdgeArrayDistanceBackwardCpu(
const torch::Tensor& points,
const torch::Tensor& segms,
const torch::Tensor& grad_dists);
std::tuple<torch::Tensor, torch::Tensor> PointEdgeArrayDistanceBackward(
const torch::Tensor& points,
const torch::Tensor& segms,
const torch::Tensor& grad_dists) {
if (points.is_cuda()) {
#ifdef WITH_CUDA
CHECK_CUDA(points);
CHECK_CUDA(segms);
CHECK_CUDA(grad_dists);
return PointEdgeArrayDistanceBackwardCuda(points, segms, grad_dists);
#else
AT_ERROR("Not compiled with GPU support.");
#endif
}
return PointEdgeArrayDistanceBackwardCpu(points, segms, grad_dists);
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment