Commit c5f26a44 authored by ashawkey's avatar ashawkey
Browse files

support signed_distance

parent e1eb521b
__pycache__/
build/
*.egg-info/
*.so
.vs/
*.ply
eigen*/
\ No newline at end of file
...@@ -5,6 +5,11 @@ import torch ...@@ -5,6 +5,11 @@ import torch
# CUDA extension # CUDA extension
import _cubvh as _backend import _cubvh as _backend
_sdf_mode_to_id = {
'watertight': 0,
'raystab': 1,
}
class cuBVH(): class cuBVH():
def __init__(self, vertices, triangles): def __init__(self, vertices, triangles):
# vertices: np.ndarray, [N, 3] # vertices: np.ndarray, [N, 3]
...@@ -78,4 +83,33 @@ class cuBVH(): ...@@ -78,4 +83,33 @@ class cuBVH():
return distances, face_id, uvw return distances, face_id, uvw
# def signed_distance(): def signed_distance(self, positions, return_uvw=False, mode='watertight'):
# positions: torch.Tensor, float, [N, 3]
positions = positions.float().contiguous()
if not positions.is_cuda: positions = positions.cuda()
prefix = positions.shape[:-1]
positions = positions.view(-1, 3)
N = positions.shape[0]
# init output buffers
distances = torch.empty(N, dtype=torch.float32, device=positions.device)
face_id = torch.empty(N, dtype=torch.int64, device=positions.device)
if return_uvw:
uvw = torch.empty(N, 3, dtype=torch.float32, device=positions.device)
else:
uvw = None
self.impl.signed_distance(positions, distances, face_id, uvw, _sdf_mode_to_id[mode]) # [N, 3]
distances = distances.view(*prefix)
face_id = face_id.view(*prefix)
if uvw is not None:
uvw = uvw.view(*prefix, 3)
return distances, face_id, uvw
...@@ -20,6 +20,7 @@ public: ...@@ -20,6 +20,7 @@ public:
virtual void ray_trace(at::Tensor rays_o, at::Tensor rays_d, at::Tensor positions, at::Tensor face_id, at::Tensor depth) = 0; virtual void ray_trace(at::Tensor rays_o, at::Tensor rays_d, at::Tensor positions, at::Tensor face_id, at::Tensor depth) = 0;
virtual void unsigned_distance(at::Tensor positions, at::Tensor distances, at::Tensor face_id, at::optional<at::Tensor> uvw) = 0; virtual void unsigned_distance(at::Tensor positions, at::Tensor distances, at::Tensor face_id, at::optional<at::Tensor> uvw) = 0;
virtual void signed_distance(at::Tensor positions, at::Tensor distances, at::Tensor face_id, at::optional<at::Tensor> uvw, uint32_t mode) = 0;
}; };
// function to create an implementation of cuBVH // function to create an implementation of cuBVH
......
...@@ -52,11 +52,11 @@ protected: ...@@ -52,11 +52,11 @@ protected:
public: public:
virtual void build(std::vector<Triangle>& triangles, uint32_t n_primitives_per_leaf) = 0; virtual void build(std::vector<Triangle>& triangles, uint32_t n_primitives_per_leaf) = 0;
virtual void signed_distance_gpu(uint32_t n_elements, uint32_t mode, const float* positions, float* distances, int64_t* face_id, float* uvw, const Triangle* gpu_triangles, cudaStream_t stream) = 0;
virtual void unsigned_distance_gpu(uint32_t n_elements, const float* positions, float* distances, int64_t* face_id, float* uvw, const Triangle* gpu_triangles, cudaStream_t stream) = 0; virtual void unsigned_distance_gpu(uint32_t n_elements, const float* positions, float* distances, int64_t* face_id, float* uvw, const Triangle* gpu_triangles, cudaStream_t stream) = 0;
virtual void ray_trace_gpu(uint32_t n_elements, const float* rays_o, const float* rays_d, float* positions, int64_t* face_id, float* depth, const Triangle* gpu_triangles, cudaStream_t stream) = 0; virtual void ray_trace_gpu(uint32_t n_elements, const float* rays_o, const float* rays_d, float* positions, int64_t* face_id, float* depth, const Triangle* gpu_triangles, cudaStream_t stream) = 0;
// KIUI: not supported now. // KIUI: not supported now.
// virtual void signed_distance_gpu(uint32_t n_elements, EMeshSdfMode mode, const Eigen::Vector3f* gpu_positions, float* gpu_distances, const Triangle* gpu_triangles, bool use_existing_distances_as_upper_bounds, cudaStream_t stream) = 0;
// virtual bool touches_triangle(const BoundingBox& bb, const Triangle* __restrict__ triangles) const = 0; // virtual bool touches_triangle(const BoundingBox& bb, const Triangle* __restrict__ triangles) const = 0;
// virtual void build_optix(const GPUMemory<Triangle>& triangles, cudaStream_t stream) = 0; // virtual void build_optix(const GPUMemory<Triangle>& triangles, cudaStream_t stream) = 0;
......
...@@ -60,4 +60,39 @@ __host__ __device__ void host_device_swap(T& a, T& b) { ...@@ -60,4 +60,39 @@ __host__ __device__ void host_device_swap(T& a, T& b) {
T c(a); a=b; b=c; T c(a); a=b; b=c;
} }
inline __host__ __device__ Eigen::Vector3f cylindrical_to_dir(const Eigen::Vector2f& p) {
const float cos_theta = -2.0f * p.x() + 1.0f;
const float phi = 2.0f * PI * (p.y() - 0.5f);
const float sin_theta = sqrtf(fmaxf(1.0f - cos_theta * cos_theta, 0.0f));
float sin_phi, cos_phi;
sincosf(phi, &sin_phi, &cos_phi);
return {sin_theta * cos_phi, sin_theta * sin_phi, cos_theta};
}
inline __host__ __device__ float fractf(float x) {
return x - floorf(x);
}
template <uint32_t N_DIRS>
__device__ __host__ Eigen::Vector3f fibonacci_dir(uint32_t i, const Eigen::Vector2f& offset) {
// Fibonacci lattice with offset
float epsilon;
if (N_DIRS >= 11000) {
epsilon = 27;
} else if (N_DIRS >= 890) {
epsilon = 10;
} else if (N_DIRS >= 177) {
epsilon = 3.33;
} else if (N_DIRS >= 24) {
epsilon = 1.33;
} else {
epsilon = 0.33;
}
static constexpr float GOLDEN_RATIO = 1.6180339887498948482045868343656f;
return cylindrical_to_dir(Eigen::Vector2f{fractf((i+epsilon) / (N_DIRS-1+2*epsilon) + offset.x()), fractf(i / GOLDEN_RATIO + offset.y())});
}
} }
\ No newline at end of file
/*
* Tiny self-contained version of the PCG Random Number Generation for C++
* put together from pieces of the much larger C/C++ codebase.
* Wenzel Jakob, February 2015
*
* The PCG random number generator was developed by Melissa O'Neill
* <oneill@pcg-random.org>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* For additional information about the PCG random number generation scheme,
* including its license and other licensing options, visit
*
* http://www.pcg-random.org
*
* Note: This code was modified to work with CUDA by the tiny-cuda-nn authors.
*/
#pragma once
// #include <cubvh/common.h>
#define PCG32_DEFAULT_STATE 0x853c49e6748fea9bULL
#define PCG32_DEFAULT_STREAM 0xda3e39cb94b95bdbULL
#define PCG32_MULT 0x5851f42d4c957f2dULL
#include <cmath>
#include <cassert>
namespace cubvh {
/// PCG32 Pseudorandom number generator
struct pcg32 {
/// Initialize the pseudorandom number generator with default seed
__host__ __device__ pcg32() : state(PCG32_DEFAULT_STATE), inc(PCG32_DEFAULT_STREAM) {}
/// Initialize the pseudorandom number generator with the \ref seed() function
__host__ __device__ pcg32(uint64_t initstate, uint64_t initseq = 1u) { seed(initstate, initseq); }
/**
* \brief Seed the pseudorandom number generator
*
* Specified in two parts: a state initializer and a sequence selection
* constant (a.k.a. stream id)
*/
__host__ __device__ void seed(uint64_t initstate, uint64_t initseq = 1) {
state = 0U;
inc = (initseq << 1u) | 1u;
next_uint();
state += initstate;
next_uint();
}
/// Generate a uniformly distributed unsigned 32-bit random number
__host__ __device__ uint32_t next_uint() {
uint64_t oldstate = state;
state = oldstate * PCG32_MULT + inc;
uint32_t xorshifted = (uint32_t) (((oldstate >> 18u) ^ oldstate) >> 27u);
uint32_t rot = (uint32_t) (oldstate >> 59u);
return (xorshifted >> rot) | (xorshifted << ((~rot + 1u) & 31));
}
/// Generate a uniformly distributed number, r, where 0 <= r < bound
__host__ __device__ uint32_t next_uint(uint32_t bound) {
// To avoid bias, we need to make the range of the RNG a multiple of
// bound, which we do by dropping output less than a threshold.
// A naive scheme to calculate the threshold would be to do
//
// uint32_t threshold = 0x100000000ull % bound;
//
// but 64-bit div/mod is slower than 32-bit div/mod (especially on
// 32-bit platforms). In essence, we do
//
// uint32_t threshold = (0x100000000ull-bound) % bound;
//
// because this version will calculate the same modulus, but the LHS
// value is less than 2^32.
uint32_t threshold = (~bound+1u) % bound;
// Uniformity guarantees that this loop will terminate. In practice, it
// should usually terminate quickly; on average (assuming all bounds are
// equally likely), 82.25% of the time, we can expect it to require just
// one iteration. In the worst case, someone passes a bound of 2^31 + 1
// (i.e., 2147483649), which invalidates almost 50% of the range. In
// practice, bounds are typically small and only a tiny amount of the range
// is eliminated.
for (;;) {
uint32_t r = next_uint();
if (r >= threshold)
return r % bound;
}
}
/// Generate a single precision floating point value on the interval [0, 1)
__host__ __device__ float next_float() {
/* Trick from MTGP: generate an uniformly distributed
single precision number in [1,2) and subtract 1. */
union {
uint32_t u;
float f;
} x;
x.u = (next_uint() >> 9) | 0x3f800000u;
return x.f - 1.0f;
}
/**
* \brief Generate a double precision floating point value on the interval [0, 1)
*
* \remark Since the underlying random number generator produces 32 bit output,
* only the first 32 mantissa bits will be filled (however, the resolution is still
* finer than in \ref next_float(), which only uses 23 mantissa bits)
*/
__host__ __device__ double next_double() {
/* Trick from MTGP: generate an uniformly distributed
double precision number in [1,2) and subtract 1. */
union {
uint64_t u;
double d;
} x;
x.u = ((uint64_t) next_uint() << 20) | 0x3ff0000000000000ULL;
return x.d - 1.0;
}
/**
* \brief Multi-step advance function (jump-ahead, jump-back)
*
* The method used here is based on Brown, "Random Number Generation
* with Arbitrary Stride", Transactions of the American Nuclear
* Society (Nov. 1994). The algorithm is very similar to fast
* exponentiation.
*
* The default value of 2^32 ensures that the PRNG is advanced
* sufficiently far that there is (likely) no overlap with
* previously drawn random numbers, even if small advancements.
* are made inbetween.
*/
__host__ __device__ void advance(int64_t delta_ = (1ll<<32)) {
uint64_t
cur_mult = PCG32_MULT,
cur_plus = inc,
acc_mult = 1u,
acc_plus = 0u;
/* Even though delta is an unsigned integer, we can pass a signed
integer to go backwards, it just goes "the long way round". */
uint64_t delta = (uint64_t) delta_;
while (delta > 0) {
if (delta & 1) {
acc_mult *= cur_mult;
acc_plus = acc_plus * cur_mult + cur_plus;
}
cur_plus = (cur_mult + 1) * cur_plus;
cur_mult *= cur_mult;
delta /= 2;
}
state = acc_mult * state + acc_plus;
}
/// Compute the distance between two PCG32 pseudorandom number generators
__host__ __device__ int64_t operator-(const pcg32 &other) const {
assert(inc == other.inc);
uint64_t
cur_mult = PCG32_MULT,
cur_plus = inc,
cur_state = other.state,
the_bit = 1u,
distance = 0u;
while (state != cur_state) {
if ((state & the_bit) != (cur_state & the_bit)) {
cur_state = cur_state * cur_mult + cur_plus;
distance |= the_bit;
}
assert((state & the_bit) == (cur_state & the_bit));
the_bit <<= 1;
cur_plus = (cur_mult + 1ULL) * cur_plus;
cur_mult *= cur_mult;
}
return (int64_t) distance;
}
/// Equality operator
__host__ __device__ bool operator==(const pcg32 &other) const { return state == other.state && inc == other.inc; }
/// Inequality operator
__host__ __device__ bool operator!=(const pcg32 &other) const { return state != other.state || inc != other.inc; }
uint64_t state; // RNG state. All values are possible.
uint64_t inc; // Controls which RNG sequence (stream) is selected. Must *always* be odd.
};
}
\ No newline at end of file
...@@ -30,18 +30,23 @@ import trimesh ...@@ -30,18 +30,23 @@ import trimesh
import torch import torch
import cubvh import cubvh
# build BVH from mesh ### build BVH from mesh
mesh = trimesh.load('example.ply') mesh = trimesh.load('example.ply')
BVH = cubvh.cuBVH(mesh.vertices, mesh.faces) # build with numpy.ndarray/torch.Tensor BVH = cubvh.cuBVH(mesh.vertices, mesh.faces) # build with numpy.ndarray/torch.Tensor
### query ray-mesh intersection
# query ray-mesh intersection
rays_o, rays_d = get_ray(pose, intrinsics, H, W) # [N, 3], [N, 3], query with torch.Tensor (cuda) rays_o, rays_d = get_ray(pose, intrinsics, H, W) # [N, 3], [N, 3], query with torch.Tensor (cuda)
intersections, face_normals, depth = BVH.ray_trace(rays_o, rays_d) # [N, 3], [N, 3], [N,] intersections, face_normals, depth = BVH.ray_trace(rays_o, rays_d) # [N, 3], [N, 3], [N,]
# query unsigned distance ### query unsigned distance
points # [N, 3] points # [N, 3]
distances, face_id, uvw = BVH.unsigned_distance(points, return_uvw=True) # [N], [N], [N, 3] distances, face_id, uvw = BVH.unsigned_distance(points, return_uvw=True) # [N], [N], [N, 3]
### query signed distance (INNER is NEGETIVE!)
# for watertight meshes (default)
distances, face_id, uvw = BVH.signed_distance(points, return_uvw=True, mode='watertight') # [N], [N], [N, 3]
# for non-watertight meshes:
distances, face_id, uvw = BVH.signed_distance(points, return_uvw=True, mode='raystab') # [N], [N], [N, 3]
``` ```
......
...@@ -42,7 +42,6 @@ public: ...@@ -42,7 +42,6 @@ public:
} }
// accept torch tensor (gpu) to init
void ray_trace(at::Tensor rays_o, at::Tensor rays_d, at::Tensor positions, at::Tensor face_id, at::Tensor depth) { void ray_trace(at::Tensor rays_o, at::Tensor rays_d, at::Tensor positions, at::Tensor face_id, at::Tensor depth) {
const uint32_t n_elements = rays_o.size(0); const uint32_t n_elements = rays_o.size(0);
...@@ -51,7 +50,6 @@ public: ...@@ -51,7 +50,6 @@ public:
triangle_bvh->ray_trace_gpu(n_elements, rays_o.data_ptr<float>(), rays_d.data_ptr<float>(), positions.data_ptr<float>(), face_id.data_ptr<int64_t>(), depth.data_ptr<float>(), triangles_gpu.data(), stream); triangle_bvh->ray_trace_gpu(n_elements, rays_o.data_ptr<float>(), rays_d.data_ptr<float>(), positions.data_ptr<float>(), face_id.data_ptr<int64_t>(), depth.data_ptr<float>(), triangles_gpu.data(), stream);
} }
// optionally return uvw (barycentric) and face_id
void unsigned_distance(at::Tensor positions, at::Tensor distances, at::Tensor face_id, at::optional<at::Tensor> uvw) { void unsigned_distance(at::Tensor positions, at::Tensor distances, at::Tensor face_id, at::optional<at::Tensor> uvw) {
const uint32_t n_elements = positions.size(0); const uint32_t n_elements = positions.size(0);
...@@ -61,6 +59,14 @@ public: ...@@ -61,6 +59,14 @@ public:
} }
void signed_distance(at::Tensor positions, at::Tensor distances, at::Tensor face_id, at::optional<at::Tensor> uvw, uint32_t mode) {
const uint32_t n_elements = positions.size(0);
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
triangle_bvh->signed_distance_gpu(n_elements, mode, positions.data_ptr<float>(), distances.data_ptr<float>(), face_id.data_ptr<int64_t>(), uvw.has_value() ? uvw.value().data_ptr<float>() : nullptr, triangles_gpu.data(), stream);
}
std::vector<Triangle> triangles_cpu; std::vector<Triangle> triangles_cpu;
GPUMemory<Triangle> triangles_gpu; GPUMemory<Triangle> triangles_gpu;
std::shared_ptr<TriangleBvh> triangle_bvh; std::shared_ptr<TriangleBvh> triangle_bvh;
......
...@@ -14,7 +14,8 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { ...@@ -14,7 +14,8 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
py::class_<cuBVH>(m, "cuBVH") py::class_<cuBVH>(m, "cuBVH")
.def("ray_trace", &cuBVH::ray_trace) .def("ray_trace", &cuBVH::ray_trace)
.def("unsigned_distance", &cuBVH::unsigned_distance); .def("unsigned_distance", &cuBVH::unsigned_distance)
.def("signed_distance", &cuBVH::signed_distance);
m.def("create_cuBVH", &create_cuBVH); m.def("create_cuBVH", &create_cuBVH);
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
#include <cubvh/common.h> #include <cubvh/common.h>
#include <cubvh/triangle.cuh> #include <cubvh/triangle.cuh>
#include <cubvh/bvh.cuh> #include <cubvh/bvh.cuh>
#include <cubvh/pcg32.h>
// #ifdef NGP_OPTIX // #ifdef NGP_OPTIX
// # include <optix.h> // # include <optix.h>
...@@ -132,8 +133,8 @@ constexpr float MAX_DIST_SQ = MAX_DIST*MAX_DIST; ...@@ -132,8 +133,8 @@ constexpr float MAX_DIST_SQ = MAX_DIST*MAX_DIST;
// } // }
// #endif //NGP_OPTIX // #endif //NGP_OPTIX
// __global__ void signed_distance_watertight_kernel(uint32_t n_elements, const Vector3f* __restrict__ positions, const TriangleBvhNode* __restrict__ bvhnodes, const Triangle* __restrict__ triangles, float* __restrict__ distances, bool use_existing_distances_as_upper_bounds = false); __global__ void signed_distance_watertight_kernel(uint32_t n_elements, const Vector3f* __restrict__ positions, float* __restrict__ distances, int64_t* __restrict__ face_id, Vector3f* __restrict__ uvw, const TriangleBvhNode* __restrict__ bvhnodes, const Triangle* __restrict__ triangles, bool use_existing_distances_as_upper_bounds);
// __global__ void signed_distance_raystab_kernel(uint32_t n_elements, const Vector3f* __restrict__ positions, const TriangleBvhNode* __restrict__ bvhnodes, const Triangle* __restrict__ triangles, float* __restrict__ distances, bool use_existing_distances_as_upper_bounds = false); __global__ void signed_distance_raystab_kernel(uint32_t n_elements, const Vector3f* __restrict__ positions, float* __restrict__ distances, int64_t* __restrict__ face_id, Vector3f* __restrict__ uvw, const TriangleBvhNode* __restrict__ bvhnodes, const Triangle* __restrict__ triangles, bool use_existing_distances_as_upper_bounds);
__global__ void unsigned_distance_kernel(uint32_t n_elements, const Vector3f* __restrict__ positions, float* __restrict__ distances, int64_t* __restrict__ face_id, Vector3f* __restrict__ uvw, const TriangleBvhNode* __restrict__ bvhnodes, const Triangle* __restrict__ triangles, bool use_existing_distances_as_upper_bounds); __global__ void unsigned_distance_kernel(uint32_t n_elements, const Vector3f* __restrict__ positions, float* __restrict__ distances, int64_t* __restrict__ face_id, Vector3f* __restrict__ uvw, const TriangleBvhNode* __restrict__ bvhnodes, const Triangle* __restrict__ triangles, bool use_existing_distances_as_upper_bounds);
__global__ void raytrace_kernel(uint32_t n_elements, const Vector3f* __restrict__ rays_o, const Vector3f* __restrict__ rays_d, Vector3f* __restrict__ positions, int64_t* __restrict__ face_id, float* __restrict__ depth, const TriangleBvhNode* __restrict__ nodes, const Triangle* __restrict__ triangles); __global__ void raytrace_kernel(uint32_t n_elements, const Vector3f* __restrict__ rays_o, const Vector3f* __restrict__ rays_d, Vector3f* __restrict__ positions, int64_t* __restrict__ face_id, float* __restrict__ depth, const TriangleBvhNode* __restrict__ nodes, const Triangle* __restrict__ triangles);
...@@ -391,76 +392,69 @@ public: ...@@ -391,76 +392,69 @@ public:
return result / total_weight; return result / total_weight;
} }
// __host__ __device__ static float signed_distance_watertight(const Vector3f& point, const TriangleBvhNode* __restrict__ bvhnodes, const Triangle* __restrict__ triangles, float max_distance_sq = MAX_DIST_SQ) { __host__ __device__ static std::pair<int, float> signed_distance_watertight(const Vector3f& point, const TriangleBvhNode* __restrict__ bvhnodes, const Triangle* __restrict__ triangles, float max_distance_sq = MAX_DIST_SQ) {
// auto p = closest_triangle(point, bvhnodes, triangles, max_distance_sq); auto res = closest_triangle(point, bvhnodes, triangles, max_distance_sq);
// const Triangle& tri = triangles[p.first]; const Triangle& tri = triangles[res.first];
// Vector3f closest_point = tri.closest_point(point); Vector3f closest_point = tri.closest_point(point);
// Vector3f avg_normal = avg_normal_around_point(closest_point, bvhnodes, triangles); Vector3f avg_normal = avg_normal_around_point(closest_point, bvhnodes, triangles);
// return std::copysignf(p.second, avg_normal.dot(point - closest_point)); return {res.first, std::copysignf(res.second, avg_normal.dot(point - closest_point))};
// } }
// __host__ __device__ static float signed_distance_raystab(const Vector3f& point, const TriangleBvhNode* __restrict__ bvhnodes, const Triangle* __restrict__ triangles, float max_distance_sq = MAX_DIST_SQ, default_rng_t rng={}) { __host__ __device__ static std::pair<int, float> signed_distance_raystab(const Vector3f& point, const TriangleBvhNode* __restrict__ bvhnodes, const Triangle* __restrict__ triangles, float max_distance_sq = MAX_DIST_SQ, pcg32 rng={}) {
// float distance = unsigned_distance(point, bvhnodes, triangles, max_distance_sq); auto res = closest_triangle(point, bvhnodes, triangles, max_distance_sq);
// Vector2f offset = random_val_2d(rng); Vector2f offset = {rng.next_float(), rng.next_float()};
// static constexpr uint32_t N_STAB_RAYS = 32; static constexpr uint32_t N_STAB_RAYS = 32;
// for (uint32_t i = 0; i < N_STAB_RAYS; ++i) { for (uint32_t i = 0; i < N_STAB_RAYS; ++i) {
// // Use a Fibonacci lattice (with random offset) to regularly // Use a Fibonacci lattice (with random offset) to regularly
// // distribute the stab rays over the sphere. // distribute the stab rays over the sphere.
// Vector3f d = fibonacci_dir<N_STAB_RAYS>(i, offset); Vector3f d = fibonacci_dir<N_STAB_RAYS>(i, offset);
// // If any of the stab rays goes outside the mesh, the SDF is positive. // If any of the stab rays goes outside the mesh, the SDF is positive.
// if (ray_intersect(point, -d, bvhnodes, triangles).first < 0 || ray_intersect(point, d, bvhnodes, triangles).first < 0) { if (ray_intersect(point, -d, bvhnodes, triangles).first < 0 || ray_intersect(point, d, bvhnodes, triangles).first < 0) {
// return distance; return {res.first, res.second};
// } }
// } }
// return -distance; return {res.first, -res.second};
// } }
// // // Assumes that "point" is a location on a triangle
// // Vector3f avg_normal_around_point(const Vector3f& point, const Triangle* __restrict__ triangles) const { void signed_distance_gpu(uint32_t n_elements, uint32_t mode, const float* positions, float* distances, int64_t* face_id, float* uvw, const Triangle* gpu_triangles, cudaStream_t stream) override {
// // return avg_normal_around_point(point, m_nodes.data(), triangles);
// // } const Vector3f* positions_vec = (const Vector3f*)positions;
Vector3f* uvw_vec = (Vector3f*)uvw;
// // float signed_distance(EMeshSdfMode mode, const Vector3f& point, const std::vector<Triangle>& triangles) const {
// // if (mode == EMeshSdfMode::Watertight) { if (mode == 0) {
// // return signed_distance_watertight(point, m_nodes.data(), triangles.data()); // watertight
// // } else { linear_kernel(signed_distance_watertight_kernel, 0u, stream,
// // return signed_distance_raystab(point, m_nodes.data(), triangles.data()); n_elements,
// // } positions_vec,
// // } distances,
face_id,
// void signed_distance_gpu(uint32_t n_elements, EMeshSdfMode mode, const Vector3f* gpu_positions, float* gpu_distances, const Triangle* gpu_triangles, bool use_existing_distances_as_upper_bounds, cudaStream_t stream) override { uvw_vec,
// if (mode == EMeshSdfMode::Watertight) { m_nodes_gpu.data(),
// linear_kernel(signed_distance_watertight_kernel, 0, stream, gpu_triangles,
// n_elements, false
// gpu_positions, );
// m_nodes_gpu.data(),
// gpu_triangles, } else {
// gpu_distances, // raystab
// use_existing_distances_as_upper_bounds linear_kernel(signed_distance_raystab_kernel, 0u, stream,
// ); n_elements,
// } else { positions_vec,
// { distances,
// if (mode == EMeshSdfMode::Raystab) { face_id,
// linear_kernel(signed_distance_raystab_kernel, 0, stream, uvw_vec,
// n_elements, m_nodes_gpu.data(),
// gpu_positions, gpu_triangles,
// m_nodes_gpu.data(), false
// gpu_triangles, );
// gpu_distances, }
// use_existing_distances_as_upper_bounds }
// );
// } else if (mode == EMeshSdfMode::PathEscape) {
// throw std::runtime_error{"TriangleBvh: EMeshSdfMode::PathEscape is only supported with OptiX enabled."};
// }
// }
// }
// }
void unsigned_distance_gpu(uint32_t n_elements, const float* positions, float* distances, int64_t* face_id, float* uvw, const Triangle* gpu_triangles, cudaStream_t stream) override { void unsigned_distance_gpu(uint32_t n_elements, const float* positions, float* distances, int64_t* face_id, float* uvw, const Triangle* gpu_triangles, cudaStream_t stream) override {
...@@ -477,7 +471,6 @@ public: ...@@ -477,7 +471,6 @@ public:
gpu_triangles, gpu_triangles,
false false
); );
} }
void ray_trace_gpu(uint32_t n_elements, const float* rays_o, const float* rays_d, float* positions, int64_t* face_id, float* depth, const Triangle* gpu_triangles, cudaStream_t stream) override { void ray_trace_gpu(uint32_t n_elements, const float* rays_o, const float* rays_d, float* positions, int64_t* face_id, float* depth, const Triangle* gpu_triangles, cudaStream_t stream) override {
...@@ -622,37 +615,61 @@ std::unique_ptr<TriangleBvh> TriangleBvh::make() { ...@@ -622,37 +615,61 @@ std::unique_ptr<TriangleBvh> TriangleBvh::make() {
return std::unique_ptr<TriangleBvh>(new TriangleBvh4()); return std::unique_ptr<TriangleBvh>(new TriangleBvh4());
} }
// __global__ void signed_distance_watertight_kernel(uint32_t n_elements, __global__ void signed_distance_watertight_kernel(
// const Vector3f* __restrict__ positions, uint32_t n_elements, const Vector3f* __restrict__ positions,
// const TriangleBvhNode* __restrict__ bvhnodes, float* __restrict__ distances, int64_t* __restrict__ face_id, Vector3f* __restrict__ uvw,
// const Triangle* __restrict__ triangles, const TriangleBvhNode* __restrict__ bvhnodes, const Triangle* __restrict__ triangles, bool use_existing_distances_as_upper_bounds
// float* __restrict__ distances, ) {
// bool use_existing_distances_as_upper_bounds uint32_t i = blockIdx.x * blockDim.x + threadIdx.x;
// ) { if (i >= n_elements) return;
// uint32_t i = blockIdx.x * blockDim.x + threadIdx.x;
// if (i >= n_elements) return;
// float max_distance = use_existing_distances_as_upper_bounds ? distances[i] : MAX_DIST;
// distances[i] = TriangleBvh4::signed_distance_watertight(positions[i], bvhnodes, triangles, max_distance*max_distance);
// }
// __global__ void signed_distance_raystab_kernel( float max_distance = use_existing_distances_as_upper_bounds ? distances[i] : MAX_DIST;
// uint32_t n_elements,
// const Vector3f* __restrict__ positions, Vector3f point = positions[i];
// const TriangleBvhNode* __restrict__ bvhnodes,
// const Triangle* __restrict__ triangles, auto res = TriangleBvh4::signed_distance_watertight(point, bvhnodes, triangles, max_distance*max_distance);
// float* __restrict__ distances,
// bool use_existing_distances_as_upper_bounds // write
// ) { distances[i] = res.second;
// uint32_t i = blockIdx.x * blockDim.x + threadIdx.x; face_id[i] = triangles[res.first].id;
// if (i >= n_elements) return;
// optional output
// float max_distance = use_existing_distances_as_upper_bounds ? distances[i] : MAX_DIST; if (uvw) {
// default_rng_t rng; // get closest point
// rng.advance(i * 2); Vector3f cpoint = triangles[res.first].closest_point(point);
// query uvw
// distances[i] = TriangleBvh4::signed_distance_raystab(positions[i], bvhnodes, triangles, max_distance*max_distance, rng); uvw[i] = triangles[res.first].barycentric(cpoint);
// } }
}
__global__ void signed_distance_raystab_kernel(
uint32_t n_elements, const Vector3f* __restrict__ positions,
float* __restrict__ distances, int64_t* __restrict__ face_id, Vector3f* __restrict__ uvw,
const TriangleBvhNode* __restrict__ bvhnodes, const Triangle* __restrict__ triangles, bool use_existing_distances_as_upper_bounds
) {
uint32_t i = blockIdx.x * blockDim.x + threadIdx.x;
if (i >= n_elements) return;
float max_distance = use_existing_distances_as_upper_bounds ? distances[i] : MAX_DIST;
pcg32 rng;
rng.advance(i * 2);
Vector3f point = positions[i];
auto res = TriangleBvh4::signed_distance_raystab(point, bvhnodes, triangles, max_distance*max_distance, rng);
// write
distances[i] = res.second;
face_id[i] = triangles[res.first].id;
// optional output
if (uvw) {
// get closest point
Vector3f cpoint = triangles[res.first].closest_point(point);
// query uvw
uvw[i] = triangles[res.first].barycentric(cpoint);
}
}
__global__ void unsigned_distance_kernel( __global__ void unsigned_distance_kernel(
uint32_t n_elements, const Vector3f* __restrict__ positions, uint32_t n_elements, const Vector3f* __restrict__ positions,
......
import numpy as np
import trimesh
import argparse
import torch
import cubvh
import time
def create_dodecahedron(radius=1, center=np.array([0, 0, 0])):
vertices = np.array([
-0.57735, -0.57735, 0.57735,
0.934172, 0.356822, 0,
0.934172, -0.356822, 0,
-0.934172, 0.356822, 0,
-0.934172, -0.356822, 0,
0, 0.934172, 0.356822,
0, 0.934172, -0.356822,
0.356822, 0, -0.934172,
-0.356822, 0, -0.934172,
0, -0.934172, -0.356822,
0, -0.934172, 0.356822,
0.356822, 0, 0.934172,
-0.356822, 0, 0.934172,
0.57735, 0.57735, -0.57735,
0.57735, 0.57735, 0.57735,
-0.57735, 0.57735, -0.57735,
-0.57735, 0.57735, 0.57735,
0.57735, -0.57735, -0.57735,
0.57735, -0.57735, 0.57735,
-0.57735, -0.57735, -0.57735,
]).reshape((-1,3), order="C")
faces = np.array([
19, 3, 2,
12, 19, 2,
15, 12, 2,
8, 14, 2,
18, 8, 2,
3, 18, 2,
20, 5, 4,
9, 20, 4,
16, 9, 4,
13, 17, 4,
1, 13, 4,
5, 1, 4,
7, 16, 4,
6, 7, 4,
17, 6, 4,
6, 15, 2,
7, 6, 2,
14, 7, 2,
10, 18, 3,
11, 10, 3,
19, 11, 3,
11, 1, 5,
10, 11, 5,
20, 10, 5,
20, 9, 8,
10, 20, 8,
18, 10, 8,
9, 16, 7,
8, 9, 7,
14, 8, 7,
12, 15, 6,
13, 12, 6,
17, 13, 6,
13, 1, 11,
12, 13, 11,
19, 12, 11,
]).reshape((-1, 3), order="C")-1
length = np.linalg.norm(vertices, axis=1).reshape((-1, 1))
vertices = vertices / length * radius + center
return trimesh.Trimesh(vertices=vertices, faces=faces)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--N', default=1000, type=int)
parser.add_argument('--mesh', default='', type=str)
opt = parser.parse_args()
if opt.mesh == '':
mesh = create_dodecahedron()
else:
mesh = trimesh.load(opt.mesh, force='mesh', skip_material=True)
# query nearest triangles for a set of points
points = torch.randn(opt.N, 3, device='cuda', dtype=torch.float32)
# Ours
_t0 = time.time()
BVH = cubvh.cuBVH(mesh.vertices, mesh.faces)
torch.cuda.synchronize()
_t1 = time.time()
distances, face_id, _ = BVH.signed_distance(points, return_uvw=False, mode='raystab')
torch.cuda.synchronize()
_t2 = time.time()
print(f'[TIME] Ours total {_t2 - _t0:.6f}s = build {_t1 - _t0:.6f}s + query {_t2 - _t1:.6f}s')
# GT results by trimesh
_t0 = time.time()
gt_distances = -trimesh.proximity.signed_distance(mesh, points.cpu().numpy())
_t1 = time.time()
print(f'[TIME] Trimesh total {_t1 - _t0:.6f}s')
# verify correctness
distances = distances.cpu().numpy().astype(np.float32)
np.testing.assert_allclose(
distances,
gt_distances.astype(np.float32),
atol=1e-5
)
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment