/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include "./renderer.h"
#include "../include/commands.h"
#include "./camera.h"
#include "./util.h"

#include <iostream>
#include <optional>
#include <stdexcept>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

#ifdef WITH_CUDA
#include <ATen/cuda/CUDAContext.h>
#include <c10/cuda/CUDAGuard.h>
#endif

#ifndef TORCH_CHECK_ARG // torch <= 1.10
#define TORCH_CHECK_ARG(cond, argN, ...) \
  TORCH_CHECK(cond, "invalid argument ", argN, ": ", __VA_ARGS__)
#endif

namespace PRE = ::pulsar::Renderer;

namespace pulsar {
namespace pytorch {

Renderer::Renderer(
    const unsigned int& width,
    const unsigned int& height,
    const unsigned int& max_n_balls,
    const bool& orthogonal_projection,
    const bool& right_handed_system,
    const float& background_normalization_depth,
    const uint& n_channels,
    const uint& n_track) {
  LOG_IF(INFO, PULSAR_LOG_INIT) << "Initializing renderer.";
  TORCH_CHECK_ARG(width > 0, 1, "image width must be > 0!");
  TORCH_CHECK_ARG(height > 0, 2, "image height must be > 0!");
  TORCH_CHECK_ARG(max_n_balls > 0, 3, "max_n_balls must be > 0!");
  TORCH_CHECK_ARG(
      background_normalization_depth > 0.f &&
          background_normalization_depth < 1.f,
      5,
      "background_normalization_depth must be in ]0., 1.[");
  TORCH_CHECK_ARG(n_channels > 0, 6, "n_channels must be > 0");
  TORCH_CHECK_ARG(
      n_track > 0 && n_track <= MAX_GRAD_SPHERES,
      7,
      ("n_track must be > 0 and <= " + std::to_string(MAX_GRAD_SPHERES) +
       ". Is " + std::to_string(n_track) + ".")
          .c_str());
  LOG_IF(INFO, PULSAR_LOG_INIT)
      << "Image width: " << width << ", height: " << height;
  this->renderer_vec.emplace_back();
  this->device_type = c10::DeviceType::CPU;
  this->device_index = -1;
  PRE::construct(
      this->renderer_vec.data(),
      max_n_balls,
      width,
      height,
      orthogonal_projection,
      right_handed_system,
      background_normalization_depth,
      n_channels,
      n_track);
  this->device_tracker = torch::zeros(1);
};

Renderer::~Renderer() {
  if (this->device_type == c10::DeviceType::CUDA) {
    // This branch can't be reached if not compiled with CUDA.
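    // The CUDAGuard below activates the device that owns the render
    // buffers, so the frees in PRE::destruct are issued on that device;
    // the previously active device is restored when the guard goes out
    // of scope.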
#ifdef WITH_CUDA
    at::cuda::CUDAGuard device_guard(this->device_tracker.device());
    for (auto& nrend : this->renderer_vec) {
      PRE::destruct(&nrend);
    }
#endif
  } else {
    for (auto& nrend : this->renderer_vec) {
      PRE::destruct(&nrend);
    }
  }
}

bool Renderer::operator==(const Renderer& rhs) const {
  LOG_IF(INFO, PULSAR_LOG_INIT) << "Equality check.";
  bool renderer_agrees = (this->renderer_vec[0] == rhs.renderer_vec[0]);
  LOG_IF(INFO, PULSAR_LOG_INIT) << "  Renderer agrees: " << renderer_agrees;
  bool device_agrees =
      (this->device_tracker.device() == rhs.device_tracker.device());
  LOG_IF(INFO, PULSAR_LOG_INIT) << "  Device agrees: " << device_agrees;
  return (renderer_agrees && device_agrees);
};

void Renderer::ensure_on_device(torch::Device device, bool /*non_blocking*/) {
  TORCH_CHECK_ARG(
      device.type() == c10::DeviceType::CUDA ||
          device.type() == c10::DeviceType::CPU,
      1,
      "Only CPU and CUDA device types are supported.");
  if (device.type() != this->device_type ||
      device.index() != this->device_index) {
#ifdef WITH_CUDA
    LOG_IF(INFO, PULSAR_LOG_INIT)
        << "Transferring render buffers between devices.";
    int prev_active;
    cudaGetDevice(&prev_active);
    if (this->device_type == c10::DeviceType::CUDA) {
      LOG_IF(INFO, PULSAR_LOG_INIT) << "  Destructing on CUDA.";
      cudaSetDevice(this->device_index);
      for (auto& nrend : this->renderer_vec) {
        PRE::destruct(&nrend);
      }
    } else {
      LOG_IF(INFO, PULSAR_LOG_INIT) << "  Destructing on CPU.";
      for (auto& nrend : this->renderer_vec) {
        PRE::destruct(&nrend);
      }
    }
    if (device.type() == c10::DeviceType::CUDA) {
      LOG_IF(INFO, PULSAR_LOG_INIT) << "  Constructing on CUDA.";
      cudaSetDevice(device.index());
      for (auto& nrend : this->renderer_vec) {
        PRE::construct(
            &nrend,
            this->renderer_vec[0].max_num_balls,
            this->renderer_vec[0].cam.film_width,
            this->renderer_vec[0].cam.film_height,
            this->renderer_vec[0].cam.orthogonal_projection,
            this->renderer_vec[0].cam.right_handed,
            this->renderer_vec[0].cam.background_normalization_depth,
            this->renderer_vec[0].cam.n_channels,
            this->n_track());
      }
    } else {
      LOG_IF(INFO, PULSAR_LOG_INIT) << "  Constructing on CPU.";
      for (auto& nrend : this->renderer_vec) {
        PRE::construct(
            &nrend,
            this->renderer_vec[0].max_num_balls,
            this->renderer_vec[0].cam.film_width,
            this->renderer_vec[0].cam.film_height,
            this->renderer_vec[0].cam.orthogonal_projection,
            this->renderer_vec[0].cam.right_handed,
            this->renderer_vec[0].cam.background_normalization_depth,
            this->renderer_vec[0].cam.n_channels,
            this->n_track());
      }
    }
    cudaSetDevice(prev_active);
    this->device_type = device.type();
    this->device_index = device.index();
#else
    throw std::runtime_error(
        "pulsar was built without CUDA "
        "but a device move to a CUDA device was initiated.");
#endif
  }
};

void Renderer::ensure_n_renderers_gte(const size_t& batch_size) {
  if (this->renderer_vec.size() < batch_size) {
    ptrdiff_t diff = batch_size - this->renderer_vec.size();
    LOG_IF(INFO, PULSAR_LOG_INIT)
        << "Increasing render buffers by " << diff
        << " to account for batch size " << batch_size;
    for (ptrdiff_t i = 0; i < diff; ++i) {
      this->renderer_vec.emplace_back();
      if (this->device_type == c10::DeviceType::CUDA) {
#ifdef WITH_CUDA
        PRE::construct(
            &this->renderer_vec[this->renderer_vec.size() - 1],
            this->max_num_balls(),
            this->width(),
            this->height(),
            this->renderer_vec[0].cam.orthogonal_projection,
            this->renderer_vec[0].cam.right_handed,
            this->renderer_vec[0].cam.background_normalization_depth,
            this->renderer_vec[0].cam.n_channels,
            this->n_track());
#endif
      } else {
        PRE::construct(
            &this->renderer_vec[this->renderer_vec.size() - 1],
            this->max_num_balls(),
            this->width(),
            this->height(),
            this->renderer_vec[0].cam.orthogonal_projection,
            this->renderer_vec[0].cam.right_handed,
            this->renderer_vec[0].cam.background_normalization_depth,
            this->renderer_vec[0].cam.n_channels,
            this->n_track());
      }
    }
  }
}

std::tuple<size_t, size_t, bool, torch::Tensor> Renderer::arg_check(
    const torch::Tensor& vert_pos,
    const torch::Tensor& vert_col,
    const torch::Tensor& vert_radii,
    const torch::Tensor& cam_pos,
    const torch::Tensor& pixel_0_0_center,
    const torch::Tensor& pixel_vec_x,
    const torch::Tensor& pixel_vec_y,
    const torch::Tensor& focal_length,
    const torch::Tensor& principal_point_offsets,
    const float& gamma,
    const float& max_depth,
    float& min_depth,
    const std::optional<torch::Tensor>& bg_col,
    const std::optional<torch::Tensor>& opacity,
    const float& percent_allowed_difference,
    const uint& max_n_hits,
    const uint& mode) {
  LOG_IF(INFO, PULSAR_LOG_FORWARD || PULSAR_LOG_BACKWARD) << "Arg check.";
  size_t batch_size = 1;
  size_t n_points;
  bool batch_processing = false;
  if (vert_pos.ndimension() == 3) {
    // Check that all parameters adhere to the batch size.
    batch_processing = true;
    batch_size = vert_pos.size(0);
    TORCH_CHECK_ARG(
        vert_col.ndimension() == 3 &&
            vert_col.size(0) == static_cast<int64_t>(batch_size),
        2,
        "vert_col needs to have batch size.");
    TORCH_CHECK_ARG(
        vert_radii.ndimension() == 2 &&
            vert_radii.size(0) == static_cast<int64_t>(batch_size),
        3,
        "vert_radii must be specified per batch.");
    TORCH_CHECK_ARG(
        cam_pos.ndimension() == 2 &&
            cam_pos.size(0) == static_cast<int64_t>(batch_size),
        4,
        "cam_pos must be specified per batch and have the correct batch size.");
    TORCH_CHECK_ARG(
        pixel_0_0_center.ndimension() == 2 &&
            pixel_0_0_center.size(0) == static_cast<int64_t>(batch_size),
        5,
        "pixel_0_0_center must be specified per batch.");
    TORCH_CHECK_ARG(
        pixel_vec_x.ndimension() == 2 &&
            pixel_vec_x.size(0) == static_cast<int64_t>(batch_size),
        6,
        "pixel_vec_x must be specified per batch.");
    TORCH_CHECK_ARG(
        pixel_vec_y.ndimension() == 2 &&
            pixel_vec_y.size(0) == static_cast<int64_t>(batch_size),
        7,
        "pixel_vec_y must be specified per batch.");
    TORCH_CHECK_ARG(
        focal_length.ndimension() == 1 &&
            focal_length.size(0) == static_cast<int64_t>(batch_size),
        8,
        "focal_length must be specified per batch.");
    TORCH_CHECK_ARG(
        principal_point_offsets.ndimension() == 2 &&
            principal_point_offsets.size(0) == static_cast<int64_t>(batch_size),
        9,
        "principal_point_offsets must be specified per batch.");
    if (opacity.has_value()) {
      TORCH_CHECK_ARG(
          opacity.value().ndimension() == 2 &&
              opacity.value().size(0) == static_cast<int64_t>(batch_size),
          13,
          "Opacity needs to be specified batch-wise.");
    }
    // Check that all parameters are for a matching number of points.
    n_points = vert_pos.size(1);
    TORCH_CHECK_ARG(
        vert_col.size(1) == static_cast<int64_t>(n_points),
        2,
        ("The number of points for vertex positions (" +
         std::to_string(n_points) + ") and vertex colors (" +
         std::to_string(vert_col.size(1)) + ") doesn't agree.")
            .c_str());
    TORCH_CHECK_ARG(
        vert_radii.size(1) == static_cast<int64_t>(n_points),
        3,
        ("The number of points for vertex positions (" +
         std::to_string(n_points) + ") and vertex radii (" +
         std::to_string(vert_radii.size(1)) + ") doesn't agree.")
            .c_str());
    if (opacity.has_value()) {
      TORCH_CHECK_ARG(
          opacity.value().size(1) == static_cast<int64_t>(n_points),
          13,
          "Opacity needs to be specified per point.");
    }
    // Check all parameters have the correct last dimension size.
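    // At this point the batched inputs are expected to have the shapes
    // vert_pos: [B, N, 3], vert_col: [B, N, C], vert_radii: [B, N],
    // cam_pos / pixel_0_0_center / pixel_vec_x / pixel_vec_y: [B, 3],
    // focal_length: [B], principal_point_offsets: [B, 2] and, if given,
    // opacity: [B, N]; the following checks validate the trailing
    // dimensions.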
    TORCH_CHECK_ARG(
        vert_pos.size(2) == 3,
        1,
        ("Vertex positions must be 3D (have shape " +
         std::to_string(vert_pos.size(2)) + ")!")
            .c_str());
    TORCH_CHECK_ARG(
        vert_col.size(2) == this->renderer_vec[0].cam.n_channels,
        2,
        ("Vertex colors must have the right number of channels (have shape " +
         std::to_string(vert_col.size(2)) + ", need " +
         std::to_string(this->renderer_vec[0].cam.n_channels) + ")!")
            .c_str());
    TORCH_CHECK_ARG(
        cam_pos.size(1) == 3,
        4,
        ("Camera position must be 3D (has shape " +
         std::to_string(cam_pos.size(1)) + ")!")
            .c_str());
    TORCH_CHECK_ARG(
        pixel_0_0_center.size(1) == 3,
        5,
        ("pixel_0_0_center must be 3D (has shape " +
         std::to_string(pixel_0_0_center.size(1)) + ")!")
            .c_str());
    TORCH_CHECK_ARG(
        pixel_vec_x.size(1) == 3,
        6,
        ("pixel_vec_x must be 3D (has shape " +
         std::to_string(pixel_vec_x.size(1)) + ")!")
            .c_str());
    TORCH_CHECK_ARG(
        pixel_vec_y.size(1) == 3,
        7,
        ("pixel_vec_y must be 3D (has shape " +
         std::to_string(pixel_vec_y.size(1)) + ")!")
            .c_str());
    TORCH_CHECK_ARG(
        principal_point_offsets.size(1) == 2,
        9,
        "principal_point_offsets must contain x and y offsets.");
    // Ensure enough renderers are available for the batch.
    ensure_n_renderers_gte(batch_size);
  } else {
    // Check that all parameters have the correct number of dimensions.
    TORCH_CHECK_ARG(
        vert_col.ndimension() == 2, 2, "vert_col needs to have dimension 2.");
    TORCH_CHECK_ARG(
        vert_radii.ndimension() == 1, 3, "vert_radii must have dimension 1.");
    TORCH_CHECK_ARG(
        cam_pos.ndimension() == 1, 4, "cam_pos must have dimension 1.");
    TORCH_CHECK_ARG(
        pixel_0_0_center.ndimension() == 1,
        5,
        "pixel_0_0_center must have dimension 1.");
    TORCH_CHECK_ARG(
        pixel_vec_x.ndimension() == 1, 6, "pixel_vec_x must have dimension 1.");
    TORCH_CHECK_ARG(
        pixel_vec_y.ndimension() == 1, 7, "pixel_vec_y must have dimension 1.");
    TORCH_CHECK_ARG(
        focal_length.ndimension() == 0,
        8,
        "focal_length must have dimension 0.");
    TORCH_CHECK_ARG(
        principal_point_offsets.ndimension() == 1,
        9,
        "principal_point_offsets must have dimension 1.");
    if (opacity.has_value()) {
      TORCH_CHECK_ARG(
          opacity.value().ndimension() == 1,
          13,
          "Opacity needs to be specified per sample.");
    }
    // Check that all parameters are for a matching number of points.
    n_points = vert_pos.size(0);
    TORCH_CHECK_ARG(
        vert_col.size(0) == static_cast<int64_t>(n_points),
        2,
        ("The number of points for vertex positions (" +
         std::to_string(n_points) + ") and vertex colors (" +
         std::to_string(vert_col.size(0)) + ") doesn't agree.")
            .c_str());
    TORCH_CHECK_ARG(
        vert_radii.size(0) == static_cast<int64_t>(n_points),
        3,
        ("The number of points for vertex positions (" +
         std::to_string(n_points) + ") and vertex radii (" +
         std::to_string(vert_radii.size(0)) + ") doesn't agree.")
            .c_str());
    if (opacity.has_value()) {
      TORCH_CHECK_ARG(
          opacity.value().size(0) == static_cast<int64_t>(n_points),
          13,
          "Opacity needs to be specified per point.");
    }
    // Check all parameters have the correct last dimension size.
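    // In the un-batched case the expected shapes are vert_pos: [N, 3],
    // vert_col: [N, C], vert_radii: [N], cam_pos / pixel_0_0_center /
    // pixel_vec_x / pixel_vec_y: [3], focal_length: a scalar,
    // principal_point_offsets: [2] and, if given, opacity: [N].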
    TORCH_CHECK_ARG(
        vert_pos.size(1) == 3,
        1,
        ("Vertex positions must be 3D (have shape " +
         std::to_string(vert_pos.size(1)) + ")!")
            .c_str());
    TORCH_CHECK_ARG(
        vert_col.size(1) == this->renderer_vec[0].cam.n_channels,
        2,
        ("Vertex colors must have the right number of channels (have shape " +
         std::to_string(vert_col.size(1)) + ", need " +
         std::to_string(this->renderer_vec[0].cam.n_channels) + ")!")
            .c_str());
    TORCH_CHECK_ARG(
        cam_pos.size(0) == 3,
        4,
        ("Camera position must be 3D (has shape " +
         std::to_string(cam_pos.size(0)) + ")!")
            .c_str());
    TORCH_CHECK_ARG(
        pixel_0_0_center.size(0) == 3,
        5,
        ("pixel_0_0_center must be 3D (has shape " +
         std::to_string(pixel_0_0_center.size(0)) + ")!")
            .c_str());
    TORCH_CHECK_ARG(
        pixel_vec_x.size(0) == 3,
        6,
        ("pixel_vec_x must be 3D (has shape " +
         std::to_string(pixel_vec_x.size(0)) + ")!")
            .c_str());
    TORCH_CHECK_ARG(
        pixel_vec_y.size(0) == 3,
        7,
        ("pixel_vec_y must be 3D (has shape " +
         std::to_string(pixel_vec_y.size(0)) + ")!")
            .c_str());
    TORCH_CHECK_ARG(
        principal_point_offsets.size(0) == 2,
        9,
        "principal_point_offsets must have x and y component.");
  }
  // Check device placement.
  auto dev = torch::device_of(vert_pos).value();
  TORCH_CHECK_ARG(
      dev.type() == this->device_type && dev.index() == this->device_index,
      1,
      ("Vertex positions must be stored on device " +
       c10::DeviceTypeName(this->device_type) + ", index " +
       std::to_string(this->device_index) + "! Are stored on " +
       c10::DeviceTypeName(dev.type()) + ", index " +
       std::to_string(dev.index()) + ".")
          .c_str());
  dev = torch::device_of(vert_col).value();
  TORCH_CHECK_ARG(
      dev.type() == this->device_type && dev.index() == this->device_index,
      2,
      ("Vertex colors must be stored on device " +
       c10::DeviceTypeName(this->device_type) + ", index " +
       std::to_string(this->device_index) + "! Are stored on " +
       c10::DeviceTypeName(dev.type()) + ", index " +
       std::to_string(dev.index()) + ".")
          .c_str());
  dev = torch::device_of(vert_radii).value();
  TORCH_CHECK_ARG(
      dev.type() == this->device_type && dev.index() == this->device_index,
      3,
      ("Vertex radii must be stored on device " +
       c10::DeviceTypeName(this->device_type) + ", index " +
       std::to_string(this->device_index) + "! Are stored on " +
       c10::DeviceTypeName(dev.type()) + ", index " +
       std::to_string(dev.index()) + ".")
          .c_str());
  dev = torch::device_of(cam_pos).value();
  TORCH_CHECK_ARG(
      dev.type() == this->device_type && dev.index() == this->device_index,
      4,
      ("Camera position must be stored on device " +
       c10::DeviceTypeName(this->device_type) + ", index " +
       std::to_string(this->device_index) + "! Is stored on " +
       c10::DeviceTypeName(dev.type()) + ", index " +
       std::to_string(dev.index()) + ".")
          .c_str());
  dev = torch::device_of(pixel_0_0_center).value();
  TORCH_CHECK_ARG(
      dev.type() == this->device_type && dev.index() == this->device_index,
      5,
      ("pixel_0_0_center must be stored on device " +
       c10::DeviceTypeName(this->device_type) + ", index " +
       std::to_string(this->device_index) + "! Is stored on " +
       c10::DeviceTypeName(dev.type()) + ", index " +
       std::to_string(dev.index()) + ".")
          .c_str());
  dev = torch::device_of(pixel_vec_x).value();
  TORCH_CHECK_ARG(
      dev.type() == this->device_type && dev.index() == this->device_index,
      6,
      ("pixel_vec_x must be stored on device " +
       c10::DeviceTypeName(this->device_type) + ", index " +
       std::to_string(this->device_index) + "! Is stored on " +
Are stored on " + c10::DeviceTypeName(dev.type()) + ", index " + std::to_string(dev.index()) + ".") .c_str()); dev = torch::device_of(pixel_vec_y).value(); TORCH_CHECK_ARG( dev.type() == this->device_type && dev.index() == this->device_index, 7, ("pixel_vec_y must be stored on device " + c10::DeviceTypeName(this->device_type) + ", index " + std::to_string(this->device_index) + "! Are stored on " + c10::DeviceTypeName(dev.type()) + ", index " + std::to_string(dev.index()) + ".") .c_str()); dev = torch::device_of(principal_point_offsets).value(); TORCH_CHECK_ARG( dev.type() == this->device_type && dev.index() == this->device_index, 9, ("principal_point_offsets must be stored on device " + c10::DeviceTypeName(this->device_type) + ", index " + std::to_string(this->device_index) + "! Are stored on " + c10::DeviceTypeName(dev.type()) + ", index " + std::to_string(dev.index()) + ".") .c_str()); if (opacity.has_value()) { dev = torch::device_of(opacity.value()).value(); TORCH_CHECK_ARG( dev.type() == this->device_type && dev.index() == this->device_index, 13, ("opacity must be stored on device " + c10::DeviceTypeName(this->device_type) + ", index " + std::to_string(this->device_index) + "! Is stored on " + c10::DeviceTypeName(dev.type()) + ", index " + std::to_string(dev.index()) + ".") .c_str()); } // Type checks. TORCH_CHECK_ARG( vert_pos.scalar_type() == c10::kFloat, 1, "pulsar requires float types."); TORCH_CHECK_ARG( vert_col.scalar_type() == c10::kFloat, 2, "pulsar requires float types."); TORCH_CHECK_ARG( vert_radii.scalar_type() == c10::kFloat, 3, "pulsar requires float types."); TORCH_CHECK_ARG( cam_pos.scalar_type() == c10::kFloat, 4, "pulsar requires float types."); TORCH_CHECK_ARG( pixel_0_0_center.scalar_type() == c10::kFloat, 5, "pulsar requires float types."); TORCH_CHECK_ARG( pixel_vec_x.scalar_type() == c10::kFloat, 6, "pulsar requires float types."); TORCH_CHECK_ARG( pixel_vec_y.scalar_type() == c10::kFloat, 7, "pulsar requires float types."); TORCH_CHECK_ARG( focal_length.scalar_type() == c10::kFloat, 8, "pulsar requires float types."); TORCH_CHECK_ARG( // Unfortunately, the PyTorch interface is inconsistent for // Int32: in Python, there exists an explicit int32 type, in // C++ this is currently `c10::kInt`. principal_point_offsets.scalar_type() == c10::kInt, 9, "principal_point_offsets must be provided as int32."); if (opacity.has_value()) { TORCH_CHECK_ARG( opacity.value().scalar_type() == c10::kFloat, 13, "opacity must be a float type."); } // Content checks. TORCH_CHECK_ARG( (vert_radii > FEPS).all().item(), 3, ("Vertex radii must be > FEPS (min is " + std::to_string(vert_radii.min().item()) + ").") .c_str()); if (this->orthogonal()) { TORCH_CHECK_ARG( (focal_length == 0.f).all().item(), 8, ("for an orthogonal projection focal length must be zero (abs max: " + std::to_string(focal_length.abs().max().item()) + ").") .c_str()); } else { TORCH_CHECK_ARG( (focal_length > FEPS).all().item(), 8, ("for a perspective projection focal length must be > FEPS (min " + std::to_string(focal_length.min().item()) + ").") .c_str()); } TORCH_CHECK_ARG( gamma <= 1.f && gamma >= 1E-5f, 10, ("gamma must be in [1E-5, 1] (" + std::to_string(gamma) + ").").c_str()); if (min_depth == 0.f) { min_depth = focal_length.max().item() + 2.f * FEPS; } TORCH_CHECK_ARG( min_depth > focal_length.max().item(), 12, ("min_depth must be > focal_length (" + std::to_string(min_depth) + " vs. 
" + std::to_string(focal_length.max().item()) + ").") .c_str()); TORCH_CHECK_ARG( max_depth > min_depth + FEPS, 11, ("max_depth must be > min_depth + FEPS (" + std::to_string(max_depth) + " vs. " + std::to_string(min_depth + FEPS) + ").") .c_str()); TORCH_CHECK_ARG( percent_allowed_difference >= 0.f && percent_allowed_difference < 1.f, 14, ("percent_allowed_difference must be in [0., 1.[ (" + std::to_string(percent_allowed_difference) + ").") .c_str()); TORCH_CHECK_ARG(max_n_hits > 0, 14, "max_n_hits must be > 0!"); TORCH_CHECK_ARG(mode < 2, 15, "mode must be in {0, 1}."); torch::Tensor real_bg_col; if (bg_col.has_value()) { TORCH_CHECK_ARG( bg_col.value().device().type() == this->device_type && bg_col.value().device().index() == this->device_index, 13, "bg_col must be stored on the renderer device!"); TORCH_CHECK_ARG( bg_col.value().ndimension() == 1 && bg_col.value().size(0) == renderer_vec[0].cam.n_channels, 13, "bg_col must have the same number of channels as the image,)."); real_bg_col = bg_col.value(); } else { real_bg_col = torch::ones( {renderer_vec[0].cam.n_channels}, c10::Device(this->device_type, this->device_index)) .to(c10::kFloat); } if (opacity.has_value()) { TORCH_CHECK_ARG( (opacity.value() >= 0.f).all().item(), 13, "opacity must be >= 0."); TORCH_CHECK_ARG( (opacity.value() <= 1.f).all().item(), 13, "opacity must be <= 1."); } LOG_IF(INFO, PULSAR_LOG_FORWARD || PULSAR_LOG_BACKWARD) << " batch_size: " << batch_size; LOG_IF(INFO, PULSAR_LOG_FORWARD || PULSAR_LOG_BACKWARD) << " n_points: " << n_points; LOG_IF(INFO, PULSAR_LOG_FORWARD || PULSAR_LOG_BACKWARD) << " batch_processing: " << batch_processing; return std::tuple( batch_size, n_points, batch_processing, real_bg_col); } std::tuple Renderer::forward( const torch::Tensor& vert_pos, const torch::Tensor& vert_col, const torch::Tensor& vert_radii, const torch::Tensor& cam_pos, const torch::Tensor& pixel_0_0_center, const torch::Tensor& pixel_vec_x, const torch::Tensor& pixel_vec_y, const torch::Tensor& focal_length, const torch::Tensor& principal_point_offsets, const float& gamma, const float& max_depth, float min_depth, const std::optional& bg_col, const std::optional& opacity, const float& percent_allowed_difference, const uint& max_n_hits, const uint& mode) { // Parameter checks. this->ensure_on_device(this->device_tracker.device()); size_t batch_size; size_t n_points; bool batch_processing; torch::Tensor real_bg_col; std::tie(batch_size, n_points, batch_processing, real_bg_col) = this->arg_check( vert_pos, vert_col, vert_radii, cam_pos, pixel_0_0_center, pixel_vec_x, pixel_vec_y, focal_length, principal_point_offsets, gamma, max_depth, min_depth, bg_col, opacity, percent_allowed_difference, max_n_hits, mode); LOG_IF(INFO, PULSAR_LOG_FORWARD) << "Extracting camera objects..."; // Create the camera information. 
  std::vector<CamInfo> cam_infos(batch_size);
  if (batch_processing) {
    for (size_t batch_i = 0; batch_i < batch_size; ++batch_i) {
      cam_infos[batch_i] = cam_info_from_params(
          cam_pos[batch_i],
          pixel_0_0_center[batch_i],
          pixel_vec_x[batch_i],
          pixel_vec_y[batch_i],
          principal_point_offsets[batch_i],
          focal_length[batch_i].item<float>(),
          this->renderer_vec[0].cam.film_width,
          this->renderer_vec[0].cam.film_height,
          min_depth,
          max_depth,
          this->renderer_vec[0].cam.right_handed);
    }
  } else {
    cam_infos[0] = cam_info_from_params(
        cam_pos,
        pixel_0_0_center,
        pixel_vec_x,
        pixel_vec_y,
        principal_point_offsets,
        focal_length.item<float>(),
        this->renderer_vec[0].cam.film_width,
        this->renderer_vec[0].cam.film_height,
        min_depth,
        max_depth,
        this->renderer_vec[0].cam.right_handed);
  }
  LOG_IF(INFO, PULSAR_LOG_FORWARD) << "Processing...";
  // Let's go!
  // Contiguous version of opacity, if available. We need to create this
  // object in scope to keep it alive.
  torch::Tensor opacity_contiguous;
  float const* opacity_ptr = nullptr;
  if (opacity.has_value()) {
    opacity_contiguous = opacity.value().contiguous();
    opacity_ptr = opacity_contiguous.data_ptr<float>();
  }
  if (this->device_type == c10::DeviceType::CUDA) {
    // No else check necessary - if not compiled with CUDA
    // we can't even reach this code (the renderer can't be
    // moved to a CUDA device).
#ifdef WITH_CUDA
    int prev_active;
    cudaGetDevice(&prev_active);
    cudaSetDevice(this->device_index);
#ifdef PULSAR_TIMINGS_BATCHED_ENABLED
    START_TIME_CU(batch_forward);
#endif
    if (batch_processing) {
      for (size_t batch_i = 0; batch_i < batch_size; ++batch_i) {
        // These calls are non-blocking and just kick off the computations.
        PRE::forward(
            &this->renderer_vec[batch_i],
            vert_pos[batch_i].contiguous().data_ptr<float>(),
            vert_col[batch_i].contiguous().data_ptr<float>(),
            vert_radii[batch_i].contiguous().data_ptr<float>(),
            cam_infos[batch_i],
            gamma,
            percent_allowed_difference,
            max_n_hits,
            real_bg_col.contiguous().data_ptr<float>(),
            opacity_ptr,
            n_points,
            mode,
            at::cuda::getCurrentCUDAStream());
      }
    } else {
      PRE::forward(
          this->renderer_vec.data(),
          vert_pos.contiguous().data_ptr<float>(),
          vert_col.contiguous().data_ptr<float>(),
          vert_radii.contiguous().data_ptr<float>(),
          cam_infos[0],
          gamma,
          percent_allowed_difference,
          max_n_hits,
          real_bg_col.contiguous().data_ptr<float>(),
          opacity_ptr,
          n_points,
          mode,
          at::cuda::getCurrentCUDAStream());
    }
#ifdef PULSAR_TIMINGS_BATCHED_ENABLED
    STOP_TIME_CU(batch_forward);
    float time_ms;
    GET_TIME_CU(batch_forward, &time_ms);
    std::cout << "Forward render batched time per example: "
              << time_ms / static_cast<float>(batch_size) << "ms" << std::endl;
#endif
    cudaSetDevice(prev_active);
#endif
  } else {
#ifdef PULSAR_TIMINGS_BATCHED_ENABLED
    START_TIME(batch_forward);
#endif
    if (batch_processing) {
      for (size_t batch_i = 0; batch_i < batch_size; ++batch_i) {
        // These calls are non-blocking and just kick off the computations.
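        // On the CPU path no CUDA stream exists, so a nullptr stream is
        // passed to the rendering core.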
        PRE::forward(
            &this->renderer_vec[batch_i],
            vert_pos[batch_i].contiguous().data_ptr<float>(),
            vert_col[batch_i].contiguous().data_ptr<float>(),
            vert_radii[batch_i].contiguous().data_ptr<float>(),
            cam_infos[batch_i],
            gamma,
            percent_allowed_difference,
            max_n_hits,
            real_bg_col.contiguous().data_ptr<float>(),
            opacity_ptr,
            n_points,
            mode,
            nullptr);
      }
    } else {
      PRE::forward(
          this->renderer_vec.data(),
          vert_pos.contiguous().data_ptr<float>(),
          vert_col.contiguous().data_ptr<float>(),
          vert_radii.contiguous().data_ptr<float>(),
          cam_infos[0],
          gamma,
          percent_allowed_difference,
          max_n_hits,
          real_bg_col.contiguous().data_ptr<float>(),
          opacity_ptr,
          n_points,
          mode,
          nullptr);
    }
#ifdef PULSAR_TIMINGS_BATCHED_ENABLED
    STOP_TIME(batch_forward);
    float time_ms;
    GET_TIME(batch_forward, &time_ms);
    std::cout << "Forward render batched time per example: "
              << time_ms / static_cast<float>(batch_size) << "ms" << std::endl;
#endif
  }
  LOG_IF(INFO, PULSAR_LOG_FORWARD) << "Extracting results...";
  // Create the results.
  std::vector<torch::Tensor> results(batch_size);
  std::vector<torch::Tensor> forw_infos(batch_size);
  for (size_t batch_i = 0; batch_i < batch_size; ++batch_i) {
    results[batch_i] = from_blob(
        this->renderer_vec[batch_i].result_d,
        {this->renderer_vec[0].cam.film_height,
         this->renderer_vec[0].cam.film_width,
         this->renderer_vec[0].cam.n_channels},
        this->device_type,
        this->device_index,
        torch::kFloat,
        this->device_type == c10::DeviceType::CUDA
#ifdef WITH_CUDA
            ? at::cuda::getCurrentCUDAStream()
#else
            ? (cudaStream_t) nullptr
#endif
            : (cudaStream_t) nullptr);
    if (mode == 1)
      results[batch_i] = results[batch_i].slice(2, 0, 1, 1);
    forw_infos[batch_i] = from_blob(
        this->renderer_vec[batch_i].forw_info_d,
        {this->renderer_vec[0].cam.film_height,
         this->renderer_vec[0].cam.film_width,
         3 + 2 * this->n_track()},
        this->device_type,
        this->device_index,
        torch::kFloat,
        this->device_type == c10::DeviceType::CUDA
#ifdef WITH_CUDA
            ? at::cuda::getCurrentCUDAStream()
#else
            ? (cudaStream_t) nullptr
#endif
            : (cudaStream_t) nullptr);
  }
  LOG_IF(INFO, PULSAR_LOG_FORWARD) << "Forward render complete.";
  if (batch_processing) {
    return std::tuple<torch::Tensor, torch::Tensor>(
        torch::stack(results), torch::stack(forw_infos));
  } else {
    return std::tuple<torch::Tensor, torch::Tensor>(results[0], forw_infos[0]);
  }
};

std::tuple<
    at::optional<torch::Tensor>,
    at::optional<torch::Tensor>,
    at::optional<torch::Tensor>,
    at::optional<torch::Tensor>,
    at::optional<torch::Tensor>,
    at::optional<torch::Tensor>,
    at::optional<torch::Tensor>,
    at::optional<torch::Tensor>>
Renderer::backward(
    const torch::Tensor& grad_im,
    const torch::Tensor& image,
    const torch::Tensor& forw_info,
    const torch::Tensor& vert_pos,
    const torch::Tensor& vert_col,
    const torch::Tensor& vert_radii,
    const torch::Tensor& cam_pos,
    const torch::Tensor& pixel_0_0_center,
    const torch::Tensor& pixel_vec_x,
    const torch::Tensor& pixel_vec_y,
    const torch::Tensor& focal_length,
    const torch::Tensor& principal_point_offsets,
    const float& gamma,
    const float& max_depth,
    float min_depth,
    const std::optional<torch::Tensor>& bg_col,
    const std::optional<torch::Tensor>& opacity,
    const float& percent_allowed_difference,
    const uint& max_n_hits,
    const uint& mode,
    const bool& dif_pos,
    const bool& dif_col,
    const bool& dif_rad,
    const bool& dif_cam,
    const bool& dif_opy,
    const at::optional<std::pair<uint, uint>>& dbg_pos) {
  this->ensure_on_device(this->device_tracker.device());
  size_t batch_size;
  size_t n_points;
  bool batch_processing;
  torch::Tensor real_bg_col;
  std::tie(batch_size, n_points, batch_processing, real_bg_col) =
      this->arg_check(
          vert_pos,
          vert_col,
          vert_radii,
          cam_pos,
          pixel_0_0_center,
          pixel_vec_x,
          pixel_vec_y,
          focal_length,
          principal_point_offsets,
          gamma,
          max_depth,
          min_depth,
          bg_col,
          opacity,
          percent_allowed_difference,
          max_n_hits,
          mode);
  // Additional checks for the gradient computation.
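  // grad_im and image must match the rendered size [H, W, C] (with a
  // leading batch dimension when batching), and forw_info must provide
  // the 3 + 2 * n_track float channels of per-pixel tracking information
  // written by the forward pass.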
  TORCH_CHECK_ARG(
      (grad_im.ndimension() == 3 + batch_processing &&
       static_cast<uint>(grad_im.size(0 + batch_processing)) ==
           this->height() &&
       static_cast<uint>(grad_im.size(1 + batch_processing)) ==
           this->width() &&
       static_cast<uint>(grad_im.size(2 + batch_processing)) ==
           this->renderer_vec[0].cam.n_channels),
      1,
      "The gradient image size is not correct.");
  TORCH_CHECK_ARG(
      (image.ndimension() == 3 + batch_processing &&
       static_cast<uint>(image.size(0 + batch_processing)) ==
           this->height() &&
       static_cast<uint>(image.size(1 + batch_processing)) == this->width() &&
       static_cast<uint>(image.size(2 + batch_processing)) ==
           this->renderer_vec[0].cam.n_channels),
      2,
      "The result image size is not correct.");
  TORCH_CHECK_ARG(
      grad_im.scalar_type() == c10::kFloat,
      1,
      "The gradient image must be of float type.");
  TORCH_CHECK_ARG(
      image.scalar_type() == c10::kFloat,
      2,
      "The image must be of float type.");
  if (dif_opy) {
    TORCH_CHECK_ARG(
        opacity.has_value(), 13, "dif_opy set requires opacity values.");
  }
  if (batch_processing) {
    TORCH_CHECK_ARG(
        grad_im.size(0) == static_cast<int64_t>(batch_size),
        1,
        "Gradient image batch size must agree.");
    TORCH_CHECK_ARG(
        image.size(0) == static_cast<int64_t>(batch_size),
        2,
        "Image batch size must agree.");
    TORCH_CHECK_ARG(
        forw_info.size(0) == static_cast<int64_t>(batch_size),
        3,
        "forward info must have batch size.");
  }
  TORCH_CHECK_ARG(
      (forw_info.ndimension() == 3 + batch_processing &&
       static_cast<uint>(forw_info.size(0 + batch_processing)) ==
           this->height() &&
       static_cast<uint>(forw_info.size(1 + batch_processing)) ==
           this->width() &&
       static_cast<uint>(forw_info.size(2 + batch_processing)) ==
           3 + 2 * this->n_track()),
      3,
      "The forward info image size is not correct.");
  TORCH_CHECK_ARG(
      forw_info.scalar_type() == c10::kFloat,
      3,
      "The forward info must be of float type.");
  // Check device placement.
  auto dev = torch::device_of(grad_im).value();
  TORCH_CHECK_ARG(
      dev.type() == this->device_type && dev.index() == this->device_index,
      1,
      ("grad_im must be stored on device " +
       c10::DeviceTypeName(this->device_type) + ", index " +
       std::to_string(this->device_index) + "! Is stored on " +
       c10::DeviceTypeName(dev.type()) + ", index " +
       std::to_string(dev.index()) + ".")
          .c_str());
  dev = torch::device_of(image).value();
  TORCH_CHECK_ARG(
      dev.type() == this->device_type && dev.index() == this->device_index,
      2,
      ("image must be stored on device " +
       c10::DeviceTypeName(this->device_type) + ", index " +
       std::to_string(this->device_index) + "! Is stored on " +
       c10::DeviceTypeName(dev.type()) + ", index " +
       std::to_string(dev.index()) + ".")
          .c_str());
  dev = torch::device_of(forw_info).value();
  TORCH_CHECK_ARG(
      dev.type() == this->device_type && dev.index() == this->device_index,
      3,
      ("forw_info must be stored on device " +
       c10::DeviceTypeName(this->device_type) + ", index " +
       std::to_string(this->device_index) + "! Is stored on " +
       c10::DeviceTypeName(dev.type()) + ", index " +
       std::to_string(dev.index()) + ".")
          .c_str());
  if (dbg_pos.has_value()) {
    TORCH_CHECK_ARG(
        dbg_pos.value().first < this->width() &&
            dbg_pos.value().second < this->height(),
        23,
        "The debug position must be within image bounds.");
  }
  // Prepare the return value.
  std::tuple<
      at::optional<torch::Tensor>,
      at::optional<torch::Tensor>,
      at::optional<torch::Tensor>,
      at::optional<torch::Tensor>,
      at::optional<torch::Tensor>,
      at::optional<torch::Tensor>,
      at::optional<torch::Tensor>,
      at::optional<torch::Tensor>>
      ret;
  if (mode == 1 ||
      (!dif_pos && !dif_col && !dif_rad && !dif_cam && !dif_opy)) {
    return ret;
  }
  // Create the camera information.
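  // The camera information is rebuilt in exactly the same way as in
  // forward().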
  std::vector<CamInfo> cam_infos(batch_size);
  if (batch_processing) {
    for (size_t batch_i = 0; batch_i < batch_size; ++batch_i) {
      cam_infos[batch_i] = cam_info_from_params(
          cam_pos[batch_i],
          pixel_0_0_center[batch_i],
          pixel_vec_x[batch_i],
          pixel_vec_y[batch_i],
          principal_point_offsets[batch_i],
          focal_length[batch_i].item<float>(),
          this->renderer_vec[0].cam.film_width,
          this->renderer_vec[0].cam.film_height,
          min_depth,
          max_depth,
          this->renderer_vec[0].cam.right_handed);
    }
  } else {
    cam_infos[0] = cam_info_from_params(
        cam_pos,
        pixel_0_0_center,
        pixel_vec_x,
        pixel_vec_y,
        principal_point_offsets,
        focal_length.item<float>(),
        this->renderer_vec[0].cam.film_width,
        this->renderer_vec[0].cam.film_height,
        min_depth,
        max_depth,
        this->renderer_vec[0].cam.right_handed);
  }
  // Let's go!
  // Contiguous version of opacity, if available. We need to create this
  // object in scope to keep it alive.
  torch::Tensor opacity_contiguous;
  float const* opacity_ptr = nullptr;
  if (opacity.has_value()) {
    opacity_contiguous = opacity.value().contiguous();
    opacity_ptr = opacity_contiguous.data_ptr<float>();
  }
  if (this->device_type == c10::DeviceType::CUDA) {
    // No else check necessary - it's not possible to move
    // the renderer to a CUDA device if not built with CUDA.
#ifdef WITH_CUDA
    int prev_active;
    cudaGetDevice(&prev_active);
    cudaSetDevice(this->device_index);
#ifdef PULSAR_TIMINGS_BATCHED_ENABLED
    START_TIME_CU(batch_backward);
#endif
    if (batch_processing) {
      for (size_t batch_i = 0; batch_i < batch_size; ++batch_i) {
        // These calls are non-blocking and just kick off the computations.
        if (dbg_pos.has_value()) {
          PRE::backward_dbg(
              &this->renderer_vec[batch_i],
              grad_im[batch_i].contiguous().data_ptr<float>(),
              image[batch_i].contiguous().data_ptr<float>(),
              forw_info[batch_i].contiguous().data_ptr<float>(),
              vert_pos[batch_i].contiguous().data_ptr<float>(),
              vert_col[batch_i].contiguous().data_ptr<float>(),
              vert_radii[batch_i].contiguous().data_ptr<float>(),
              cam_infos[batch_i],
              gamma,
              percent_allowed_difference,
              max_n_hits,
              opacity_ptr,
              n_points,
              mode,
              dif_pos,
              dif_col,
              dif_rad,
              dif_cam,
              dif_opy,
              dbg_pos.value().first,
              dbg_pos.value().second,
              at::cuda::getCurrentCUDAStream());
        } else {
          PRE::backward(
              &this->renderer_vec[batch_i],
              grad_im[batch_i].contiguous().data_ptr<float>(),
              image[batch_i].contiguous().data_ptr<float>(),
              forw_info[batch_i].contiguous().data_ptr<float>(),
              vert_pos[batch_i].contiguous().data_ptr<float>(),
              vert_col[batch_i].contiguous().data_ptr<float>(),
              vert_radii[batch_i].contiguous().data_ptr<float>(),
              cam_infos[batch_i],
              gamma,
              percent_allowed_difference,
              max_n_hits,
              opacity_ptr,
              n_points,
              mode,
              dif_pos,
              dif_col,
              dif_rad,
              dif_cam,
              dif_opy,
              at::cuda::getCurrentCUDAStream());
        }
      }
    } else {
      if (dbg_pos.has_value()) {
        PRE::backward_dbg(
            this->renderer_vec.data(),
            grad_im.contiguous().data_ptr<float>(),
            image.contiguous().data_ptr<float>(),
            forw_info.contiguous().data_ptr<float>(),
            vert_pos.contiguous().data_ptr<float>(),
            vert_col.contiguous().data_ptr<float>(),
            vert_radii.contiguous().data_ptr<float>(),
            cam_infos[0],
            gamma,
            percent_allowed_difference,
            max_n_hits,
            opacity_ptr,
            n_points,
            mode,
            dif_pos,
            dif_col,
            dif_rad,
            dif_cam,
            dif_opy,
            dbg_pos.value().first,
            dbg_pos.value().second,
            at::cuda::getCurrentCUDAStream());
      } else {
        PRE::backward(
            this->renderer_vec.data(),
            grad_im.contiguous().data_ptr<float>(),
            image.contiguous().data_ptr<float>(),
            forw_info.contiguous().data_ptr<float>(),
            vert_pos.contiguous().data_ptr<float>(),
            vert_col.contiguous().data_ptr<float>(),
            vert_radii.contiguous().data_ptr<float>(),
            cam_infos[0],
            gamma,
            percent_allowed_difference,
            max_n_hits,
            opacity_ptr,
            n_points,
            mode,
            dif_pos,
            dif_col,
            dif_rad,
            dif_cam,
            dif_opy,
            at::cuda::getCurrentCUDAStream());
      }
    }
    cudaSetDevice(prev_active);
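    // Optional coarse timing of the batched backward pass; only compiled
    // in when PULSAR_TIMINGS_BATCHED_ENABLED is defined.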
#ifdef PULSAR_TIMINGS_BATCHED_ENABLED
    STOP_TIME_CU(batch_backward);
    float time_ms;
    GET_TIME_CU(batch_backward, &time_ms);
    std::cout << "Backward render batched time per example: "
              << time_ms / static_cast<float>(batch_size) << "ms" << std::endl;
#endif
#endif // WITH_CUDA
  } else {
#ifdef PULSAR_TIMINGS_BATCHED_ENABLED
    START_TIME(batch_backward);
#endif
    if (batch_processing) {
      for (size_t batch_i = 0; batch_i < batch_size; ++batch_i) {
        // These calls are non-blocking and just kick off the computations.
        if (dbg_pos.has_value()) {
          PRE::backward_dbg(
              &this->renderer_vec[batch_i],
              grad_im[batch_i].contiguous().data_ptr<float>(),
              image[batch_i].contiguous().data_ptr<float>(),
              forw_info[batch_i].contiguous().data_ptr<float>(),
              vert_pos[batch_i].contiguous().data_ptr<float>(),
              vert_col[batch_i].contiguous().data_ptr<float>(),
              vert_radii[batch_i].contiguous().data_ptr<float>(),
              cam_infos[batch_i],
              gamma,
              percent_allowed_difference,
              max_n_hits,
              opacity_ptr,
              n_points,
              mode,
              dif_pos,
              dif_col,
              dif_rad,
              dif_cam,
              dif_opy,
              dbg_pos.value().first,
              dbg_pos.value().second,
              nullptr);
        } else {
          PRE::backward(
              &this->renderer_vec[batch_i],
              grad_im[batch_i].contiguous().data_ptr<float>(),
              image[batch_i].contiguous().data_ptr<float>(),
              forw_info[batch_i].contiguous().data_ptr<float>(),
              vert_pos[batch_i].contiguous().data_ptr<float>(),
              vert_col[batch_i].contiguous().data_ptr<float>(),
              vert_radii[batch_i].contiguous().data_ptr<float>(),
              cam_infos[batch_i],
              gamma,
              percent_allowed_difference,
              max_n_hits,
              opacity_ptr,
              n_points,
              mode,
              dif_pos,
              dif_col,
              dif_rad,
              dif_cam,
              dif_opy,
              nullptr);
        }
      }
    } else {
      if (dbg_pos.has_value()) {
        PRE::backward_dbg(
            this->renderer_vec.data(),
            grad_im.contiguous().data_ptr<float>(),
            image.contiguous().data_ptr<float>(),
            forw_info.contiguous().data_ptr<float>(),
            vert_pos.contiguous().data_ptr<float>(),
            vert_col.contiguous().data_ptr<float>(),
            vert_radii.contiguous().data_ptr<float>(),
            cam_infos[0],
            gamma,
            percent_allowed_difference,
            max_n_hits,
            opacity_ptr,
            n_points,
            mode,
            dif_pos,
            dif_col,
            dif_rad,
            dif_cam,
            dif_opy,
            dbg_pos.value().first,
            dbg_pos.value().second,
            nullptr);
      } else {
        PRE::backward(
            this->renderer_vec.data(),
            grad_im.contiguous().data_ptr<float>(),
            image.contiguous().data_ptr<float>(),
            forw_info.contiguous().data_ptr<float>(),
            vert_pos.contiguous().data_ptr<float>(),
            vert_col.contiguous().data_ptr<float>(),
            vert_radii.contiguous().data_ptr<float>(),
            cam_infos[0],
            gamma,
            percent_allowed_difference,
            max_n_hits,
            opacity_ptr,
            n_points,
            mode,
            dif_pos,
            dif_col,
            dif_rad,
            dif_cam,
            dif_opy,
            nullptr);
      }
    }
#ifdef PULSAR_TIMINGS_BATCHED_ENABLED
    STOP_TIME(batch_backward);
    float time_ms;
    GET_TIME(batch_backward, &time_ms);
    std::cout << "Backward render batched time per example: "
              << time_ms / static_cast<float>(batch_size) << "ms" << std::endl;
#endif
  }
  if (dif_pos) {
    if (batch_processing) {
      std::vector<torch::Tensor> results(batch_size);
      for (size_t batch_i = 0; batch_i < batch_size; ++batch_i) {
        results[batch_i] = from_blob(
            reinterpret_cast<float*>(this->renderer_vec[batch_i].grad_pos_d),
            {static_cast<int64_t>(n_points), 3},
            this->device_type,
            this->device_index,
            torch::kFloat,
            this->device_type == c10::DeviceType::CUDA
#ifdef WITH_CUDA
                ? at::cuda::getCurrentCUDAStream()
#else
                ? (cudaStream_t) nullptr
#endif
                : (cudaStream_t) nullptr);
      }
      std::get<0>(ret) = torch::stack(results);
    } else {
      std::get<0>(ret) = from_blob(
          reinterpret_cast<float*>(this->renderer_vec[0].grad_pos_d),
          {static_cast<int64_t>(n_points), 3},
          this->device_type,
          this->device_index,
          torch::kFloat,
          this->device_type == c10::DeviceType::CUDA
#ifdef WITH_CUDA
              ? at::cuda::getCurrentCUDAStream()
#else
              ? (cudaStream_t) nullptr
#endif
              : (cudaStream_t) nullptr);
    }
  }
  if (dif_col) {
    if (batch_processing) {
      std::vector<torch::Tensor> results(batch_size);
      for (size_t batch_i = 0; batch_i < batch_size; ++batch_i) {
        results[batch_i] = from_blob(
            reinterpret_cast<float*>(this->renderer_vec[batch_i].grad_col_d),
            {static_cast<int64_t>(n_points),
             this->renderer_vec[0].cam.n_channels},
            this->device_type,
            this->device_index,
            torch::kFloat,
            this->device_type == c10::DeviceType::CUDA
#ifdef WITH_CUDA
                ? at::cuda::getCurrentCUDAStream()
#else
                ? (cudaStream_t) nullptr
#endif
                : (cudaStream_t) nullptr);
      }
      std::get<1>(ret) = torch::stack(results);
    } else {
      std::get<1>(ret) = from_blob(
          reinterpret_cast<float*>(this->renderer_vec[0].grad_col_d),
          {static_cast<int64_t>(n_points),
           this->renderer_vec[0].cam.n_channels},
          this->device_type,
          this->device_index,
          torch::kFloat,
          this->device_type == c10::DeviceType::CUDA
#ifdef WITH_CUDA
              ? at::cuda::getCurrentCUDAStream()
#else
              ? (cudaStream_t) nullptr
#endif
              : (cudaStream_t) nullptr);
    }
  }
  if (dif_rad) {
    if (batch_processing) {
      std::vector<torch::Tensor> results(batch_size);
      for (size_t batch_i = 0; batch_i < batch_size; ++batch_i) {
        results[batch_i] = from_blob(
            reinterpret_cast<float*>(this->renderer_vec[batch_i].grad_rad_d),
            {static_cast<int64_t>(n_points)},
            this->device_type,
            this->device_index,
            torch::kFloat,
            this->device_type == c10::DeviceType::CUDA
#ifdef WITH_CUDA
                ? at::cuda::getCurrentCUDAStream()
#else
                ? (cudaStream_t) nullptr
#endif
                : (cudaStream_t) nullptr);
      }
      std::get<2>(ret) = torch::stack(results);
    } else {
      std::get<2>(ret) = from_blob(
          reinterpret_cast<float*>(this->renderer_vec[0].grad_rad_d),
          {static_cast<int64_t>(n_points)},
          this->device_type,
          this->device_index,
          torch::kFloat,
          this->device_type == c10::DeviceType::CUDA
#ifdef WITH_CUDA
              ? at::cuda::getCurrentCUDAStream()
#else
              ? (cudaStream_t) nullptr
#endif
              : (cudaStream_t) nullptr);
    }
  }
  if (dif_cam) {
    if (batch_processing) {
      std::vector<torch::Tensor> res_p1(batch_size);
      std::vector<torch::Tensor> res_p2(batch_size);
      std::vector<torch::Tensor> res_p3(batch_size);
      std::vector<torch::Tensor> res_p4(batch_size);
      for (size_t batch_i = 0; batch_i < batch_size; ++batch_i) {
        res_p1[batch_i] = from_blob(
            reinterpret_cast<float*>(this->renderer_vec[batch_i].grad_cam_d),
            {3},
            this->device_type,
            this->device_index,
            torch::kFloat,
            this->device_type == c10::DeviceType::CUDA
#ifdef WITH_CUDA
                ? at::cuda::getCurrentCUDAStream()
#else
                ? (cudaStream_t) nullptr
#endif
                : (cudaStream_t) nullptr);
        res_p2[batch_i] = from_blob(
            reinterpret_cast<float*>(
                this->renderer_vec[batch_i].grad_cam_d + 3),
            {3},
            this->device_type,
            this->device_index,
            torch::kFloat,
            this->device_type == c10::DeviceType::CUDA
#ifdef WITH_CUDA
                ? at::cuda::getCurrentCUDAStream()
#else
                ? (cudaStream_t) nullptr
#endif
                : (cudaStream_t) nullptr);
        res_p3[batch_i] = from_blob(
            reinterpret_cast<float*>(
                this->renderer_vec[batch_i].grad_cam_d + 6),
            {3},
            this->device_type,
            this->device_index,
            torch::kFloat,
            this->device_type == c10::DeviceType::CUDA
#ifdef WITH_CUDA
                ? at::cuda::getCurrentCUDAStream()
#else
                ? (cudaStream_t) nullptr
#endif
                : (cudaStream_t) nullptr);
        res_p4[batch_i] = from_blob(
            reinterpret_cast<float*>(
                this->renderer_vec[batch_i].grad_cam_d + 9),
            {3},
            this->device_type,
            this->device_index,
            torch::kFloat,
            this->device_type == c10::DeviceType::CUDA
#ifdef WITH_CUDA
                ? at::cuda::getCurrentCUDAStream()
#else
                ? (cudaStream_t) nullptr
#endif
                : (cudaStream_t) nullptr);
      }
      std::get<3>(ret) = torch::stack(res_p1);
      std::get<4>(ret) = torch::stack(res_p2);
      std::get<5>(ret) = torch::stack(res_p3);
      std::get<6>(ret) = torch::stack(res_p4);
    } else {
      std::get<3>(ret) = from_blob(
          reinterpret_cast<float*>(this->renderer_vec[0].grad_cam_d),
          {3},
          this->device_type,
          this->device_index,
          torch::kFloat,
          this->device_type == c10::DeviceType::CUDA
#ifdef WITH_CUDA
              ? at::cuda::getCurrentCUDAStream()
#else
              ? (cudaStream_t) nullptr
#endif
              : (cudaStream_t) nullptr);
      std::get<4>(ret) = from_blob(
          reinterpret_cast<float*>(this->renderer_vec[0].grad_cam_d + 3),
          {3},
          this->device_type,
          this->device_index,
          torch::kFloat,
          this->device_type == c10::DeviceType::CUDA
#ifdef WITH_CUDA
              ? at::cuda::getCurrentCUDAStream()
#else
              ? (cudaStream_t) nullptr
#endif
              : (cudaStream_t) nullptr);
      std::get<5>(ret) = from_blob(
          reinterpret_cast<float*>(this->renderer_vec[0].grad_cam_d + 6),
          {3},
          this->device_type,
          this->device_index,
          torch::kFloat,
          this->device_type == c10::DeviceType::CUDA
#ifdef WITH_CUDA
              ? at::cuda::getCurrentCUDAStream()
#else
              ? (cudaStream_t) nullptr
#endif
              : (cudaStream_t) nullptr);
      std::get<6>(ret) = from_blob(
          reinterpret_cast<float*>(this->renderer_vec[0].grad_cam_d + 9),
          {3},
          this->device_type,
          this->device_index,
          torch::kFloat,
          this->device_type == c10::DeviceType::CUDA
#ifdef WITH_CUDA
              ? at::cuda::getCurrentCUDAStream()
#else
              ? (cudaStream_t) nullptr
#endif
              : (cudaStream_t) nullptr);
    }
  }
  if (dif_opy) {
    if (batch_processing) {
      std::vector<torch::Tensor> results(batch_size);
      for (size_t batch_i = 0; batch_i < batch_size; ++batch_i) {
        results[batch_i] = from_blob(
            reinterpret_cast<float*>(this->renderer_vec[batch_i].grad_opy_d),
            {static_cast<int64_t>(n_points)},
            this->device_type,
            this->device_index,
            torch::kFloat,
            this->device_type == c10::DeviceType::CUDA
#ifdef WITH_CUDA
                ? at::cuda::getCurrentCUDAStream()
#else
                ? (cudaStream_t) nullptr
#endif
                : (cudaStream_t) nullptr);
      }
      std::get<7>(ret) = torch::stack(results);
    } else {
      std::get<7>(ret) = from_blob(
          reinterpret_cast<float*>(this->renderer_vec[0].grad_opy_d),
          {static_cast<int64_t>(n_points)},
          this->device_type,
          this->device_index,
          torch::kFloat,
          this->device_type == c10::DeviceType::CUDA
#ifdef WITH_CUDA
              ? at::cuda::getCurrentCUDAStream()
#else
              ? (cudaStream_t) nullptr
#endif
              : (cudaStream_t) nullptr);
    }
  }
  return ret;
};

} // namespace pytorch
} // namespace pulsar
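// A minimal usage sketch (illustrative only: every size and value below is
// made up, and `vert_pos` [N, 3], `vert_col` [N, 3], `vert_radii` [N],
// `cam_pos` [3], `pixel_0_0_center` [3], `pixel_vec_x` [3], `pixel_vec_y` [3],
// `focal_length` (scalar) and `principal_point_offsets` [2] are assumed to be
// float32 (int32 for the offsets) tensors prepared by the caller; the Python
// bindings are the usual entry point):
//
//   pulsar::pytorch::Renderer renderer(
//       /*width=*/800,
//       /*height=*/600,
//       /*max_n_balls=*/10000,
//       /*orthogonal_projection=*/false,
//       /*right_handed_system=*/true,
//       /*background_normalization_depth=*/0.1f,
//       /*n_channels=*/3,
//       /*n_track=*/5);
//   torch::Tensor image, forw_info;
//   std::tie(image, forw_info) = renderer.forward(
//       vert_pos, vert_col, vert_radii, cam_pos, pixel_0_0_center,
//       pixel_vec_x, pixel_vec_y, focal_length, principal_point_offsets,
//       /*gamma=*/1.f,
//       /*max_depth=*/10.f,
//       /*min_depth=*/0.f,  // 0.f lets arg_check derive a valid minimum.
//       /*bg_col=*/std::nullopt,
//       /*opacity=*/std::nullopt,
//       /*percent_allowed_difference=*/0.01f,
//       /*max_n_hits=*/4096,
//       /*mode=*/0);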