Commit 92aa2fa8 authored by zachteed

initial commit
#ifndef LIETORCH_GPU_H_
#define LIETORCH_GPU_H_
#include <vector>
#include <torch/extension.h>
#include <cuda.h>
#include <cuda_runtime.h>
// unary operations
torch::Tensor exp_forward_gpu(int, torch::Tensor);
std::vector<torch::Tensor> exp_backward_gpu(int, torch::Tensor, torch::Tensor);
torch::Tensor log_forward_gpu(int, torch::Tensor);
std::vector<torch::Tensor> log_backward_gpu(int, torch::Tensor, torch::Tensor);
torch::Tensor inv_forward_gpu(int, torch::Tensor);
std::vector<torch::Tensor> inv_backward_gpu(int, torch::Tensor, torch::Tensor);
// binary operations
torch::Tensor mul_forward_gpu(int, torch::Tensor, torch::Tensor);
std::vector<torch::Tensor> mul_backward_gpu(int, torch::Tensor, torch::Tensor, torch::Tensor);
torch::Tensor adj_forward_gpu(int, torch::Tensor, torch::Tensor);
std::vector<torch::Tensor> adj_backward_gpu(int, torch::Tensor, torch::Tensor, torch::Tensor);
torch::Tensor adjT_forward_gpu(int, torch::Tensor, torch::Tensor);
std::vector<torch::Tensor> adjT_backward_gpu(int, torch::Tensor, torch::Tensor, torch::Tensor);
torch::Tensor act_forward_gpu(int, torch::Tensor, torch::Tensor);
std::vector<torch::Tensor> act_backward_gpu(int, torch::Tensor, torch::Tensor, torch::Tensor);
torch::Tensor act4_forward_gpu(int, torch::Tensor, torch::Tensor);
std::vector<torch::Tensor> act4_backward_gpu(int, torch::Tensor, torch::Tensor, torch::Tensor);
// utility operators
torch::Tensor as_matrix_forward_gpu(int, torch::Tensor);
torch::Tensor jleft_forward_gpu(int, torch::Tensor, torch::Tensor);
#endif
#ifndef RxSO3_HEADER
#define RxSO3_HEADER
#include <stdio.h>
#include <Eigen/Dense>
#include <Eigen/Geometry>
#include "common.h"
template <typename Scalar>
class RxSO3 {
public:
const static int constexpr K = 4; // manifold dimension
const static int constexpr N = 5; // embedding dimension
using Vector3 = Eigen::Matrix<Scalar,3,1>;
using Vector4 = Eigen::Matrix<Scalar,4,1>;
using Matrix3 = Eigen::Matrix<Scalar,3,3>;
using Tangent = Eigen::Matrix<Scalar,K,1>;
using Data = Eigen::Matrix<Scalar,N,1>;
using Point = Eigen::Matrix<Scalar,3,1>;
using Point4 = Eigen::Matrix<Scalar,4,1>;
using Quaternion = Eigen::Quaternion<Scalar>;
using Transformation = Eigen::Matrix<Scalar,3,3>;
using Adjoint = Eigen::Matrix<Scalar,4,4>;
EIGEN_DEVICE_FUNC RxSO3(Quaternion const& q, Scalar const s)
: unit_quaternion(q), scale(s) {
unit_quaternion.normalize();
};
EIGEN_DEVICE_FUNC RxSO3(const Scalar *data) : unit_quaternion(data), scale(data[4]) {
unit_quaternion.normalize();
};
EIGEN_DEVICE_FUNC RxSO3() {
unit_quaternion = Quaternion::Identity();
scale = Scalar(1.0);
}
EIGEN_DEVICE_FUNC RxSO3<Scalar> inv() {
return RxSO3<Scalar>(unit_quaternion.conjugate(), 1.0/scale);
}
EIGEN_DEVICE_FUNC Data data() const {
Data data_vec; data_vec << unit_quaternion.coeffs(), scale;
return data_vec;
}
EIGEN_DEVICE_FUNC RxSO3<Scalar> operator*(RxSO3<Scalar> const& other) {
return RxSO3<Scalar>(unit_quaternion * other.unit_quaternion, scale * other.scale);
}
EIGEN_DEVICE_FUNC Point operator*(Point const& p) const {
const Quaternion& q = unit_quaternion;
Point uv = q.vec().cross(p); uv += uv;
return scale * (p + q.w()*uv + q.vec().cross(uv));
}
EIGEN_DEVICE_FUNC Point4 act4(Point4 const& p) const {
Point4 p1; p1 << this->operator*(p.template segment<3>(0)), p(3);
return p1;
}
EIGEN_DEVICE_FUNC Adjoint Adj() const {
Adjoint Ad = Adjoint::Identity();
Ad.template block<3,3>(0,0) = unit_quaternion.toRotationMatrix();
return Ad;
}
EIGEN_DEVICE_FUNC Transformation Matrix() const {
return scale * unit_quaternion.toRotationMatrix();
}
EIGEN_DEVICE_FUNC Eigen::Matrix<Scalar,4,4> Matrix4x4() const {
Eigen::Matrix<Scalar,4,4> T;
T = Eigen::Matrix<Scalar,4,4>::Identity();
T.template block<3,3>(0,0) = Matrix();
return T;
}
EIGEN_DEVICE_FUNC Transformation Rotation() const {
return unit_quaternion.toRotationMatrix();
}
EIGEN_DEVICE_FUNC Tangent Adj(Tangent const& a) const {
return Adj() * a;
}
EIGEN_DEVICE_FUNC Tangent AdjT(Tangent const& a) const {
return Adj().transpose() * a;
}
EIGEN_DEVICE_FUNC static Transformation hat(Tangent const& phi_sigma) {
Vector3 const phi = phi_sigma.template segment<3>(0);
return SO3<Scalar>::hat(phi) + phi_sigma(3) * Transformation::Identity();
}
EIGEN_DEVICE_FUNC static Adjoint adj(Tangent const& phi_sigma) {
Vector3 const phi = phi_sigma.template segment<3>(0);
Matrix3 const Phi = SO3<Scalar>::hat(phi);
Adjoint ad = Adjoint::Zero();
ad.template block<3,3>(0,0) = Phi;
return ad;
}
EIGEN_DEVICE_FUNC Tangent Log() const {
using std::abs;
using std::atan;
using std::sqrt;
Scalar squared_n = unit_quaternion.vec().squaredNorm();
Scalar w = unit_quaternion.w();
Scalar two_atan_nbyw_by_n;
/// Atan-based log thanks to
///
/// C. Hertzberg et al.:
/// "Integrating Generic Sensor Fusion Algorithms with Sound State
/// Representation through Encapsulation of Manifolds"
/// Information Fusion, 2011
if (squared_n < EPS * EPS) {
two_atan_nbyw_by_n = Scalar(2) / w - Scalar(2.0/3.0) * (squared_n) / (w * w * w);
} else {
Scalar n = sqrt(squared_n);
if (abs(w) < EPS) {
if (w > Scalar(0)) {
two_atan_nbyw_by_n = PI / n;
} else {
two_atan_nbyw_by_n = -PI / n;
}
} else {
two_atan_nbyw_by_n = Scalar(2) * atan(n / w) / n;
}
}
Tangent phi_sigma;
phi_sigma << two_atan_nbyw_by_n * unit_quaternion.vec(), log(scale);
return phi_sigma;
}
EIGEN_DEVICE_FUNC static RxSO3<Scalar> Exp(Tangent const& phi_sigma) {
Vector3 phi = phi_sigma.template segment<3>(0);
Scalar scale = exp(phi_sigma(3));
Scalar theta2 = phi.squaredNorm();
Scalar theta = sqrt(theta2);
Scalar imag_factor;
Scalar real_factor;
if (theta < EPS) {
Scalar theta4 = theta2 * theta2;
imag_factor = Scalar(0.5) - Scalar(1.0/48.0) * theta2 + Scalar(1.0/3840.0) * theta4;
real_factor = Scalar(1) - Scalar(1.0/8.0) * theta2 + Scalar(1.0/384.0) * theta4;
} else {
imag_factor = sin(.5 * theta) / theta;
real_factor = cos(.5 * theta);
}
Quaternion q(real_factor, imag_factor*phi.x(), imag_factor*phi.y(), imag_factor*phi.z());
return RxSO3<Scalar>(q, scale);
}
EIGEN_DEVICE_FUNC static Matrix3 calcW(Tangent const& phi_sigma) {
// left jacobian
using std::abs;
Matrix3 const I = Matrix3::Identity();
Scalar const one(1);
Scalar const half(0.5);
Vector3 const phi = phi_sigma.template segment<3>(0);
Scalar const sigma = phi_sigma(3);
Scalar const theta = phi.norm();
Matrix3 const Phi = SO3<Scalar>::hat(phi);
Matrix3 const Phi2 = Phi * Phi;
Scalar const scale = exp(sigma);
Scalar A, B, C;
if (abs(sigma) < EPS) {
C = one;
if (abs(theta) < EPS) {
A = half;
B = Scalar(1. / 6.);
} else {
Scalar theta_sq = theta * theta;
A = (one - cos(theta)) / theta_sq;
B = (theta - sin(theta)) / (theta_sq * theta);
}
} else {
C = (scale - one) / sigma;
if (abs(theta) < EPS) {
Scalar sigma_sq = sigma * sigma;
A = ((sigma - one) * scale + one) / sigma_sq;
B = (scale * half * sigma_sq + scale - one - sigma * scale) /
(sigma_sq * sigma);
} else {
Scalar theta_sq = theta * theta;
Scalar a = scale * sin(theta);
Scalar b = scale * cos(theta);
Scalar c = theta_sq + sigma * sigma;
A = (a * sigma + (one - b) * theta) / (theta * c);
B = (C - ((b - one) * sigma + a * theta) / (c)) * one / (theta_sq);
}
}
return A * Phi + B * Phi2 + C * I;
}
EIGEN_DEVICE_FUNC static Matrix3 calcWInv(Tangent const& phi_sigma) {
// left jacobian inverse
Matrix3 const I = Matrix3::Identity();
Scalar const half(0.5);
Scalar const one(1);
Scalar const two(2);
Vector3 const phi = phi_sigma.template segment<3>(0);
Scalar const sigma = phi_sigma(3);
Scalar const theta = phi.norm();
Scalar const scale = exp(sigma);
Matrix3 const Phi = SO3<Scalar>::hat(phi);
Matrix3 const Phi2 = Phi * Phi;
Scalar const scale_sq = scale * scale;
Scalar const theta_sq = theta * theta;
Scalar const sin_theta = sin(theta);
Scalar const cos_theta = cos(theta);
Scalar a, b, c;
if (abs(sigma * sigma) < EPS) {
c = one - half * sigma;
a = -half;
if (abs(theta_sq) < EPS) {
b = Scalar(1. / 12.);
} else {
b = (theta * sin_theta + two * cos_theta - two) /
(two * theta_sq * (cos_theta - one));
}
} else {
Scalar const scale_cu = scale_sq * scale;
c = sigma / (scale - one);
if (abs(theta_sq) < EPS) {
a = (-sigma * scale + scale - one) / ((scale - one) * (scale - one));
b = (scale_sq * sigma - two * scale_sq + scale * sigma + two * scale) /
(two * scale_cu - Scalar(6) * scale_sq + Scalar(6) * scale - two);
} else {
Scalar const s_sin_theta = scale * sin_theta;
Scalar const s_cos_theta = scale * cos_theta;
a = (theta * s_cos_theta - theta - sigma * s_sin_theta) /
(theta * (scale_sq - two * s_cos_theta + one));
b = -scale *
(theta * s_sin_theta - theta * sin_theta + sigma * s_cos_theta -
scale * sigma + sigma * cos_theta - sigma) /
(theta_sq * (scale_cu - two * scale * s_cos_theta - scale_sq +
two * s_cos_theta + scale - one));
}
}
return a * Phi + b * Phi2 + c * I;
}
EIGEN_DEVICE_FUNC static Adjoint left_jacobian(Tangent const& phi_sigma) {
// left jacobian
Adjoint J = Adjoint::Identity();
Vector3 phi = phi_sigma.template segment<3>(0);
J.template block<3,3>(0,0) = SO3<Scalar>::left_jacobian(phi);
return J;
}
EIGEN_DEVICE_FUNC static Adjoint left_jacobian_inverse(Tangent const& phi_sigma) {
// left jacobian inverse
Adjoint Jinv = Adjoint::Identity();
Vector3 phi = phi_sigma.template segment<3>(0);
Jinv.template block<3,3>(0,0) = SO3<Scalar>::left_jacobian_inverse(phi);
return Jinv;
}
EIGEN_DEVICE_FUNC static Eigen::Matrix<Scalar,3,4> act_jacobian(Point const& p) {
// jacobian action on a point
Eigen::Matrix<Scalar,3,4> Ja;
Ja << SO3<Scalar>::hat(-p), p;
return Ja;
}
EIGEN_DEVICE_FUNC static Eigen::Matrix<Scalar,4,4> act4_jacobian(Point4 const& p) {
// jacobian action on a point
Eigen::Matrix<Scalar,4,4> J = Eigen::Matrix<Scalar,4,4>::Zero();
J.template block<3,3>(0,0) = SO3<Scalar>::hat(-p.template segment<3>(0));
J.template block<3,1>(0,3) = p.template segment<3>(0);
return J;
}
private:
Quaternion unit_quaternion;
Scalar scale;
};
#endif
#ifndef SE3_HEADER
#define SE3_HEADER
#include <stdio.h>
#include <Eigen/Dense>
#include <Eigen/Geometry>
#include "common.h"
#include "so3.h"
template <typename Scalar>
class SE3 {
public:
const static int constexpr K = 6; // manifold dimension
const static int constexpr N = 7; // embedding dimension
using Vector3 = Eigen::Matrix<Scalar,3,1>;
using Vector4 = Eigen::Matrix<Scalar,4,1>;
using Matrix3 = Eigen::Matrix<Scalar,3,3>;
using Tangent = Eigen::Matrix<Scalar,K,1>;
using Point = Eigen::Matrix<Scalar,3,1>;
using Point4 = Eigen::Matrix<Scalar,4,1>;
using Data = Eigen::Matrix<Scalar,N,1>;
using Transformation = Eigen::Matrix<Scalar,4,4>;
using Adjoint = Eigen::Matrix<Scalar,K,K>;
EIGEN_DEVICE_FUNC SE3() { translation = Vector3::Zero(); }
EIGEN_DEVICE_FUNC SE3(SO3<Scalar> const& so3, Vector3 const& t) : so3(so3), translation(t) {};
EIGEN_DEVICE_FUNC SE3(const Scalar *data) : translation(data), so3(data+3) {};
EIGEN_DEVICE_FUNC SE3<Scalar> inv() {
return SE3(so3.inv(), -(so3.inv()*translation));
}
EIGEN_DEVICE_FUNC Data data() const {
Data data_vec; data_vec << translation, so3.data();
return data_vec;
}
EIGEN_DEVICE_FUNC SE3<Scalar> operator*(SE3<Scalar> const& other) {
return SE3(so3 * other.so3, translation + so3 * other.translation);
}
EIGEN_DEVICE_FUNC Point operator*(Point const& p) const {
return so3 * p + translation;
}
EIGEN_DEVICE_FUNC Point4 act4(Point4 const& p) const {
Point4 p1; p1 << so3 * p.template segment<3>(0) + translation * p(3), p(3);
return p1;
}
EIGEN_DEVICE_FUNC Adjoint Adj() const {
Matrix3 R = so3.Matrix();
Matrix3 tx = SO3<Scalar>::hat(translation);
Matrix3 Zer = Matrix3::Zero();
Adjoint Ad;
Ad << R, tx*R, Zer, R;
return Ad;
}
EIGEN_DEVICE_FUNC Transformation Matrix() const {
Transformation T = Transformation::Identity();
T.template block<3,3>(0,0) = so3.Matrix();
T.template block<3,1>(0,3) = translation;
return T;
}
EIGEN_DEVICE_FUNC Transformation Matrix4x4() const {
return Matrix();
}
EIGEN_DEVICE_FUNC Tangent Adj(Tangent const& a) const {
return Adj() * a;
}
EIGEN_DEVICE_FUNC Tangent AdjT(Tangent const& a) const {
return Adj().transpose() * a;
}
EIGEN_DEVICE_FUNC static Transformation hat(Tangent const& tau_phi) {
Vector3 tau = tau_phi.template segment<3>(0);
Vector3 phi = tau_phi.template segment<3>(3);
Transformation TauPhi = Transformation::Zero();
TauPhi.template block<3,3>(0,0) = SO3<Scalar>::hat(phi);
TauPhi.template block<3,1>(0,3) = tau;
return TauPhi;
}
EIGEN_DEVICE_FUNC static Adjoint adj(Tangent const& tau_phi) {
Vector3 tau = tau_phi.template segment<3>(0);
Vector3 phi = tau_phi.template segment<3>(3);
Matrix3 Tau = SO3<Scalar>::hat(tau);
Matrix3 Phi = SO3<Scalar>::hat(phi);
Matrix3 Zer = Matrix3::Zero();
Adjoint ad;
ad << Phi, Tau, Zer, Phi;
return ad;
}
EIGEN_DEVICE_FUNC Tangent Log() const {
Vector3 phi = so3.Log();
Matrix3 Vinv = SO3<Scalar>::left_jacobian_inverse(phi);
Tangent tau_phi;
tau_phi << Vinv * translation, phi;
return tau_phi;
}
EIGEN_DEVICE_FUNC static SE3<Scalar> Exp(Tangent const& tau_phi) {
Vector3 tau = tau_phi.template segment<3>(0);
Vector3 phi = tau_phi.template segment<3>(3);
SO3<Scalar> so3 = SO3<Scalar>::Exp(phi);
Vector3 t = SO3<Scalar>::left_jacobian(phi) * tau;
return SE3<Scalar>(so3, t);
}
EIGEN_DEVICE_FUNC static Matrix3 calcQ(Tangent const& tau_phi) {
// Q matrix
Vector3 tau = tau_phi.template segment<3>(0);
Vector3 phi = tau_phi.template segment<3>(3);
Matrix3 Tau = SO3<Scalar>::hat(tau);
Matrix3 Phi = SO3<Scalar>::hat(phi);
Scalar theta = phi.norm();
Scalar theta_pow2 = theta * theta;
Scalar theta_pow4 = theta_pow2 * theta_pow2;
Scalar coef1 = (theta < EPS) ?
Scalar(1.0/6.0) - Scalar(1.0/120.0) * theta_pow2 :
(theta - sin(theta)) / (theta_pow2 * theta);
Scalar coef2 = (theta < EPS) ?
Scalar(1.0/24.0) - Scalar(1.0/720.0) * theta_pow2 :
(theta_pow2 + 2*cos(theta) - 2) / (2 * theta_pow4);
Scalar coef3 = (theta < EPS) ?
Scalar(1.0/120.0) - Scalar(1.0/2520.0) * theta_pow2 :
(2*theta - 3*sin(theta) + theta*cos(theta)) / (2 * theta_pow4 * theta);
Matrix3 Q = Scalar(0.5) * Tau +
coef1 * (Phi*Tau + Tau*Phi + Phi*Tau*Phi) +
coef2 * (Phi*Phi*Tau + Tau*Phi*Phi - 3*Phi*Tau*Phi) +
coef3 * (Phi*Tau*Phi*Phi + Phi*Phi*Tau*Phi);
return Q;
}
EIGEN_DEVICE_FUNC static Adjoint left_jacobian(Tangent const& tau_phi) {
// left jacobian
Vector3 tau = tau_phi.template segment<3>(0);
Vector3 phi = tau_phi.template segment<3>(3);
Matrix3 J = SO3<Scalar>::left_jacobian(phi);
Matrix3 Q = SE3<Scalar>::calcQ(tau_phi);
Matrix3 Zer = Matrix3::Zero();
Adjoint J6x6;
J6x6 << J, Q, Zer, J;
return J6x6;
}
EIGEN_DEVICE_FUNC static Adjoint left_jacobian_inverse(Tangent const& tau_phi) {
// left jacobian inverse
Vector3 tau = tau_phi.template segment<3>(0);
Vector3 phi = tau_phi.template segment<3>(3);
Matrix3 Jinv = SO3<Scalar>::left_jacobian_inverse(phi);
Matrix3 Q = SE3<Scalar>::calcQ(tau_phi);
Matrix3 Zer = Matrix3::Zero();
Adjoint J6x6;
J6x6 << Jinv, -Jinv * Q * Jinv, Zer, Jinv;
return J6x6;
}
EIGEN_DEVICE_FUNC static Eigen::Matrix<Scalar,3,6> act_jacobian(Point const& p) {
// jacobian action on a point
Eigen::Matrix<Scalar,3,6> J;
J.template block<3,3>(0,0) = Matrix3::Identity();
J.template block<3,3>(0,3) = SO3<Scalar>::hat(-p);
return J;
}
EIGEN_DEVICE_FUNC static Eigen::Matrix<Scalar,4,6> act4_jacobian(Point4 const& p) {
// jacobian action on a point
Eigen::Matrix<Scalar,4,6> J = Eigen::Matrix<Scalar,4,6>::Zero();
J.template block<3,3>(0,0) = p(3) * Matrix3::Identity();
J.template block<3,3>(0,3) = SO3<Scalar>::hat(-p.template segment<3>(0));
return J;
}
private:
SO3<Scalar> so3;
Vector3 translation;
};
#endif
#ifndef Sim3_HEADER
#define Sim3_HEADER
#include <stdio.h>
#include <iostream>
#include <Eigen/Dense>
#include <Eigen/Geometry>
#include "common.h"
#include "so3.h"
#include "rxso3.h"
template <typename Scalar>
class Sim3 {
public:
const static int constexpr K = 7; // manifold dimension
const static int constexpr N = 8; // embedding dimension
using Vector3 = Eigen::Matrix<Scalar,3,1>;
using Vector4 = Eigen::Matrix<Scalar,4,1>;
using Matrix3 = Eigen::Matrix<Scalar,3,3>;
using Tangent = Eigen::Matrix<Scalar,K,1>;
using Point = Eigen::Matrix<Scalar,3,1>;
using Point4 = Eigen::Matrix<Scalar,4,1>;
using Data = Eigen::Matrix<Scalar,N,1>;
using Transformation = Eigen::Matrix<Scalar,4,4>;
using Adjoint = Eigen::Matrix<Scalar,K,K>;
EIGEN_DEVICE_FUNC Sim3() {
translation = Vector3::Zero();
}
EIGEN_DEVICE_FUNC Sim3(RxSO3<Scalar> const& rxso3, Vector3 const& t)
: rxso3(rxso3), translation(t) {};
EIGEN_DEVICE_FUNC Sim3(const Scalar *data)
: translation(data), rxso3(data+3) {};
EIGEN_DEVICE_FUNC Sim3<Scalar> inv() {
return Sim3<Scalar>(rxso3.inv(), -(rxso3.inv() * translation));
}
EIGEN_DEVICE_FUNC Data data() const {
Data data_vec; data_vec << translation, rxso3.data();
return data_vec;
}
EIGEN_DEVICE_FUNC Sim3<Scalar> operator*(Sim3<Scalar> const& other) {
return Sim3(rxso3 * other.rxso3, translation + rxso3 * other.translation);
}
EIGEN_DEVICE_FUNC Point operator*(Point const& p) const {
return (rxso3 * p) + translation;
}
EIGEN_DEVICE_FUNC Point4 act4(Point4 const& p) const {
Point4 p1; p1 << rxso3 * p.template segment<3>(0) + p(3) * translation , p(3);
return p1;
}
EIGEN_DEVICE_FUNC Transformation Matrix() const {
Transformation T = Transformation::Identity();
T.template block<3,3>(0,0) = rxso3.Matrix();
T.template block<3,1>(0,3) = translation;
return T;
}
EIGEN_DEVICE_FUNC Transformation Matrix4x4() const {
Transformation T = Transformation::Identity();
T.template block<3,3>(0,0) = rxso3.Matrix();
T.template block<3,1>(0,3) = translation;
return T;
}
EIGEN_DEVICE_FUNC Adjoint Adj() const {
Adjoint Ad = Adjoint::Identity();
Matrix3 sR = rxso3.Matrix();
Matrix3 tx = SO3<Scalar>::hat(translation);
Matrix3 R = rxso3.Rotation();
Ad.template block<3,3>(0,0) = sR;
Ad.template block<3,3>(0,3) = tx * R;
Ad.template block<3,1>(0,6) = -translation;
Ad.template block<3,3>(3,3) = R;
return Ad;
}
EIGEN_DEVICE_FUNC Tangent Adj(Tangent const& a) const {
return Adj() * a;
}
EIGEN_DEVICE_FUNC Tangent AdjT(Tangent const& a) const {
return Adj().transpose() * a;
}
EIGEN_DEVICE_FUNC static Transformation hat(Tangent const& tau_phi_sigma) {
Vector3 tau = tau_phi_sigma.template segment<3>(0);
Vector3 phi = tau_phi_sigma.template segment<3>(3);
Scalar sigma = tau_phi_sigma(6);
Matrix3 Phi = SO3<Scalar>::hat(phi);
Matrix3 I = Matrix3::Identity();
Transformation Omega = Transformation::Zero();
Omega.template block<3,3>(0,0) = Phi + sigma * I;
Omega.template block<3,1>(0,3) = tau;
return Omega;
}
EIGEN_DEVICE_FUNC static Adjoint adj(Tangent const& tau_phi_sigma) {
Adjoint ad = Adjoint::Zero();
Vector3 tau = tau_phi_sigma.template segment<3>(0);
Vector3 phi = tau_phi_sigma.template segment<3>(3);
Scalar sigma = tau_phi_sigma(6);
Matrix3 Tau = SO3<Scalar>::hat(tau);
Matrix3 Phi = SO3<Scalar>::hat(phi);
Matrix3 I = Matrix3::Identity();
ad.template block<3,3>(0,0) = Phi + sigma * I;
ad.template block<3,3>(0,3) = Tau;
ad.template block<3,1>(0,6) = -tau;
ad.template block<3,3>(3,3) = Phi;
return ad;
}
EIGEN_DEVICE_FUNC Tangent Log() const {
// logarithm map
Vector4 phi_sigma = rxso3.Log();
Matrix3 W = RxSO3<Scalar>::calcW(phi_sigma);
Tangent tau_phi_sigma;
tau_phi_sigma << W.inverse() * translation, phi_sigma;
return tau_phi_sigma;
}
EIGEN_DEVICE_FUNC static Sim3<Scalar> Exp(Tangent const& tau_phi_sigma) {
// exponential map
Vector3 tau = tau_phi_sigma.template segment<3>(0);
Vector4 phi_sigma = tau_phi_sigma.template segment<4>(3);
RxSO3<Scalar> rxso3 = RxSO3<Scalar>::Exp(phi_sigma);
Matrix3 W = RxSO3<Scalar>::calcW(phi_sigma);
return Sim3<Scalar>(rxso3, W*tau);
}
EIGEN_DEVICE_FUNC static Adjoint left_jacobian(Tangent const& tau_phi_sigma) {
// left jacobian
Adjoint const Xi = adj(tau_phi_sigma);
Adjoint const Xi2 = Xi * Xi;
Adjoint const Xi4 = Xi2 * Xi2;
return Adjoint::Identity()
+ Scalar(1.0/2.0)*Xi
+ Scalar(1.0/6.0)*Xi2
+ Scalar(1.0/24.0)*Xi*Xi2
+ Scalar(1.0/120.0)*Xi4
+ Scalar(1.0/720.0)*Xi*Xi4;
}
EIGEN_DEVICE_FUNC static Adjoint left_jacobian_inverse(Tangent const& tau_phi_sigma) {
// left jacobian inverse
Adjoint const Xi = adj(tau_phi_sigma);
Adjoint const Xi2 = Xi * Xi;
Adjoint const Xi4 = Xi2 * Xi2;
return Adjoint::Identity()
- Scalar(1.0/2.0)*Xi
+ Scalar(1.0/12.0)*Xi2
- Scalar(1.0/720.0)*Xi4;
}
EIGEN_DEVICE_FUNC static Eigen::Matrix<Scalar,3,7> act_jacobian(Point const& p) {
// jacobian action on a point
Eigen::Matrix<Scalar,3,7> J;
J.template block<3,3>(0,0) = Matrix3::Identity();
J.template block<3,3>(0,3) = SO3<Scalar>::hat(-p);
J.template block<3,1>(0,6) = p;
return J;
}
EIGEN_DEVICE_FUNC static Eigen::Matrix<Scalar,4,7> act4_jacobian(Point4 const& p) {
// jacobian action on a point
Eigen::Matrix<Scalar,4,7> J = Eigen::Matrix<Scalar,4,7>::Zero();
J.template block<3,3>(0,0) = p(3) * Matrix3::Identity();
J.template block<3,3>(0,3) = SO3<Scalar>::hat(-p.template segment<3>(0));
J.template block<3,1>(0,6) = p.template segment<3>(0);
return J;
}
private:
Vector3 translation;
RxSO3<Scalar> rxso3;
};
#endif
#ifndef SO3_HEADER
#define SO3_HEADER
#include <cuda.h>
#include <stdio.h>
#include <Eigen/Dense>
#include <Eigen/Geometry>
#include "common.h"
template <typename Scalar>
class SO3 {
public:
const static int constexpr K = 3; // manifold dimension
const static int constexpr N = 4; // embedding dimension
using Vector3 = Eigen::Matrix<Scalar,3,1>;
using Vector4 = Eigen::Matrix<Scalar,4,1>;
using Matrix3 = Eigen::Matrix<Scalar,3,3>;
using Tangent = Eigen::Matrix<Scalar,K,1>;
using Data = Eigen::Matrix<Scalar,N,1>;
using Point = Eigen::Matrix<Scalar,3,1>;
using Point4 = Eigen::Matrix<Scalar,4,1>;
using Transformation = Eigen::Matrix<Scalar,3,3>;
using Adjoint = Eigen::Matrix<Scalar,K,K>;
using Quaternion = Eigen::Quaternion<Scalar>;
EIGEN_DEVICE_FUNC SO3(Quaternion const& q) : unit_quaternion(q) {
unit_quaternion.normalize();
};
EIGEN_DEVICE_FUNC SO3(const Scalar *data) : unit_quaternion(data) {
unit_quaternion.normalize();
};
EIGEN_DEVICE_FUNC SO3() {
unit_quaternion = Quaternion::Identity();
}
EIGEN_DEVICE_FUNC SO3<Scalar> inv() {
return SO3<Scalar>(unit_quaternion.conjugate());
}
EIGEN_DEVICE_FUNC Data data() const {
return unit_quaternion.coeffs();
}
EIGEN_DEVICE_FUNC SO3<Scalar> operator*(SO3<Scalar> const& other) {
return SO3(unit_quaternion * other.unit_quaternion);
}
EIGEN_DEVICE_FUNC Point operator*(Point const& p) const {
const Quaternion& q = unit_quaternion;
Point uv = q.vec().cross(p);
uv += uv;
return p + q.w()*uv + q.vec().cross(uv);
}
EIGEN_DEVICE_FUNC Point4 act4(Point4 const& p) const {
Point4 p1; p1 << this->operator*(p.template segment<3>(0)), p(3);
return p1;
}
EIGEN_DEVICE_FUNC Adjoint Adj() const {
return unit_quaternion.toRotationMatrix();
}
EIGEN_DEVICE_FUNC Transformation Matrix() const {
return unit_quaternion.toRotationMatrix();
}
EIGEN_DEVICE_FUNC Eigen::Matrix<Scalar,4,4> Matrix4x4() const {
Eigen::Matrix<Scalar,4,4> T = Eigen::Matrix<Scalar,4,4>::Identity();
T.template block<3,3>(0,0) = Matrix();
return T;
}
EIGEN_DEVICE_FUNC Tangent Adj(Tangent const& a) const {
return Adj() * a;
}
EIGEN_DEVICE_FUNC Tangent AdjT(Tangent const& a) const {
return Adj().transpose() * a;
}
EIGEN_DEVICE_FUNC static Transformation hat(Tangent const& phi) {
Transformation Phi;
Phi <<
0.0, -phi(2), phi(1),
phi(2), 0.0, -phi(0),
-phi(1), phi(0), 0.0;
return Phi;
}
EIGEN_DEVICE_FUNC static Adjoint adj(Tangent const& phi) {
return SO3<Scalar>::hat(phi);
}
EIGEN_DEVICE_FUNC Tangent Log() const {
using std::abs;
using std::atan;
using std::sqrt;
Scalar squared_n = unit_quaternion.vec().squaredNorm();
Scalar w = unit_quaternion.w();
Scalar two_atan_nbyw_by_n;
/// Atan-based log thanks to
///
/// C. Hertzberg et al.:
/// "Integrating Generic Sensor Fusion Algorithms with Sound State
/// Representation through Encapsulation of Manifolds"
/// Information Fusion, 2011
if (squared_n < EPS * EPS) {
// If quaternion is normalized and n=0, then w should be 1;
// w=0 should never happen here!
Scalar squared_w = w * w;
two_atan_nbyw_by_n =
Scalar(2) / w - Scalar(2.0/3.0) * (squared_n) / (w * squared_w);
} else {
Scalar n = sqrt(squared_n);
if (abs(w) < EPS) {
if (w > Scalar(0)) {
two_atan_nbyw_by_n = Scalar(PI) / n;
} else {
two_atan_nbyw_by_n = -Scalar(PI) / n;
}
} else {
two_atan_nbyw_by_n = Scalar(2) * atan(n / w) / n;
}
}
return two_atan_nbyw_by_n * unit_quaternion.vec();
}
EIGEN_DEVICE_FUNC static SO3<Scalar> Exp(Tangent const& phi) {
Scalar theta2 = phi.squaredNorm();
Scalar theta = sqrt(theta2);
Scalar imag_factor;
Scalar real_factor;
if (theta < EPS) {
Scalar theta4 = theta2 * theta2;
imag_factor = Scalar(0.5) - Scalar(1.0/48.0) * theta2 + Scalar(1.0/3840.0) * theta4;
real_factor = Scalar(1) - Scalar(1.0/8.0) * theta2 + Scalar(1.0/384.0) * theta4;
} else {
imag_factor = sin(.5 * theta) / theta;
real_factor = cos(.5 * theta);
}
Quaternion q(real_factor, imag_factor*phi.x(), imag_factor*phi.y(), imag_factor*phi.z());
return SO3<Scalar>(q);
}
EIGEN_DEVICE_FUNC static Adjoint left_jacobian(Tangent const& phi) {
// left jacobian
Matrix3 I = Matrix3::Identity();
Matrix3 Phi = SO3<Scalar>::hat(phi);
Matrix3 Phi2 = Phi * Phi;
Scalar theta2 = phi.squaredNorm();
Scalar theta = sqrt(theta2);
Scalar coef1 = (theta < EPS) ?
Scalar(1.0/2.0) - Scalar(1.0/24.0) * theta2 :
(1.0 - cos(theta)) / theta2;
Scalar coef2 = (theta < EPS) ?
Scalar(1.0/6.0) - Scalar(1.0/120.0) * theta2 :
(theta - sin(theta)) / (theta2 * theta);
return I + coef1 * Phi + coef2 * Phi2;
}
EIGEN_DEVICE_FUNC static Adjoint left_jacobian_inverse(Tangent const& phi) {
// left jacobian inverse
Matrix3 I = Matrix3::Identity();
Matrix3 Phi = SO3<Scalar>::hat(phi);
Matrix3 Phi2 = Phi * Phi;
Scalar theta2 = phi.squaredNorm();
Scalar theta = sqrt(theta2);
Scalar half_theta = Scalar(.5) * theta ;
Scalar coef2 = (theta < EPS) ? Scalar(1.0/12.0) :
(Scalar(1) -
theta * cos(half_theta) / (Scalar(2) * sin(half_theta))) /
(theta * theta);
return I + Scalar(-0.5) * Phi + coef2 * Phi2;
}
EIGEN_DEVICE_FUNC static Eigen::Matrix<Scalar,3,3> act_jacobian(Point const& p) {
// jacobian action on a point
return SO3<Scalar>::hat(-p);
}
EIGEN_DEVICE_FUNC static Eigen::Matrix<Scalar,4,3> act4_jacobian(Point4 const& p) {
// jacobian action on a point
Eigen::Matrix<Scalar,4,3> J = Eigen::Matrix<Scalar,4,3>::Zero();
J.template block<3,3>(0,0) = SO3<Scalar>::hat(-p.template segment<3>(0));
return J;
}
private:
Quaternion unit_quaternion;
};
#endif
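// A minimal sketch of how the templated group classes above are meant to be
// used from host code (assuming Eigen and the CUDA headers pulled in by so3.h
// are on the include path; the values below are arbitrary):
#include <iostream>
#include "se3.h"

int main() {
  // SE3 tangent vector (tau, phi): translation part followed by rotation part
  Eigen::Matrix<double,6,1> xi;
  xi << 0.1, -0.2, 0.3, 0.01, 0.02, -0.03;

  SE3<double> X = SE3<double>::Exp(xi);     // exponential map: tangent -> group
  Eigen::Matrix<double,6,1> xi2 = X.Log();  // logarithm map: group -> tangent

  std::cout << (xi - xi2).norm() << std::endl;  // ~0, i.e. Log(Exp(xi)) == xi
  return 0;
}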
import torch
import lietorch
from lietorch import SO3, RxSO3, SE3, Sim3
from gradcheck import gradcheck, get_analytical_jacobian
### forward tests ###
def make_homogeneous(p):
return torch.cat([p, torch.ones_like(p[...,:1])], dim=-1)
def matv(A, b):
return torch.matmul(A, b[...,None])[..., 0]
def test_exp_log(Group, device='cuda'):
""" check Log(Exp(x)) == x """
a = .2*torch.randn(2,3,4,5,6,7,Group.manifold_dim, device=device).double()
b = Group.exp(a).log()
assert torch.allclose(a,b,atol=1e-8), "should be identity"
print("\t-", Group, "Passed exp-log test")
def test_inv(Group, device='cuda'):
""" check X * X^{-1} == 0 """
X = Group.exp(.1*torch.randn(2,3,4,5,Group.manifold_dim, device=device).double())
a = (X * X.inv()).log()
assert torch.allclose(a, torch.zeros_like(a), atol=1e-8), "should be 0"
print("\t-", Group, "Passed inv test")
def test_adj(Group, device='cuda'):
""" check X * Exp(a) == Exp(Adj(X,a)) * X 0 """
X = Group.exp(torch.randn(2,3,4,5, Group.manifold_dim, device=device).double())
a = torch.randn(2,3,4,5, Group.manifold_dim, device=device).double()
b = X.adj(a)
Y1 = X * Group.exp(a)
Y2 = Group.exp(b) * X
c = (Y1 * Y2.inv()).log()
assert torch.allclose(c, torch.zeros_like(c), atol=1e-8), "should be 0"
print("\t-", Group, "Passed adj test")
def test_act(Group, device='cuda'):
X = Group.exp(torch.randn(1, Group.manifold_dim, device=device).double())
p = torch.randn(1,3,device=device).double()
p1 = X.act(p)
p2 = matv(X.matrix(), make_homogeneous(p))
assert torch.allclose(p1, p2[...,:3], atol=1e-8), "should be equal"
print("\t-", Group, "Passed act test")
### backward tests ###
def test_exp_log_grad(Group, device='cuda', tol=1e-8):
D = Group.manifold_dim
def fn(a):
return Group.exp(a).log()
a = torch.zeros(1, Group.manifold_dim, requires_grad=True, device=device).double()
analytical, reentrant, correct_grad_sizes, correct_grad_types = \
get_analytical_jacobian((a,), fn(a))
assert torch.allclose(analytical[0], torch.eye(D, device=device).double(), atol=tol)
a = .2 * torch.randn(1, Group.manifold_dim, requires_grad=True, device=device).double()
analytical, reentrant, correct_grad_sizes, correct_grad_types = \
get_analytical_jacobian((a,), fn(a))
assert torch.allclose(analytical[0], torch.eye(D, device=device).double(), atol=tol)
print("\t-", Group, "Passed eye-grad test")
def test_inv_log_grad(Group, device='cuda', tol=1e-8):
D = Group.manifold_dim
X = Group.exp(.2*torch.randn(1,D,device=device).double())
def fn(a):
return (Group.exp(a) * X).inv().log()
a = torch.zeros(1, D, requires_grad=True, device=device).double()
analytical, numerical = gradcheck(fn, [a], eps=1e-4)
# assert torch.allclose(analytical[0], numerical[0], atol=tol)
if not torch.allclose(analytical[0], numerical[0], atol=tol):
print(analytical[0])
print(numerical[0])
print("\t-", Group, "Passed inv-grad test")
def test_adj_grad(Group, device='cuda'):
D = Group.manifold_dim
X = Group.exp(.5*torch.randn(1,Group.manifold_dim, device=device).double())
def fn(a, b):
return (Group.exp(a) * X).adj(b)
a = torch.zeros(1, D, requires_grad=True, device=device).double()
b = torch.randn(1, D, requires_grad=True, device=device).double()
analytical, numerical = gradcheck(fn, [a, b], eps=1e-4)
assert torch.allclose(analytical[0], numerical[0], atol=1e-8)
assert torch.allclose(analytical[1], numerical[1], atol=1e-8)
print("\t-", Group, "Passed adj-grad test")
def test_adjT_grad(Group, device='cuda'):
D = Group.manifold_dim
X = Group.exp(.5*torch.randn(1,Group.manifold_dim, device=device).double())
def fn(a, b):
return (Group.exp(a) * X).adjT(b)
a = torch.zeros(1, D, requires_grad=True, device=device).double()
b = torch.randn(1, D, requires_grad=True, device=device).double()
analytical, numerical = gradcheck(fn, [a, b], eps=1e-4)
assert torch.allclose(analytical[0], numerical[0], atol=1e-8)
assert torch.allclose(analytical[1], numerical[1], atol=1e-8)
print("\t-", Group, "Passed adjT-grad test")
def test_act_grad(Group, device='cuda'):
D = Group.manifold_dim
X = Group.exp(5*torch.randn(1,D, device=device).double())
def fn(a, b):
return (X*Group.exp(a)).act(b)
a = torch.zeros(1, D, requires_grad=True, device=device).double()
b = torch.randn(1, 3, requires_grad=True, device=device).double()
analytical, numerical = gradcheck(fn, [a, b], eps=1e-4)
assert torch.allclose(analytical[0], numerical[0], atol=1e-8)
assert torch.allclose(analytical[1], numerical[1], atol=1e-8)
print("\t-", Group, "Passed act-grad test")
def scale(device='cuda'):
def fn(a, s):
X = SE3.exp(a)
X.scale(s)
return X.log()
s = torch.rand(1, requires_grad=True, device=device).double()
a = torch.randn(1, 6, requires_grad=True, device=device).double()
analytical, numerical = gradcheck(fn, [a, s], eps=1e-3)
print(analytical[1])
print(numerical[1])
assert torch.allclose(analytical[0], numerical[0], atol=1e-8)
assert torch.allclose(analytical[1], numerical[1], atol=1e-8)
print("\t-", "Passed se3-to-sim3 test")
def extract_translation(Group, device='cuda'):
""" prototype function """
D = Group.manifold_dim
X = Group.exp(5*torch.randn(1,D, device=device).double())
def fn(a):
return (Group.exp(a)*X).translation()
a = torch.zeros(1, D, requires_grad=True, device=device).double()
analytical, numerical = gradcheck(fn, [a], eps=1e-4)
print(analytical[0])
print(numerical[0])
assert torch.allclose(analytical[0], numerical[0], atol=1e-8)
print("\t-", Group, "Passed translation test")
if __name__ == '__main__':
print("Testing lietorch forward pass (CPU) ...")
for Group in [SO3, RxSO3, SE3, Sim3]:
test_exp_log(Group, device='cpu')
test_inv(Group, device='cpu')
test_adj(Group, device='cpu')
test_act(Group, device='cpu')
print("Testing lietorch backward pass (CPU)...")
for Group in [SO3, RxSO3, SE3, Sim3]:
if Group == Sim3:
tol = 1e-3
else:
tol = 1e-8
test_exp_log_grad(Group, device='cpu', tol=tol)
test_inv_log_grad(Group, device='cpu', tol=tol)
test_adj_grad(Group, device='cpu')
test_adjT_grad(Group, device='cpu')
test_act_grad(Group, device='cpu')
print("Testing lietorch forward pass (GPU) ...")
for Group in [SO3, RxSO3, SE3, Sim3]:
test_exp_log(Group, device='cuda')
test_inv(Group, device='cuda')
test_adj(Group, device='cuda')
test_act(Group, device='cuda')
print("Testing lietorch backward pass (GPU)...")
for Group in [SO3, RxSO3, SE3, Sim3]:
if Group == Sim3:
tol = 1e-3
else:
tol = 1e-8
test_exp_log_grad(Group, device='cuda', tol=tol)
test_inv_log_grad(Group, device='cuda', tol=tol)
test_adj_grad(Group, device='cuda')
test_adjT_grad(Group, device='cuda')
test_act_grad(Group, device='cuda')
#include <torch/extension.h>
#include <vector>
#include "lietorch_gpu.h"
#include "lietorch_cpu.h"
#define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous")
/* Interface for cuda and c++ group operations
enum group_t { SO3=1, SE3=2, Sim3=3 };
X, Y, Z: (uppercase) Lie Group Elements
a, b, c: (lowercase) Lie Algebra Elements
*/
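// Shape conventions (from the group classes): tangent vectors a, b, c are
// (batch, K) tensors and group elements X, Y, Z are (batch, N) tensors, where
// K and N are the manifold / embedding dimensions defined in so3.h, rxso3.h,
// se3.h and sim3.h.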
// Unary operations
torch::Tensor expm(int group_index, torch::Tensor a) {
CHECK_CONTIGUOUS(a);
if (a.device().type() == torch::DeviceType::CPU) {
return exp_forward_cpu(group_index, a);
} else if (a.device().type() == torch::DeviceType::CUDA) {
return exp_forward_gpu(group_index, a);
}
}
std::vector<torch::Tensor> expm_backward(int group_index, torch::Tensor grad, torch::Tensor a) {
CHECK_CONTIGUOUS(a);
CHECK_CONTIGUOUS(grad);
if (a.device().type() == torch::DeviceType::CPU) {
return exp_backward_cpu(group_index, grad, a);
} else if (a.device().type() == torch::DeviceType::CUDA) {
return exp_backward_gpu(group_index, grad, a);
}
}
torch::Tensor logm(int group_index, torch::Tensor X) {
CHECK_CONTIGUOUS(X);
if (X.device().type() == torch::DeviceType::CPU) {
return log_forward_cpu(group_index, X);
} else if (X.device().type() == torch::DeviceType::CUDA) {
return log_forward_gpu(group_index, X);
}
}
std::vector<torch::Tensor> logm_backward(int group_index, torch::Tensor grad, torch::Tensor X) {
CHECK_CONTIGUOUS(X);
CHECK_CONTIGUOUS(grad);
if (X.device().type() == torch::DeviceType::CPU) {
return log_backward_cpu(group_index, grad, X);
} else if (X.device().type() == torch::DeviceType::CUDA) {
return log_backward_gpu(group_index, grad, X);
}
}
torch::Tensor inv(int group_index, torch::Tensor X) {
CHECK_CONTIGUOUS(X);
if (X.device().type() == torch::DeviceType::CPU) {
return inv_forward_cpu(group_index, X);
} else if (X.device().type() == torch::DeviceType::CUDA) {
return inv_forward_gpu(group_index, X);
}
}
std::vector<torch::Tensor> inv_backward(int group_index, torch::Tensor grad, torch::Tensor X) {
CHECK_CONTIGUOUS(X);
CHECK_CONTIGUOUS(grad);
if (X.device().type() == torch::DeviceType::CPU) {
return inv_backward_cpu(group_index, grad, X);
} else if (X.device().type() == torch::DeviceType::CUDA) {
return inv_backward_gpu(group_index, grad, X);
}
}
// Binary operations
torch::Tensor mul(int group_index, torch::Tensor X, torch::Tensor Y) {
CHECK_CONTIGUOUS(X);
CHECK_CONTIGUOUS(Y);
if (X.device().type() == torch::DeviceType::CPU) {
return mul_forward_cpu(group_index, X, Y);
} else if (X.device().type() == torch::DeviceType::CUDA) {
return mul_forward_gpu(group_index, X, Y);
}
}
std::vector<torch::Tensor> mul_backward(int group_index, torch::Tensor grad, torch::Tensor X, torch::Tensor Y) {
CHECK_CONTIGUOUS(X);
CHECK_CONTIGUOUS(Y);
CHECK_CONTIGUOUS(grad);
if (X.device().type() == torch::DeviceType::CPU) {
return mul_backward_cpu(group_index, grad, X, Y);
} else if (X.device().type() == torch::DeviceType::CUDA) {
return mul_backward_gpu(group_index, grad, X, Y);
}
}
torch::Tensor adj(int group_index, torch::Tensor X, torch::Tensor a) {
CHECK_CONTIGUOUS(X);
CHECK_CONTIGUOUS(a);
if (X.device().type() == torch::DeviceType::CPU) {
return adj_forward_cpu(group_index, X, a);
} else if (X.device().type() == torch::DeviceType::CUDA) {
return adj_forward_gpu(group_index, X, a);
}
}
std::vector<torch::Tensor> adj_backward(int group_index, torch::Tensor grad, torch::Tensor X, torch::Tensor a) {
CHECK_CONTIGUOUS(X);
CHECK_CONTIGUOUS(a);
CHECK_CONTIGUOUS(grad);
if (X.device().type() == torch::DeviceType::CPU) {
return adj_backward_cpu(group_index, grad, X, a);
} else if (X.device().type() == torch::DeviceType::CUDA) {
return adj_backward_gpu(group_index, grad, X, a);
}
}
torch::Tensor adjT(int group_index, torch::Tensor X, torch::Tensor a) {
CHECK_CONTIGUOUS(X);
CHECK_CONTIGUOUS(a);
if (X.device().type() == torch::DeviceType::CPU) {
return adjT_forward_cpu(group_index, X, a);
} else if (X.device().type() == torch::DeviceType::CUDA) {
return adjT_forward_gpu(group_index, X, a);
}
}
std::vector<torch::Tensor> adjT_backward(int group_index, torch::Tensor grad, torch::Tensor X, torch::Tensor a) {
CHECK_CONTIGUOUS(X);
CHECK_CONTIGUOUS(a);
CHECK_CONTIGUOUS(grad);
if (X.device().type() == torch::DeviceType::CPU) {
return adjT_backward_cpu(group_index, grad, X, a);
} else if (X.device().type() == torch::DeviceType::CUDA) {
return adjT_backward_gpu(group_index, grad, X, a);
}
}
torch::Tensor act(int group_index, torch::Tensor X, torch::Tensor p) {
CHECK_CONTIGUOUS(X);
CHECK_CONTIGUOUS(p);
if (X.device().type() == torch::DeviceType::CPU) {
return act_forward_cpu(group_index, X, p);
} else if (X.device().type() == torch::DeviceType::CUDA) {
return act_forward_gpu(group_index, X, p);
}
}
std::vector<torch::Tensor> act_backward(int group_index, torch::Tensor grad, torch::Tensor X, torch::Tensor p) {
CHECK_CONTIGUOUS(X);
CHECK_CONTIGUOUS(p);
CHECK_CONTIGUOUS(grad);
if (X.device().type() == torch::DeviceType::CPU) {
return act_backward_cpu(group_index, grad, X, p);
} else if (X.device().type() == torch::DeviceType::CUDA) {
return act_backward_gpu(group_index, grad, X, p);
}
}
torch::Tensor act4(int group_index, torch::Tensor X, torch::Tensor p) {
CHECK_CONTIGUOUS(X);
CHECK_CONTIGUOUS(p);
if (X.device().type() == torch::DeviceType::CPU) {
return act4_forward_cpu(group_index, X, p);
} else if (X.device().type() == torch::DeviceType::CUDA) {
return act4_forward_gpu(group_index, X, p);
}
}
std::vector<torch::Tensor> act4_backward(int group_index, torch::Tensor grad, torch::Tensor X, torch::Tensor p) {
CHECK_CONTIGUOUS(X);
CHECK_CONTIGUOUS(p);
CHECK_CONTIGUOUS(grad);
if (X.device().type() == torch::DeviceType::CPU) {
return act4_backward_cpu(group_index, grad, X, p);
} else if (X.device().type() == torch::DeviceType::CUDA) {
return act4_backward_gpu(group_index, grad, X, p);
}
}
torch::Tensor as_matrix(int group_index, torch::Tensor X) {
CHECK_CONTIGUOUS(X);
if (X.device().type() == torch::DeviceType::CPU) {
return as_matrix_forward_cpu(group_index, X);
} else if (X.device().type() == torch::DeviceType::CUDA) {
return as_matrix_forward_gpu(group_index, X);
}
}
torch::Tensor Jinv(int group_index, torch::Tensor X, torch::Tensor a) {
CHECK_CONTIGUOUS(X);
CHECK_CONTIGUOUS(a);
if (X.device().type() == torch::DeviceType::CPU) {
return jleft_forward_cpu(group_index, X, a);
} else if (X.device().type() == torch::DeviceType::CUDA) {
return jleft_forward_gpu(group_index, X, a);
}
}
// {exp, log, inv, mul, adj, adjT, act, act4} forward/backward bindings
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("expm", &expm, "exp map forward");
m.def("expm_backward", &expm_backward, "exp map backward");
m.def("logm", &logm, "log map forward");
m.def("logm_backward", &logm_backward, "log map backward");
m.def("inv", &inv, "inverse operator");
m.def("inv_backward", &inv_backward, "inverse operator backward");
m.def("mul", &mul, "group operator");
m.def("mul_backward", &mul_backward, "group operator backward");
m.def("adj", &adj, "adjoint operator");
m.def("adj_backward", &adj_backward, "adjoint operator backward");
m.def("adjT", &adjT, "transposed adjoint operator");
m.def("adjT_backward", &adjT_backward, "transposed adjoint operator backward");
m.def("act", &act, "action on point");
m.def("act_backward", &act_backward, "action on point backward");
m.def("act4", &act4, "action on homogeneous point");
m.def("act4_backward", &act4_backward, "action on homogeneous point backward");
// functions with no gradient
m.def("as_matrix", &as_matrix, "convert to matrix");
m.def("Jinv", &Jinv, "left inverse jacobian operator");
};
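# A minimal sketch of how the bindings above pair up on the Python side: each
# forward op has a matching *_backward op so it can be wrapped in a custom
# torch.autograd.Function. The module name "lietorch_backends" and the Exp
# wrapper are illustrative assumptions, not the library's actual Python API.
import torch
import lietorch_backends  # the compiled TORCH_EXTENSION_NAME module (assumed name)

class Exp(torch.autograd.Function):
    @staticmethod
    def forward(ctx, group_id, a):
        ctx.group_id = group_id
        ctx.save_for_backward(a)
        return lietorch_backends.expm(group_id, a.contiguous())

    @staticmethod
    def backward(ctx, grad):
        a, = ctx.saved_tensors
        da, = lietorch_backends.expm_backward(ctx.group_id, grad.contiguous(), a)
        return None, da  # no gradient w.r.t. the group index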
#include "lietorch_cpu.h"
#include <Eigen/Dense>
#include <iostream>
#include "common.h"
#include "dispatch.h"
#include "so3.h"
#include "rxso3.h"
#include "se3.h"
#include "sim3.h"
template <typename Group, typename scalar_t>
void exp_forward_kernel(const scalar_t* a_ptr, scalar_t* X_ptr, int batch_size) {
// exponential map forward kernel
using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
using Data = Eigen::Matrix<scalar_t,Group::N,1>;
at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) {
for (int64_t i=start; i<end; i++) {
Tangent a(a_ptr + i*Group::K);
Eigen::Map<Data>(X_ptr + i*Group::N) = Group::Exp(a).data();
}
});
}
template <typename Group, typename scalar_t>
void exp_backward_kernel(const scalar_t* grad, const scalar_t* a_ptr, scalar_t* da, int batch_size) {
// exponential map backward kernel
using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
using Grad = Eigen::Matrix<scalar_t,1,Group::K>;
using Data = Eigen::Matrix<scalar_t,Group::N,1>;
at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) {
for (int64_t i=start; i<end; i++) {
Tangent a(a_ptr + i*Group::K);
Grad dX(grad + i*Group::N);
Eigen::Map<Grad>(da + i*Group::K) = dX * Group::left_jacobian(a);
}
});
}
template <typename Group, typename scalar_t>
void log_forward_kernel(const scalar_t* X_ptr, scalar_t* a_ptr, int batch_size) {
// logarithm map forward kernel
using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
using Data = Eigen::Matrix<scalar_t,Group::N,1>;
at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) {
for (int64_t i=start; i<end; i++) {
Tangent a = Group(X_ptr + i*Group::N).Log();
Eigen::Map<Tangent>(a_ptr + i*Group::K) = a;
}
});
}
template <typename Group, typename scalar_t>
void log_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, scalar_t* dX, int batch_size) {
// logarithm map backward kernel
using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
using Grad = Eigen::Matrix<scalar_t,1,Group::K>;
using Data = Eigen::Matrix<scalar_t,Group::N,1>;
at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) {
for (int64_t i=start; i<end; i++) {
Tangent a = Group(X_ptr + i*Group::N).Log();
Grad da(grad + i*Group::K);
Eigen::Map<Grad>(dX + i*Group::N) = da * Group::left_jacobian_inverse(a);
}
});
}
template <typename Group, typename scalar_t>
void inv_forward_kernel(const scalar_t* X_ptr, scalar_t* Y_ptr, int batch_size) {
// group inverse forward kernel
using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
using Data = Eigen::Matrix<scalar_t,Group::N,1>;
at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) {
for (int64_t i=start; i<end; i++) {
Group X(X_ptr + i*Group::N);
Eigen::Map<Data>(Y_ptr + i*Group::N) = X.inv().data();
}
});
}
template <typename Group, typename scalar_t>
void inv_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, scalar_t *dX, int batch_size) {
// group inverse backward kernel
using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
using Grad = Eigen::Matrix<scalar_t,1,Group::K>;
using Data = Eigen::Matrix<scalar_t,Group::N,1>;
at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) {
for (int64_t i=start; i<end; i++) {
Group Y = Group(X_ptr + i*Group::N).inv();
Grad dY(grad + i*Group::N);
Eigen::Map<Grad>(dX + i*Group::N) = -dY * Y.Adj();
}
});
}
template <typename Group, typename scalar_t>
void mul_forward_kernel(const scalar_t* X_ptr, const scalar_t* Y_ptr, scalar_t* Z_ptr, int batch_size) {
// group multiplication forward kernel
using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
using Data = Eigen::Matrix<scalar_t,Group::N,1>;
at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) {
for (int64_t i=start; i<end; i++) {
Group Z = Group(X_ptr + i*Group::N) * Group(Y_ptr + i*Group::N);
Eigen::Map<Data>(Z_ptr + i*Group::N) = Z.data();
}
});
}
template <class Group, typename scalar_t>
void mul_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, const scalar_t* Y_ptr, scalar_t* dX, scalar_t* dY, int batch_size) {
// group multiplication backward kernel
using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
using Grad = Eigen::Matrix<scalar_t,1,Group::K>;
using Data = Eigen::Matrix<scalar_t,Group::N,1>;
at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) {
for (int64_t i=start; i<end; i++) {
Grad dZ(grad + i*Group::N);
Group X(X_ptr + i*Group::N);
Eigen::Map<Grad>(dX + i*Group::N) = dZ;
Eigen::Map<Grad>(dY + i*Group::N) = dZ * X.Adj();
}
});
}
template <typename Group, typename scalar_t>
void adj_forward_kernel(const scalar_t* X_ptr, const scalar_t* a_ptr, scalar_t* b_ptr, int batch_size) {
// adjoint forward kernel
using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
using Data = Eigen::Matrix<scalar_t,Group::N,1>;
at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) {
for (int64_t i=start; i<end; i++) {
Group X(X_ptr + i*Group::N);
Tangent a(a_ptr + i*Group::K);
Eigen::Map<Tangent>(b_ptr + i*Group::K) = X.Adj(a);
}
});
}
template <typename Group, typename scalar_t>
void adj_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, const scalar_t* a_ptr, scalar_t* dX, scalar_t* da, int batch_size) {
// adjoint backward kernel
using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
using Grad = Eigen::Matrix<scalar_t,1,Group::K>;
using Data = Eigen::Matrix<scalar_t,Group::N,1>;
at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) {
for (int64_t i=start; i<end; i++) {
Group X(X_ptr + i*Group::N);
Grad db(grad + i*Group::K);
Tangent a(a_ptr + i*Group::K);
Tangent b = X.Adj() * a;
Eigen::Map<Grad>(da + i*Group::K) = db * X.Adj();
Eigen::Map<Grad>(dX + i*Group::N) = -db * Group::adj(b);
}
});
}
template <typename Group, typename scalar_t>
void adjT_forward_kernel(const scalar_t* X_ptr, const scalar_t* a_ptr, scalar_t* b_ptr, int batch_size) {
// transposed adjoint forward kernel
using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
using Data = Eigen::Matrix<scalar_t,Group::N,1>;
at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) {
for (int64_t i=start; i<end; i++) {
Group X(X_ptr + i*Group::N);
Tangent a(a_ptr + i*Group::K);
Eigen::Map<Tangent>(b_ptr + i*Group::K) = X.AdjT(a);
}
});
}
template <typename Group, typename scalar_t>
void adjT_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, const scalar_t* a_ptr, scalar_t* dX, scalar_t* da, int batch_size) {
// transposed adjoint backward kernel
using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
using Grad = Eigen::Matrix<scalar_t,1,Group::K>;
using Data = Eigen::Matrix<scalar_t,Group::N,1>;
at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) {
for (int64_t i=start; i<end; i++) {
Group X(X_ptr + i*Group::N);
Tangent db(grad + i*Group::K);
Grad a(a_ptr + i*Group::K);
Eigen::Map<Tangent>(da + i*Group::K) = X.Adj(db);
Eigen::Map<Grad>(dX + i*Group::N) = -a * Group::adj(X.Adj(db));
}
});
}
template <typename Group, typename scalar_t>
void act_forward_kernel(const scalar_t* X_ptr, const scalar_t* p_ptr, scalar_t* q_ptr, int batch_size) {
// action on point forward kernel
using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
using Data = Eigen::Matrix<scalar_t,Group::N,1>;
using Point = Eigen::Matrix<scalar_t,3,1>;
at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) {
for (int64_t i=start; i<end; i++) {
Group X(X_ptr + i*Group::N);
Point p(p_ptr + i*3);
Eigen::Map<Point>(q_ptr + i*3) = X * p;
}
});
}
template <typename Group, typename scalar_t>
void act_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, const scalar_t* p_ptr, scalar_t* dX, scalar_t* dp, int batch_size) {
// action on point backward kernel
using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
using Grad = Eigen::Matrix<scalar_t,1,Group::K>;
using Point = Eigen::Matrix<scalar_t,3,1>;
using PointGrad = Eigen::Matrix<scalar_t,1,3>;
using Transformation = Eigen::Matrix<scalar_t,4,4>;
at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) {
for (int64_t i=start; i<end; i++) {
Group X(X_ptr + i*Group::N);
Point p(p_ptr + i*3);
PointGrad dq(grad + i*3);
Eigen::Map<PointGrad>(dp + i*3) = dq * X.Matrix().template block<3,3>(0,0);
Eigen::Map<Grad>(dX + i*Group::N) = dq * Group::act_jacobian(X*p);
}
});
}
template <typename Group, typename scalar_t>
void act4_forward_kernel(const scalar_t* X_ptr, const scalar_t* p_ptr, scalar_t* q_ptr, int batch_size) {
// action on homogeneous point forward kernel
using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
using Data = Eigen::Matrix<scalar_t,Group::N,1>;
using Point = Eigen::Matrix<scalar_t,4,1>;
at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) {
for (int64_t i=start; i<end; i++) {
Group X(X_ptr + i*Group::N);
Point p(p_ptr + i*4);
Eigen::Map<Point>(q_ptr + i*4) = X.act4(p);
}
});
}
template <typename Group, typename scalar_t>
void act4_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, const scalar_t* p_ptr, scalar_t* dX, scalar_t* dp, int batch_size) {
// action on homogeneous point backward kernel
using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
using Grad = Eigen::Matrix<scalar_t,1,Group::K>;
using Point = Eigen::Matrix<scalar_t,4,1>;
using PointGrad = Eigen::Matrix<scalar_t,1,4>;
using Transformation = Eigen::Matrix<scalar_t,4,4>;
at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) {
for (int64_t i=start; i<end; i++) {
Group X(X_ptr + i*Group::N);
Point p(p_ptr + i*4);
PointGrad dq(grad + i*4);
Eigen::Map<PointGrad>(dp + i*4) = dq * X.Matrix4x4();
const Point q = X.act4(p);
Eigen::Map<Grad>(dX + i*Group::N) = dq * Group::act4_jacobian(q);
}
});
}
template <typename Group, typename scalar_t>
void as_matrix_forward_kernel(const scalar_t* X_ptr, scalar_t* T_ptr, int batch_size) {
// convert group element to 4x4 matrix kernel
using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
using Data = Eigen::Matrix<scalar_t,Group::N,1>;
using Matrix4 = Eigen::Matrix<scalar_t,4,4,Eigen::RowMajor>;
at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) {
for (int64_t i=start; i<end; i++) {
Group X(X_ptr + i*Group::N);
Eigen::Map<Matrix4>(T_ptr + i*16) = X.Matrix4x4();
}
});
}
template <typename Group, typename scalar_t>
void jleft_forward_kernel(const scalar_t* X_ptr, const scalar_t* a_ptr, scalar_t* b_ptr, int batch_size) {
// left-jacobian inverse action
using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
using Data = Eigen::Matrix<scalar_t,Group::N,1>;
at::parallel_for(0, batch_size, 1, [&](int64_t start, int64_t end) {
for (int64_t i=start; i<end; i++) {
Group X(X_ptr + i*Group::N);
Tangent a(a_ptr + i*Group::K);
Tangent b = Group::left_jacobian_inverse(X.Log()) * a;
Eigen::Map<Tangent>(b_ptr + i*Group::K) = b;
}
});
}
// unary operations
torch::Tensor exp_forward_cpu(int group_id, torch::Tensor a) {
int batch_size = a.size(0);
torch::Tensor X;
DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, a.type(), "exp_forward_kernel", ([&] {
X = torch::zeros({batch_size, group_t::N}, a.options());
exp_forward_kernel<group_t, scalar_t>(
a.data_ptr<scalar_t>(),
X.data_ptr<scalar_t>(),
batch_size);
}));
return X;
}
std::vector<torch::Tensor> exp_backward_cpu(int group_id, torch::Tensor grad, torch::Tensor a) {
int batch_size = a.size(0);
torch::Tensor da = torch::zeros(a.sizes(), grad.options());
DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, a.type(), "exp_backward_kernel", ([&] {
exp_backward_kernel<group_t, scalar_t>(
grad.data_ptr<scalar_t>(),
a.data_ptr<scalar_t>(),
da.data_ptr<scalar_t>(),
batch_size);
}));
return {da};
}
torch::Tensor log_forward_cpu(int group_id, torch::Tensor X) {
int batch_size = X.size(0);
torch::Tensor a;
DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "log_forward_kernel", ([&] {
a = torch::zeros({batch_size, group_t::K}, X.options());
log_forward_kernel<group_t, scalar_t>(
X.data_ptr<scalar_t>(),
a.data_ptr<scalar_t>(),
batch_size);
}));
return a;
}
std::vector<torch::Tensor> log_backward_cpu(int group_id, torch::Tensor grad, torch::Tensor X) {
int batch_size = X.size(0);
torch::Tensor dX = torch::zeros(X.sizes(), grad.options());
DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "log_backward_kernel", ([&] {
log_backward_kernel<group_t, scalar_t>(
grad.data_ptr<scalar_t>(),
X.data_ptr<scalar_t>(),
dX.data_ptr<scalar_t>(),
batch_size);
}));
return {dX};
}
torch::Tensor inv_forward_cpu(int group_id, torch::Tensor X) {
int batch_size = X.size(0);
torch::Tensor Y = torch::zeros_like(X);
DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "inv_forward_kernel", ([&] {
inv_forward_kernel<group_t, scalar_t>(
X.data_ptr<scalar_t>(),
Y.data_ptr<scalar_t>(),
batch_size);
}));
return Y;
}
std::vector<torch::Tensor> inv_backward_cpu(int group_id, torch::Tensor grad, torch::Tensor X) {
int batch_size = X.size(0);
torch::Tensor dX = torch::zeros(X.sizes(), grad.options());
DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "inv_backward_kernel", ([&] {
inv_backward_kernel<group_t, scalar_t>(
grad.data_ptr<scalar_t>(),
X.data_ptr<scalar_t>(),
dX.data_ptr<scalar_t>(),
batch_size);
}));
return {dX};
}
// binary operations
torch::Tensor mul_forward_cpu(int group_id, torch::Tensor X, torch::Tensor Y) {
int batch_size = X.size(0);
torch::Tensor Z = torch::zeros_like(X);
DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "mul_forward_kernel", ([&] {
mul_forward_kernel<group_t, scalar_t>(
X.data_ptr<scalar_t>(),
Y.data_ptr<scalar_t>(),
Z.data_ptr<scalar_t>(),
batch_size);
}));
return Z;
}
std::vector<torch::Tensor> mul_backward_cpu(int group_id, torch::Tensor grad, torch::Tensor X, torch::Tensor Y) {
int batch_size = X.size(0);
torch::Tensor dX = torch::zeros(X.sizes(), grad.options());
torch::Tensor dY = torch::zeros(Y.sizes(), grad.options());
DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "mul_backward_kernel", ([&] {
mul_backward_kernel<group_t, scalar_t>(
grad.data_ptr<scalar_t>(),
X.data_ptr<scalar_t>(),
Y.data_ptr<scalar_t>(),
dX.data_ptr<scalar_t>(),
dY.data_ptr<scalar_t>(),
batch_size);
}));
return {dX, dY};
}
torch::Tensor adj_forward_cpu(int group_id, torch::Tensor X, torch::Tensor a) {
int batch_size = X.size(0);
torch::Tensor b = torch::zeros(a.sizes(), a.options());
DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "adj_forward_kernel", ([&] {
adj_forward_kernel<group_t, scalar_t>(
X.data_ptr<scalar_t>(),
a.data_ptr<scalar_t>(),
b.data_ptr<scalar_t>(),
batch_size);
}));
return b;
}
std::vector<torch::Tensor> adj_backward_cpu(int group_id, torch::Tensor grad, torch::Tensor X, torch::Tensor a) {
int batch_size = X.size(0);
torch::Tensor dX = torch::zeros(X.sizes(), grad.options());
torch::Tensor da = torch::zeros(a.sizes(), grad.options());
DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "adj_backward_kernel", ([&] {
adj_backward_kernel<group_t, scalar_t>(
grad.data_ptr<scalar_t>(),
X.data_ptr<scalar_t>(),
a.data_ptr<scalar_t>(),
dX.data_ptr<scalar_t>(),
da.data_ptr<scalar_t>(),
batch_size);
}));
return {dX, da};
}
torch::Tensor adjT_forward_cpu(int group_id, torch::Tensor X, torch::Tensor a) {
int batch_size = X.size(0);
torch::Tensor b = torch::zeros(a.sizes(), a.options());
DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "adjT_forward_kernel", ([&] {
adjT_forward_kernel<group_t, scalar_t>(
X.data_ptr<scalar_t>(),
a.data_ptr<scalar_t>(),
b.data_ptr<scalar_t>(),
batch_size);
}));
return b;
}
std::vector<torch::Tensor> adjT_backward_cpu(int group_id, torch::Tensor grad, torch::Tensor X, torch::Tensor a) {
int batch_size = X.size(0);
torch::Tensor dX = torch::zeros(X.sizes(), grad.options());
torch::Tensor da = torch::zeros(a.sizes(), grad.options());
DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "adjT_backward_kernel", ([&] {
adjT_backward_kernel<group_t, scalar_t>(
grad.data_ptr<scalar_t>(),
X.data_ptr<scalar_t>(),
a.data_ptr<scalar_t>(),
dX.data_ptr<scalar_t>(),
da.data_ptr<scalar_t>(),
batch_size);
}));
return {dX, da};
}
torch::Tensor act_forward_cpu(int group_id, torch::Tensor X, torch::Tensor p) {
int batch_size = X.size(0);
torch::Tensor q = torch::zeros(p.sizes(), p.options());
DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "act_forward_kernel", ([&] {
act_forward_kernel<group_t, scalar_t>(
X.data_ptr<scalar_t>(),
p.data_ptr<scalar_t>(),
q.data_ptr<scalar_t>(),
batch_size);
}));
return q;
}
std::vector<torch::Tensor> act_backward_cpu(int group_id, torch::Tensor grad, torch::Tensor X, torch::Tensor p) {
int batch_size = X.size(0);
torch::Tensor dX = torch::zeros(X.sizes(), grad.options());
torch::Tensor dp = torch::zeros(p.sizes(), grad.options());
DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "act_backward_kernel", ([&] {
act_backward_kernel<group_t, scalar_t>(
grad.data_ptr<scalar_t>(),
X.data_ptr<scalar_t>(),
p.data_ptr<scalar_t>(),
dX.data_ptr<scalar_t>(),
dp.data_ptr<scalar_t>(),
batch_size);
}));
return {dX, dp};
}
torch::Tensor act4_forward_cpu(int group_id, torch::Tensor X, torch::Tensor p) {
int batch_size = X.size(0);
torch::Tensor q = torch::zeros(p.sizes(), p.options());
DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "act4_forward_kernel", ([&] {
act4_forward_kernel<group_t, scalar_t>(
X.data_ptr<scalar_t>(),
p.data_ptr<scalar_t>(),
q.data_ptr<scalar_t>(),
batch_size);
}));
return q;
}
std::vector<torch::Tensor> act4_backward_cpu(int group_id, torch::Tensor grad, torch::Tensor X, torch::Tensor p) {
int batch_size = X.size(0);
torch::Tensor dX = torch::zeros(X.sizes(), grad.options());
torch::Tensor dp = torch::zeros(p.sizes(), grad.options());
DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "act4_backward_kernel", ([&] {
act4_backward_kernel<group_t, scalar_t>(
grad.data_ptr<scalar_t>(),
X.data_ptr<scalar_t>(),
p.data_ptr<scalar_t>(),
dX.data_ptr<scalar_t>(),
dp.data_ptr<scalar_t>(),
batch_size);
}));
return {dX, dp};
}
torch::Tensor as_matrix_forward_cpu(int group_id, torch::Tensor X) {
int batch_size = X.size(0);
torch::Tensor T4x4 = torch::zeros({X.size(0), 4, 4}, X.options());
DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "as_matrix_forward_kernel", ([&] {
as_matrix_forward_kernel<group_t, scalar_t>(
X.data_ptr<scalar_t>(),
T4x4.data_ptr<scalar_t>(),
batch_size);
}));
return T4x4;
}
torch::Tensor jleft_forward_cpu(int group_id, torch::Tensor X, torch::Tensor a) {
int batch_size = X.size(0);
torch::Tensor b = torch::zeros(a.sizes(), a.options());
DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "jleft_forward_kernel", ([&] {
jleft_forward_kernel<group_t, scalar_t>(
X.data_ptr<scalar_t>(),
a.data_ptr<scalar_t>(),
b.data_ptr<scalar_t>(),
batch_size);
}));
return b;
}
\ No newline at end of file
#include "lietorch_gpu.h"
#include <Eigen/Dense>
#include "common.h"
#include "dispatch.h"
#include "so3.h"
#include "rxso3.h"
#include "se3.h"
#include "sim3.h"
#define GPU_1D_KERNEL_LOOP(i, n) \
for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i<n; i += blockDim.x * gridDim.x)
#define NUM_THREADS 256
#define NUM_BLOCKS(batch_size) ((batch_size + NUM_THREADS - 1) / NUM_THREADS)
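// GPU_1D_KERNEL_LOOP is the usual grid-stride loop: each thread starts at its global
// index and advances by the total number of launched threads, so a launch of
// NUM_BLOCKS(batch_size) blocks of NUM_THREADS threads covers every batch element
// even when batch_size exceeds the grid size.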
template <typename Group, typename scalar_t>
__global__ void exp_forward_kernel(const scalar_t* a_ptr, scalar_t* X_ptr, int num_threads) {
// exponential map forward kernel
using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
using Data = Eigen::Matrix<scalar_t,Group::N,1>;
GPU_1D_KERNEL_LOOP(i, num_threads) {
Tangent a(a_ptr + i*Group::K);
Eigen::Map<Data>(X_ptr + i*Group::N) = Group::Exp(a).data();
}
}
template <typename Group, typename scalar_t>
__global__ void exp_backward_kernel(const scalar_t* grad, const scalar_t* a_ptr, scalar_t* da, int num_threads) {
// exponential map backward kernel
using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
using Grad = Eigen::Matrix<scalar_t,1,Group::K>;
using Data = Eigen::Matrix<scalar_t,Group::N,1>;
GPU_1D_KERNEL_LOOP(i, num_threads) {
Tangent a(a_ptr + i*Group::K);
Grad dX(grad + i*Group::N);
Eigen::Map<Grad>(da + i*Group::K) = dX * Group::left_jacobian(a);
}
}
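// Note: a perturbation da of the tangent vector corresponds to the left perturbation
// left_jacobian(a) * da of Exp(a), so the row-vector gradient coming in for X is
// pulled back to a by right-multiplying with the left Jacobian, as done above.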
template <typename Group, typename scalar_t>
__global__ void log_forward_kernel(const scalar_t* X_ptr, scalar_t* a_ptr, int num_threads) {
// logarithm map forward kernel
using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
using Data = Eigen::Matrix<scalar_t,Group::N,1>;
GPU_1D_KERNEL_LOOP(i, num_threads) {
Tangent a = Group(X_ptr + i*Group::N).Log();
Eigen::Map<Tangent>(a_ptr + i*Group::K) = a;
}
}
template <typename Group, typename scalar_t>
__global__ void log_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, scalar_t* dX, int num_threads) {
// logarithm map backward kernel
using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
using Grad = Eigen::Matrix<scalar_t,1,Group::K>;
using Data = Eigen::Matrix<scalar_t,Group::N,1>;
GPU_1D_KERNEL_LOOP(i, num_threads) {
Tangent a = Group(X_ptr + i*Group::N).Log();
Grad da(grad + i*Group::K);
Eigen::Map<Grad>(dX + i*Group::N) = da * Group::left_jacobian_inverse(a);
}
}
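// Log is the inverse of Exp, so the incoming gradient with respect to a = Log(X) is
// pulled back through the inverse left Jacobian evaluated at a, as done above.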
template <typename Group, typename scalar_t>
__global__ void inv_forward_kernel(const scalar_t* X_ptr, scalar_t* Y_ptr, int num_threads) {
// group inverse forward kernel
using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
using Data = Eigen::Matrix<scalar_t,Group::N,1>;
GPU_1D_KERNEL_LOOP(i, num_threads) {
Group X(X_ptr + i*Group::N);
Eigen::Map<Data>(Y_ptr + i*Group::N) = X.inv().data();
}
}
template <typename Group, typename scalar_t>
__global__ void inv_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, scalar_t *dX, int num_threads) {
// group inverse backward kernel
using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
using Grad = Eigen::Matrix<scalar_t,1,Group::K>;
using Data = Eigen::Matrix<scalar_t,Group::N,1>;
GPU_1D_KERNEL_LOOP(i, num_threads) {
Group Y = Group(X_ptr + i*Group::N).inv();
Grad dY(grad + i*Group::N);
Eigen::Map<Grad>(dX + i*Group::N) = -dY * Y.Adj();
}
}
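// For Y = X^{-1}, a left perturbation Exp(xi)*X maps to Exp(-Adj(Y)*xi)*Y, which is
// why the gradient with respect to X is -dY * Adj(Y) above.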
template <typename Group, typename scalar_t>
__global__ void mul_forward_kernel(const scalar_t* X_ptr, const scalar_t* Y_ptr, scalar_t* Z_ptr, int num_threads) {
// group multiplication forward kernel
using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
using Data = Eigen::Matrix<scalar_t,Group::N,1>;
GPU_1D_KERNEL_LOOP(i, num_threads) {
Group Z = Group(X_ptr + i*Group::N) * Group(Y_ptr + i*Group::N);
Eigen::Map<Data>(Z_ptr + i*Group::N) = Z.data();
}
}
template <class Group, typename scalar_t>
__global__ void mul_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, const scalar_t* Y_ptr, scalar_t* dX, scalar_t* dY, int num_threads) {
// group multiplication backward kernel
using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
using Grad = Eigen::Matrix<scalar_t,1,Group::K>;
using Data = Eigen::Matrix<scalar_t,Group::N,1>;
GPU_1D_KERNEL_LOOP(i, num_threads) {
Grad dZ(grad + i*Group::N);
Group X(X_ptr + i*Group::N);
Eigen::Map<Grad>(dX + i*Group::N) = dZ;
Eigen::Map<Grad>(dY + i*Group::N) = dZ * X.Adj();
}
}
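// For Z = X*Y: a left perturbation of X perturbs Z by the same tangent vector
// (dX = dZ), while a left perturbation of Y is conjugated through X, which is why
// dY picks up the adjoint Adj(X).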
template <typename Group, typename scalar_t>
__global__ void adj_forward_kernel(const scalar_t* X_ptr, const scalar_t* a_ptr, scalar_t* b_ptr, int num_threads) {
// adjoint forward kernel
using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
using Data = Eigen::Matrix<scalar_t,Group::N,1>;
GPU_1D_KERNEL_LOOP(i, num_threads) {
Group X(X_ptr + i*Group::N);
Tangent a(a_ptr + i*Group::K);
Eigen::Map<Tangent>(b_ptr + i*Group::K) = X.Adj(a);
}
}
template <typename Group, typename scalar_t>
__global__ void adj_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, const scalar_t* a_ptr, scalar_t* dX, scalar_t* da, int num_threads) {
// adjoint backward kernel
using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
using Grad = Eigen::Matrix<scalar_t,1,Group::K>;
using Data = Eigen::Matrix<scalar_t,Group::N,1>;
GPU_1D_KERNEL_LOOP(i, num_threads) {
Group X(X_ptr + i*Group::N);
Grad db(grad + i*Group::K);
Tangent a(a_ptr + i*Group::K);
Tangent b = X.Adj() * a;
Eigen::Map<Grad>(da + i*Group::K) = db * X.Adj();
Eigen::Map<Grad>(dX + i*Group::N) = -db * Group::adj(b);
}
}
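// For b = Adj(X)*a: the gradient with respect to a is db*Adj(X); for X, a left
// perturbation Exp(xi) changes b by adj(xi)*b = -adj(b)*xi, giving the -db*adj(b)
// term above.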
template <typename Group, typename scalar_t>
__global__ void adjT_forward_kernel(const scalar_t* X_ptr, const scalar_t* a_ptr, scalar_t* b_ptr, int num_threads) {
  // transposed adjoint (Ad^T) forward kernel
using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
using Data = Eigen::Matrix<scalar_t,Group::N,1>;
GPU_1D_KERNEL_LOOP(i, num_threads) {
Group X(X_ptr + i*Group::N);
Tangent a(a_ptr + i*Group::K);
Eigen::Map<Tangent>(b_ptr + i*Group::K) = X.AdjT(a);
}
}
template <typename Group, typename scalar_t>
__global__ void adjT_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, const scalar_t* a_ptr, scalar_t* dX, scalar_t* da, int num_threads) {
  // transposed adjoint (Ad^T) backward kernel
using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
using Grad = Eigen::Matrix<scalar_t,1,Group::K>;
using Data = Eigen::Matrix<scalar_t,Group::N,1>;
GPU_1D_KERNEL_LOOP(i, num_threads) {
Group X(X_ptr + i*Group::N);
Tangent db(grad + i*Group::K);
Grad a(a_ptr + i*Group::K);
Eigen::Map<Tangent>(da + i*Group::K) = X.Adj(db);
Eigen::Map<Grad>(dX + i*Group::N) = -a * Group::adj(X.Adj(db));
}
}
template <typename Group, typename scalar_t>
__global__ void act_forward_kernel(const scalar_t* X_ptr, const scalar_t* p_ptr, scalar_t* q_ptr, int num_threads) {
// action on point forward kernel
using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
using Data = Eigen::Matrix<scalar_t,Group::N,1>;
using Point = Eigen::Matrix<scalar_t,3,1>;
GPU_1D_KERNEL_LOOP(i, num_threads) {
Group X(X_ptr + i*Group::N);
Point p(p_ptr + i*3);
Eigen::Map<Point>(q_ptr + i*3) = X * p;
}
}
template <typename Group, typename scalar_t>
__global__ void act_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, const scalar_t* p_ptr, scalar_t* dX, scalar_t* dp, int num_threads) {
  // action on point backward kernel
using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
using Grad = Eigen::Matrix<scalar_t,1,Group::K>;
using Point = Eigen::Matrix<scalar_t,3,1>;
using PointGrad = Eigen::Matrix<scalar_t,1,3>;
using Transformation = Eigen::Matrix<scalar_t,4,4>;
GPU_1D_KERNEL_LOOP(i, num_threads) {
Group X(X_ptr + i*Group::N);
Point p(p_ptr + i*3);
PointGrad dq(grad + i*3);
Eigen::Map<PointGrad>(dp + i*3) = dq * X.Matrix().block<3,3>(0,0);
Eigen::Map<Grad>(dX + i*Group::N) = dq * Group::act_jacobian(X*p);
}
}
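// For q = X*p: the gradient with respect to p is dq times the linear
// (rotation/scale) block of the transformation matrix, and the gradient with
// respect to X is dq times the action Jacobian evaluated at the transformed
// point q = X*p, matching the two assignments above.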
template <typename Group, typename scalar_t>
__global__ void act4_forward_kernel(const scalar_t* X_ptr, const scalar_t* p_ptr, scalar_t* q_ptr, int num_threads) {
  // action on homogeneous point forward kernel
using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
using Data = Eigen::Matrix<scalar_t,Group::N,1>;
using Point = Eigen::Matrix<scalar_t,4,1>;
GPU_1D_KERNEL_LOOP(i, num_threads) {
Group X(X_ptr + i*Group::N);
Point p(p_ptr + i*4);
Eigen::Map<Point>(q_ptr + i*4) = X.act4(p);
}
}
template <typename Group, typename scalar_t>
__global__ void act4_backward_kernel(const scalar_t* grad, const scalar_t* X_ptr, const scalar_t* p_ptr, scalar_t* dX, scalar_t* dp, int num_threads) {
  // action on homogeneous point backward kernel
using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
using Grad = Eigen::Matrix<scalar_t,1,Group::K>;
using Point = Eigen::Matrix<scalar_t,4,1>;
using PointGrad = Eigen::Matrix<scalar_t,1,4>;
using Transformation = Eigen::Matrix<scalar_t,4,4>;
GPU_1D_KERNEL_LOOP(i, num_threads) {
Group X(X_ptr + i*Group::N);
Point p(p_ptr + i*4);
PointGrad dq(grad + i*4);
Eigen::Map<PointGrad>(dp + i*4) = dq * X.Matrix4x4();
const Point q = X.act4(p);
Eigen::Map<Grad>(dX + i*Group::N) = dq * Group::act4_jacobian(q);
}
}
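// Homogeneous variant of the above: dp is back-propagated through the full 4x4
// matrix, and dX through the act4 Jacobian evaluated at q = X.act4(p).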
template <typename Group, typename scalar_t>
__global__ void as_matrix_forward_kernel(const scalar_t* X_ptr, scalar_t* T_ptr, int num_threads) {
  // convert group element to 4x4 matrix kernel
using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
using Data = Eigen::Matrix<scalar_t,Group::N,1>;
using Matrix4 = Eigen::Matrix<scalar_t,4,4,Eigen::RowMajor>;
GPU_1D_KERNEL_LOOP(i, num_threads) {
Group X(X_ptr + i*Group::N);
Eigen::Map<Matrix4>(T_ptr + i*16) = X.Matrix4x4();
}
}
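// The RowMajor map above matches the contiguous (batch, 4, 4) layout of the output
// tensor allocated on the host side.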
template <typename Group, typename scalar_t>
__global__ void jleft_forward_kernel(const scalar_t* X_ptr, const scalar_t* a_ptr, scalar_t* b_ptr, int num_threads) {
// left jacobian inverse action
using Tangent = Eigen::Matrix<scalar_t,Group::K,1>;
using Data = Eigen::Matrix<scalar_t,Group::N,1>;
GPU_1D_KERNEL_LOOP(i, num_threads) {
Group X(X_ptr + i*Group::N);
Tangent a(a_ptr + i*Group::K);
Tangent b = Group::left_jacobian_inverse(X.Log()) * a;
Eigen::Map<Tangent>(b_ptr + i*Group::K) = b;
}
}
// unary operations
torch::Tensor exp_forward_gpu(int group_id, torch::Tensor a) {
int batch_size = a.size(0);
torch::Tensor X;
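  // X is allocated inside the dispatch below because its embedding dimension
  // group_t::N is only known once the group type has been resolved.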
DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, a.type(), "exp_forward_kernel", ([&] {
X = torch::zeros({batch_size, group_t::N}, a.options());
exp_forward_kernel<group_t, scalar_t><<<NUM_BLOCKS(batch_size), NUM_THREADS>>>(
a.data_ptr<scalar_t>(),
X.data_ptr<scalar_t>(),
batch_size);
}));
return X;
}
std::vector<torch::Tensor> exp_backward_gpu(int group_id, torch::Tensor grad, torch::Tensor a) {
int batch_size = a.size(0);
torch::Tensor da = torch::zeros(a.sizes(), grad.options());
DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, a.type(), "exp_backward_kernel", ([&] {
exp_backward_kernel<group_t, scalar_t><<<NUM_BLOCKS(batch_size), NUM_THREADS>>>(
grad.data_ptr<scalar_t>(),
a.data_ptr<scalar_t>(),
da.data_ptr<scalar_t>(),
batch_size);
}));
return {da};
}
torch::Tensor log_forward_gpu(int group_id, torch::Tensor X) {
int batch_size = X.size(0);
torch::Tensor a;
DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "log_forward_kernel", ([&] {
a = torch::zeros({batch_size, group_t::K}, X.options());
log_forward_kernel<group_t, scalar_t><<<NUM_BLOCKS(batch_size), NUM_THREADS>>>(
X.data_ptr<scalar_t>(),
a.data_ptr<scalar_t>(),
batch_size);
}));
return a;
}
std::vector<torch::Tensor> log_backward_gpu(int group_id, torch::Tensor grad, torch::Tensor X) {
int batch_size = X.size(0);
torch::Tensor dX = torch::zeros(X.sizes(), grad.options());
DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "log_backward_kernel", ([&] {
log_backward_kernel<group_t, scalar_t><<<NUM_BLOCKS(batch_size), NUM_THREADS>>>(
grad.data_ptr<scalar_t>(),
X.data_ptr<scalar_t>(),
dX.data_ptr<scalar_t>(),
batch_size);
}));
return {dX};
}
torch::Tensor inv_forward_gpu(int group_id, torch::Tensor X) {
int batch_size = X.size(0);
torch::Tensor Y = torch::zeros_like(X);
DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "inv_forward_kernel", ([&] {
inv_forward_kernel<group_t, scalar_t><<<NUM_BLOCKS(batch_size), NUM_THREADS>>>(
X.data_ptr<scalar_t>(),
Y.data_ptr<scalar_t>(),
batch_size);
}));
return Y;
}
std::vector<torch::Tensor> inv_backward_gpu(int group_id, torch::Tensor grad, torch::Tensor X) {
int batch_size = X.size(0);
torch::Tensor dX = torch::zeros(X.sizes(), grad.options());
DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "inv_backward_kernel", ([&] {
inv_backward_kernel<group_t, scalar_t><<<NUM_BLOCKS(batch_size), NUM_THREADS>>>(
grad.data_ptr<scalar_t>(),
X.data_ptr<scalar_t>(),
dX.data_ptr<scalar_t>(),
batch_size);
}));
return {dX};
}
// binary operations
torch::Tensor mul_forward_gpu(int group_id, torch::Tensor X, torch::Tensor Y) {
int batch_size = X.size(0);
torch::Tensor Z = torch::zeros_like(X);
DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "mul_forward_kernel", ([&] {
mul_forward_kernel<group_t, scalar_t><<<NUM_BLOCKS(batch_size), NUM_THREADS>>>(
X.data_ptr<scalar_t>(),
Y.data_ptr<scalar_t>(),
Z.data_ptr<scalar_t>(),
batch_size);
}));
return Z;
}
std::vector<torch::Tensor> mul_backward_gpu(int group_id, torch::Tensor grad, torch::Tensor X, torch::Tensor Y) {
int batch_size = X.size(0);
torch::Tensor dX = torch::zeros(X.sizes(), grad.options());
torch::Tensor dY = torch::zeros(Y.sizes(), grad.options());
DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "mul_backward_kernel", ([&] {
mul_backward_kernel<group_t, scalar_t><<<NUM_BLOCKS(batch_size), NUM_THREADS>>>(
grad.data_ptr<scalar_t>(),
X.data_ptr<scalar_t>(),
Y.data_ptr<scalar_t>(),
dX.data_ptr<scalar_t>(),
dY.data_ptr<scalar_t>(),
batch_size);
}));
return {dX, dY};
}
torch::Tensor adj_forward_gpu(int group_id, torch::Tensor X, torch::Tensor a) {
int batch_size = X.size(0);
torch::Tensor b = torch::zeros(a.sizes(), a.options());
DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "adj_forward_kernel", ([&] {
adj_forward_kernel<group_t, scalar_t><<<NUM_BLOCKS(batch_size), NUM_THREADS>>>(
X.data_ptr<scalar_t>(),
a.data_ptr<scalar_t>(),
b.data_ptr<scalar_t>(),
batch_size);
}));
return b;
}
std::vector<torch::Tensor> adj_backward_gpu(int group_id, torch::Tensor grad, torch::Tensor X, torch::Tensor a) {
int batch_size = X.size(0);
torch::Tensor dX = torch::zeros(X.sizes(), grad.options());
torch::Tensor da = torch::zeros(a.sizes(), grad.options());
DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "adj_backward_kernel", ([&] {
adj_backward_kernel<group_t, scalar_t><<<NUM_BLOCKS(batch_size), NUM_THREADS>>>(
grad.data_ptr<scalar_t>(),
X.data_ptr<scalar_t>(),
a.data_ptr<scalar_t>(),
dX.data_ptr<scalar_t>(),
da.data_ptr<scalar_t>(),
batch_size);
}));
return {dX, da};
}
torch::Tensor adjT_forward_gpu(int group_id, torch::Tensor X, torch::Tensor a) {
int batch_size = X.size(0);
torch::Tensor b = torch::zeros(a.sizes(), a.options());
DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "adjT_forward_kernel", ([&] {
adjT_forward_kernel<group_t, scalar_t><<<NUM_BLOCKS(batch_size), NUM_THREADS>>>(
X.data_ptr<scalar_t>(),
a.data_ptr<scalar_t>(),
b.data_ptr<scalar_t>(),
batch_size);
}));
return b;
}
std::vector<torch::Tensor> adjT_backward_gpu(int group_id, torch::Tensor grad, torch::Tensor X, torch::Tensor a) {
int batch_size = X.size(0);
torch::Tensor dX = torch::zeros(X.sizes(), grad.options());
torch::Tensor da = torch::zeros(a.sizes(), grad.options());
DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "adjT_backward_kernel", ([&] {
adjT_backward_kernel<group_t, scalar_t><<<NUM_BLOCKS(batch_size), NUM_THREADS>>>(
grad.data_ptr<scalar_t>(),
X.data_ptr<scalar_t>(),
a.data_ptr<scalar_t>(),
dX.data_ptr<scalar_t>(),
da.data_ptr<scalar_t>(),
batch_size);
}));
return {dX, da};
}
torch::Tensor act_forward_gpu(int group_id, torch::Tensor X, torch::Tensor p) {
int batch_size = X.size(0);
torch::Tensor q = torch::zeros(p.sizes(), p.options());
DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "act_forward_kernel", ([&] {
act_forward_kernel<group_t, scalar_t><<<NUM_BLOCKS(batch_size), NUM_THREADS>>>(
X.data_ptr<scalar_t>(),
p.data_ptr<scalar_t>(),
q.data_ptr<scalar_t>(),
batch_size);
}));
return q;
}
std::vector<torch::Tensor> act_backward_gpu(int group_id, torch::Tensor grad, torch::Tensor X, torch::Tensor p) {
int batch_size = X.size(0);
torch::Tensor dX = torch::zeros(X.sizes(), grad.options());
torch::Tensor dp = torch::zeros(p.sizes(), grad.options());
DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "act_backward_kernel", ([&] {
act_backward_kernel<group_t, scalar_t><<<NUM_BLOCKS(batch_size), NUM_THREADS>>>(
grad.data_ptr<scalar_t>(),
X.data_ptr<scalar_t>(),
p.data_ptr<scalar_t>(),
dX.data_ptr<scalar_t>(),
dp.data_ptr<scalar_t>(),
batch_size);
}));
return {dX, dp};
}
torch::Tensor act4_forward_gpu(int group_id, torch::Tensor X, torch::Tensor p) {
int batch_size = X.size(0);
torch::Tensor q = torch::zeros(p.sizes(), p.options());
DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "act4_forward_kernel", ([&] {
act4_forward_kernel<group_t, scalar_t><<<NUM_BLOCKS(batch_size), NUM_THREADS>>>(
X.data_ptr<scalar_t>(),
p.data_ptr<scalar_t>(),
q.data_ptr<scalar_t>(),
batch_size);
}));
return q;
}
std::vector<torch::Tensor> act4_backward_gpu(int group_id, torch::Tensor grad, torch::Tensor X, torch::Tensor p) {
int batch_size = X.size(0);
torch::Tensor dX = torch::zeros(X.sizes(), grad.options());
torch::Tensor dp = torch::zeros(p.sizes(), grad.options());
DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "act4_backward_kernel", ([&] {
act4_backward_kernel<group_t, scalar_t><<<NUM_BLOCKS(batch_size), NUM_THREADS>>>(
grad.data_ptr<scalar_t>(),
X.data_ptr<scalar_t>(),
p.data_ptr<scalar_t>(),
dX.data_ptr<scalar_t>(),
dp.data_ptr<scalar_t>(),
batch_size);
}));
return {dX, dp};
}
torch::Tensor as_matrix_forward_gpu(int group_id, torch::Tensor X) {
int batch_size = X.size(0);
torch::Tensor T4x4 = torch::zeros({X.size(0), 4, 4}, X.options());
DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "as_matrix_forward_kernel", ([&] {
as_matrix_forward_kernel<group_t, scalar_t><<<NUM_BLOCKS(batch_size), NUM_THREADS>>>(
X.data_ptr<scalar_t>(),
T4x4.data_ptr<scalar_t>(),
batch_size);
}));
return T4x4;
}
torch::Tensor jleft_forward_gpu(int group_id, torch::Tensor X, torch::Tensor a) {
int batch_size = X.size(0);
torch::Tensor b = torch::zeros(a.sizes(), a.options());
DISPATCH_GROUP_AND_FLOATING_TYPES(group_id, X.type(), "jleft_forward_kernel", ([&] {
jleft_forward_kernel<group_t, scalar_t><<<NUM_BLOCKS(batch_size), NUM_THREADS>>>(
X.data_ptr<scalar_t>(),
a.data_ptr<scalar_t>(),
b.data_ptr<scalar_t>(),
batch_size);
}));
return b;
}
#!/bin/bash
python lietorch/run_tests.py
from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension
import os.path as osp
ROOT = osp.dirname(osp.abspath(__file__))
print(ROOT)
setup(
name='lietorch',
version='0.1',
description='Lie Groups for PyTorch',
author='teedrz',
packages=['lietorch'],
ext_modules=[
CUDAExtension('lietorch_backends',
include_dirs=[
osp.join(ROOT, 'lietorch/include'),
osp.join(ROOT, 'eigen')],
sources=[
'lietorch/src/lietorch.cpp',
'lietorch/src/lietorch_gpu.cu',
'lietorch/src/lietorch_cpu.cpp'],
extra_compile_args={
'cxx': ['-O2'],
'nvcc': ['-O2',
'-gencode=arch=compute_60,code=sm_60',
'-gencode=arch=compute_61,code=sm_61',
'-gencode=arch=compute_70,code=sm_70',
'-gencode=arch=compute_75,code=sm_75',
'-gencode=arch=compute_75,code=compute_75',
]
}),
CUDAExtension('lietorch_extras',
sources=[
'lietorch/extras/altcorr_kernel.cu',
'lietorch/extras/corr_index_kernel.cu',
'lietorch/extras/se3_builder.cu',
'lietorch/extras/se3_inplace_builder.cu',
'lietorch/extras/se3_solver.cu',
'lietorch/extras/extras.cpp',
],
extra_compile_args={
'cxx': ['-O2'],
'nvcc': ['-O2',
'-gencode=arch=compute_60,code=sm_60',
'-gencode=arch=compute_61,code=sm_61',
'-gencode=arch=compute_70,code=sm_70',
'-gencode=arch=compute_75,code=sm_75',
'-gencode=arch=compute_75,code=compute_75',
]
}),
],
cmdclass={ 'build_ext': BuildExtension }
)
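# A typical build from the repository root (assuming the Eigen headers are available
# at ./eigen, as referenced in include_dirs above) would be:
#   python setup.py install
# after which the tests can be run with: python lietorch/run_tests.py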