add lietorch src code and eigen src code, update readme

266d4fd9 · zhanggzh · e7df8655 · 266d4fd9 · 266d4fd9 · 266d4fd9
Commit 266d4fd9 authored Jun 03, 2025 by zhanggzh
20 changed files
--- a/eigen-master/Eigen/src/Core/DiagonalMatrix.h
+++ b/eigen-master/Eigen/src/Core/DiagonalMatrix.h
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2007-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#ifndef EIGEN_DIAGONALMATRIX_H
+#define EIGEN_DIAGONALMATRIX_H
+// IWYU pragma: private
+#include "./InternalHeaderCheck.h"
+namespace Eigen {
+/** \class DiagonalBase
+ * \ingroup Core_Module
+ *
+ * \brief Base class for diagonal matrices and expressions
+ *
+ * This is the base class that is inherited by diagonal matrix and related expression
+ * types, which internally use a vector for storing the diagonal entries. Diagonal
+ * types always represent square matrices.
+ *
+ * \tparam Derived is the derived type, a DiagonalMatrix or DiagonalWrapper.
+ *
+ * \sa class DiagonalMatrix, class DiagonalWrapper
+ */
+template <typename Derived>
+class DiagonalBase : public EigenBase<Derived> {
+ public:
+  /** Vector (expression) type that stores the diagonal coefficients, as declared by the traits of \c Derived. */
+  typedef typename internal::traits<Derived>::DiagonalVectorType DiagonalVectorType;
+  typedef typename DiagonalVectorType::Scalar Scalar;
+  typedef typename DiagonalVectorType::RealScalar RealScalar;
+  typedef typename internal::traits<Derived>::StorageKind StorageKind;
+  typedef typename internal::traits<Derived>::StorageIndex StorageIndex;
+  // Rows and columns share the size of the diagonal vector: the represented matrix is always square.
+  enum {
+    RowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
+    ColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
+    MaxRowsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime,
+    MaxColsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime,
+    IsVectorAtCompileTime = 0,
+    Flags = NoPreferredStorageOrderBit
+  };
+  /** Dense matrix type with the same scalar and dimensions as the diagonal matrix. */
+  typedef Matrix<Scalar, RowsAtCompileTime, ColsAtCompileTime, 0, MaxRowsAtCompileTime, MaxColsAtCompileTime>
+      DenseMatrixType;
+  typedef DenseMatrixType DenseType;
+  /** Diagonal matrix type owning its storage, with the same scalar and dimensions. */
+  typedef DiagonalMatrix<Scalar, DiagonalVectorType::SizeAtCompileTime, DiagonalVectorType::MaxSizeAtCompileTime>
+      PlainObject;
+  /** \returns a const reference to the derived object. */
+  EIGEN_DEVICE_FUNC inline const Derived& derived() const { return *static_cast<const Derived*>(this); }
+  /** \returns a reference to the derived object. */
+  EIGEN_DEVICE_FUNC inline Derived& derived() { return *static_cast<Derived*>(this); }
+  /**
+   * Constructs a dense matrix from \c *this. Note, this directly returns a dense matrix type,
+   * not an expression.
+   * \returns A dense matrix, with its diagonal entries set from the derived object. */
+  EIGEN_DEVICE_FUNC DenseMatrixType toDenseMatrix() const { return derived(); }
+  /** \returns a const reference to the derived object's vector of diagonal coefficients. */
+  EIGEN_DEVICE_FUNC inline const DiagonalVectorType& diagonal() const { return derived().diagonal(); }
+  /** \returns a reference to the derived object's vector of diagonal coefficients. */
+  EIGEN_DEVICE_FUNC inline DiagonalVectorType& diagonal() { return derived().diagonal(); }
+  /** \returns the value of the coefficient as if \c *this was a dense matrix.
+   *
+   * Off-diagonal positions yield zero. Both indices must be valid, i.e.
+   * 0 <= \a row < rows() and 0 <= \a col < cols(); this is checked with eigen_assert.
+   */
+  EIGEN_DEVICE_FUNC inline Scalar coeff(Index row, Index col) const {
+    // The column bound is strict: col == cols() is one past the last valid column.
+    eigen_assert(row >= 0 && col >= 0 && row < rows() && col < cols());
+    return row == col ? diagonal().coeff(row) : Scalar(0);
+  }
+  /** \returns the number of rows. */
+  EIGEN_DEVICE_FUNC constexpr Index rows() const { return diagonal().size(); }
+  /** \returns the number of columns. */
+  EIGEN_DEVICE_FUNC constexpr Index cols() const { return diagonal().size(); }
+  /** \returns the diagonal matrix product of \c *this by the dense matrix, \a matrix */
+  template <typename MatrixDerived>
+  EIGEN_DEVICE_FUNC const Product<Derived, MatrixDerived, LazyProduct> operator*(
+      const MatrixBase<MatrixDerived>& matrix) const {
+    return Product<Derived, MatrixDerived, LazyProduct>(derived(), matrix.derived());
+  }
+  template <typename OtherDerived>
+  using DiagonalProductReturnType = DiagonalWrapper<const EIGEN_CWISE_BINARY_RETURN_TYPE(
+      DiagonalVectorType, typename OtherDerived::DiagonalVectorType, product)>;
+  /** \returns the diagonal matrix product of \c *this by the diagonal matrix \a other */
+  template <typename OtherDerived>
+  EIGEN_DEVICE_FUNC const DiagonalProductReturnType<OtherDerived> operator*(
+      const DiagonalBase<OtherDerived>& other) const {
+    return diagonal().cwiseProduct(other.diagonal()).asDiagonal();
+  }
+  using DiagonalInverseReturnType =
+      DiagonalWrapper<const CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const DiagonalVectorType>>;
+  /** \returns the inverse \c *this. Computed as the coefficient-wise inverse of the diagonal. */
+  EIGEN_DEVICE_FUNC inline const DiagonalInverseReturnType inverse() const {
+    return diagonal().cwiseInverse().asDiagonal();
+  }
+  using DiagonalScaleReturnType =
+      DiagonalWrapper<const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DiagonalVectorType, Scalar, product)>;
+  /** \returns the product of \c *this by the scalar \a scalar */
+  EIGEN_DEVICE_FUNC inline const DiagonalScaleReturnType operator*(const Scalar& scalar) const {
+    return (diagonal() * scalar).asDiagonal();
+  }
+  using ScaleDiagonalReturnType =
+      DiagonalWrapper<const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar, DiagonalVectorType, product)>;
+  /** \returns the product of a scalar and the diagonal matrix \a other */
+  EIGEN_DEVICE_FUNC friend inline const ScaleDiagonalReturnType operator*(const Scalar& scalar,
+                                                                          const DiagonalBase& other) {
+    return (scalar * other.diagonal()).asDiagonal();
+  }
+  template <typename OtherDerived>
+  using DiagonalSumReturnType = DiagonalWrapper<const EIGEN_CWISE_BINARY_RETURN_TYPE(
+      DiagonalVectorType, typename OtherDerived::DiagonalVectorType, sum)>;
+  /** \returns the sum of \c *this and the diagonal matrix \a other */
+  template <typename OtherDerived>
+  EIGEN_DEVICE_FUNC inline const DiagonalSumReturnType<OtherDerived> operator+(
+      const DiagonalBase<OtherDerived>& other) const {
+    return (diagonal() + other.diagonal()).asDiagonal();
+  }
+  template <typename OtherDerived>
+  using DiagonalDifferenceReturnType = DiagonalWrapper<const EIGEN_CWISE_BINARY_RETURN_TYPE(
+      DiagonalVectorType, typename OtherDerived::DiagonalVectorType, difference)>;
+  /** \returns the difference of \c *this and the diagonal matrix \a other */
+  template <typename OtherDerived>
+  EIGEN_DEVICE_FUNC inline const DiagonalDifferenceReturnType<OtherDerived> operator-(
+      const DiagonalBase<OtherDerived>& other) const {
+    return (diagonal() - other.diagonal()).asDiagonal();
+  }
+};
+/** \class DiagonalMatrix
+ * \ingroup Core_Module
+ *
+ * \brief Represents a diagonal matrix with its storage
+ *
+ * \tparam Scalar_ the type of coefficients
+ * \tparam SizeAtCompileTime the dimension of the matrix, or Dynamic
+ * \tparam MaxSizeAtCompileTime the dimension of the matrix, or Dynamic. This parameter is optional and defaults
+ *        to SizeAtCompileTime. Most of the time, you do not need to specify it.
+ *
+ * \sa class DiagonalBase, class DiagonalWrapper
+ */
+namespace internal {
+template <typename Scalar_, int SizeAtCompileTime, int MaxSizeAtCompileTime>
+struct traits<DiagonalMatrix<Scalar_, SizeAtCompileTime, MaxSizeAtCompileTime>>
+    : traits<Matrix<Scalar_, SizeAtCompileTime, SizeAtCompileTime, 0, MaxSizeAtCompileTime, MaxSizeAtCompileTime>> {
+  // Column vector type used to hold the diagonal coefficients.
+  using DiagonalVectorType = Matrix<Scalar_, SizeAtCompileTime, 1, 0, MaxSizeAtCompileTime, 1>;
+  // Overrides the storage kind inherited from the square dense Matrix traits.
+  using StorageKind = DiagonalShape;
+  // Overrides the inherited flags with the set used for diagonal matrices that own their storage.
+  enum { Flags = LvalueBit | NoPreferredStorageOrderBit | NestByRefBit };
+};
+}  // namespace internal
+template <typename Scalar_, int SizeAtCompileTime, int MaxSizeAtCompileTime>
+class DiagonalMatrix : public DiagonalBase<DiagonalMatrix<Scalar_, SizeAtCompileTime, MaxSizeAtCompileTime>> {
+ public:
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+  typedef typename internal::traits<DiagonalMatrix>::DiagonalVectorType DiagonalVectorType;
+  typedef const DiagonalMatrix& Nested;
+  typedef Scalar_ Scalar;
+  typedef typename internal::traits<DiagonalMatrix>::StorageKind StorageKind;
+  typedef typename internal::traits<DiagonalMatrix>::StorageIndex StorageIndex;
+#endif
+ protected:
+  /** The stored diagonal coefficients. */
+  DiagonalVectorType m_diagonal;
+ public:
+  /** const version of diagonal(). */
+  EIGEN_DEVICE_FUNC inline const DiagonalVectorType& diagonal() const { return m_diagonal; }
+  /** \returns a reference to the stored vector of diagonal coefficients. */
+  EIGEN_DEVICE_FUNC inline DiagonalVectorType& diagonal() { return m_diagonal; }
+  /** Default constructor without initialization */
+  EIGEN_DEVICE_FUNC inline DiagonalMatrix() {}
+  /** Constructs a diagonal matrix with given dimension  */
+  EIGEN_DEVICE_FUNC explicit inline DiagonalMatrix(Index dim) : m_diagonal(dim) {}
+  /** 2D constructor. */
+  EIGEN_DEVICE_FUNC inline DiagonalMatrix(const Scalar& x, const Scalar& y) : m_diagonal(x, y) {}
+  /** 3D constructor. */
+  EIGEN_DEVICE_FUNC inline DiagonalMatrix(const Scalar& x, const Scalar& y, const Scalar& z) : m_diagonal(x, y, z) {}
+  /** \brief Construct a diagonal matrix with fixed size from an arbitrary number of coefficients.
+   *
+   * \warning To construct a diagonal matrix of fixed size, the number of values passed to this
+   * constructor must match the fixed dimension of \c *this.
+   *
+   * \sa DiagonalMatrix(const Scalar&, const Scalar&)
+   * \sa DiagonalMatrix(const Scalar&, const Scalar&, const Scalar&)
+   */
+  template <typename... ArgTypes>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DiagonalMatrix(const Scalar& a0, const Scalar& a1, const Scalar& a2,
+                                                       const ArgTypes&... args)
+      : m_diagonal(a0, a1, a2, args...) {}
+  /** \brief Constructs a DiagonalMatrix and initializes it by elements given by an initializer list of initializer
+   * lists \cpp11
+   */
+  EIGEN_DEVICE_FUNC explicit EIGEN_STRONG_INLINE DiagonalMatrix(
+      const std::initializer_list<std::initializer_list<Scalar>>& list)
+      : m_diagonal(list) {}
+  /** \brief Constructs a DiagonalMatrix from an r-value diagonal vector type */
+  EIGEN_DEVICE_FUNC explicit inline DiagonalMatrix(DiagonalVectorType&& diag) : m_diagonal(std::move(diag)) {}
+  /** Copy constructor. */
+  template <typename OtherDerived>
+  EIGEN_DEVICE_FUNC inline DiagonalMatrix(const DiagonalBase<OtherDerived>& other) : m_diagonal(other.diagonal()) {}
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+  /** copy constructor. prevent a default copy constructor from hiding the other templated constructor.
+   * Marked EIGEN_DEVICE_FUNC like every other constructor and the copy-assignment operator, so that
+   * plain copies are also usable from device code.
+   */
+  EIGEN_DEVICE_FUNC inline DiagonalMatrix(const DiagonalMatrix& other) : m_diagonal(other.diagonal()) {}
+#endif
+  /** generic constructor from expression of the diagonal coefficients */
+  template <typename OtherDerived>
+  EIGEN_DEVICE_FUNC explicit inline DiagonalMatrix(const MatrixBase<OtherDerived>& other) : m_diagonal(other) {}
+  /** Copy operator. */
+  template <typename OtherDerived>
+  EIGEN_DEVICE_FUNC DiagonalMatrix& operator=(const DiagonalBase<OtherDerived>& other) {
+    m_diagonal = other.diagonal();
+    return *this;
+  }
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+  /** This is a special case of the templated operator=. Its purpose is to
+   * prevent a default operator= from hiding the templated operator=.
+   */
+  EIGEN_DEVICE_FUNC DiagonalMatrix& operator=(const DiagonalMatrix& other) {
+    m_diagonal = other.diagonal();
+    return *this;
+  }
+#endif
+  /** Return type of Identity(): a diagonal wrapper around a constant-valued nullary vector expression. */
+  typedef DiagonalWrapper<const CwiseNullaryOp<internal::scalar_constant_op<Scalar>, DiagonalVectorType>>
+      InitializeReturnType;
+  /** Return type of Zero(): a diagonal wrapper around a zero-valued nullary vector expression. */
+  typedef DiagonalWrapper<const CwiseNullaryOp<internal::scalar_zero_op<Scalar>, DiagonalVectorType>>
+      ZeroInitializeReturnType;
+  /** Initializes a diagonal matrix of size SizeAtCompileTime with coefficients set to zero */
+  EIGEN_DEVICE_FUNC static const ZeroInitializeReturnType Zero() { return DiagonalVectorType::Zero().asDiagonal(); }
+  /** Initializes a diagonal matrix of size \a size with coefficients set to zero */
+  EIGEN_DEVICE_FUNC static const ZeroInitializeReturnType Zero(Index size) {
+    return DiagonalVectorType::Zero(size).asDiagonal();
+  }
+  /** Initializes an identity matrix of size SizeAtCompileTime */
+  EIGEN_DEVICE_FUNC static const InitializeReturnType Identity() { return DiagonalVectorType::Ones().asDiagonal(); }
+  /** Initializes an identity matrix of size \a size */
+  EIGEN_DEVICE_FUNC static const InitializeReturnType Identity(Index size) {
+    return DiagonalVectorType::Ones(size).asDiagonal();
+  }
+  /** Resizes to given size. */
+  EIGEN_DEVICE_FUNC inline void resize(Index size) { m_diagonal.resize(size); }
+  /** Sets all coefficients to zero. */
+  EIGEN_DEVICE_FUNC inline void setZero() { m_diagonal.setZero(); }
+  /** Resizes and sets all coefficients to zero. */
+  EIGEN_DEVICE_FUNC inline void setZero(Index size) { m_diagonal.setZero(size); }
+  /** Sets this matrix to be the identity matrix of the current size. */
+  EIGEN_DEVICE_FUNC inline void setIdentity() { m_diagonal.setOnes(); }
+  /** Sets this matrix to be the identity matrix of the given size. */
+  EIGEN_DEVICE_FUNC inline void setIdentity(Index size) { m_diagonal.setOnes(size); }
+};
+/** \class DiagonalWrapper
+ * \ingroup Core_Module
+ *
+ * \brief Expression of a diagonal matrix
+ *
+ * \tparam DiagonalVectorType_ the type of the vector of diagonal coefficients
+ *
+ * This class is an expression of a diagonal matrix, but not storing its own vector of diagonal coefficients,
+ * instead wrapping an existing vector expression. It is the return type of MatrixBase::asDiagonal()
+ * and most of the time this is the only way that it is used.
+ *
+ * \sa class DiagonalMatrix, class DiagonalBase, MatrixBase::asDiagonal()
+ */
+namespace internal {
+template <typename DiagonalVectorType_>
+struct traits<DiagonalWrapper<DiagonalVectorType_>> {
+  // Everything below is derived from the wrapped vector expression type.
+  using DiagonalVectorType = DiagonalVectorType_;
+  using Scalar = typename DiagonalVectorType::Scalar;
+  using StorageIndex = typename DiagonalVectorType::StorageIndex;
+  using StorageKind = DiagonalShape;
+  using XprKind = typename traits<DiagonalVectorType>::XprKind;
+  enum {
+    // Square matrix: both dimensions equal the size of the wrapped vector.
+    RowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
+    ColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
+    MaxRowsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime,
+    MaxColsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime,
+    // Only the lvalue bit of the wrapped vector is propagated.
+    Flags = NoPreferredStorageOrderBit | (traits<DiagonalVectorType>::Flags & LvalueBit)
+  };
+};
+}  // namespace internal
+template <typename DiagonalVectorType_>
+class DiagonalWrapper : public DiagonalBase<DiagonalWrapper<DiagonalVectorType_>>, internal::no_assignment_operator {
+ public:
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+  using DiagonalVectorType = DiagonalVectorType_;
+  using Nested = DiagonalWrapper;
+#endif
+  /** Builds the wrapper around \a a_diagonal, the vector expression providing the diagonal coefficients. */
+  EIGEN_DEVICE_FUNC explicit inline DiagonalWrapper(DiagonalVectorType& a_diagonal) : m_diagonal(a_diagonal) {}
+  /** \returns a const reference to the wrapped expression of diagonal coefficients. */
+  EIGEN_DEVICE_FUNC const DiagonalVectorType& diagonal() const { return m_diagonal; }
+ protected:
+  // Held using the wrapped type's own Nested type.
+  typename DiagonalVectorType::Nested m_diagonal;
+};
+/** \returns a pseudo-expression of a diagonal matrix with *this as vector of diagonal coefficients
+ *
+ * \only_for_vectors
+ *
+ * Example: \include MatrixBase_asDiagonal.cpp
+ * Output: \verbinclude MatrixBase_asDiagonal.out
+ *
+ * \sa class DiagonalWrapper, class DiagonalMatrix, diagonal(), isDiagonal()
+ **/
+template <typename Derived>
+EIGEN_DEVICE_FUNC inline const DiagonalWrapper<const Derived> MatrixBase<Derived>::asDiagonal() const {
+  // Build the diagonal wrapper around the derived vector expression.
+  using WrapperType = DiagonalWrapper<const Derived>;
+  return WrapperType(derived());
+}
+/** \returns true if *this is approximately equal to a diagonal matrix,
+ *          within the precision given by \a prec.
+ *
+ * Example: \include MatrixBase_isDiagonal.cpp
+ * Output: \verbinclude MatrixBase_isDiagonal.out
+ *
+ * \sa asDiagonal()
+ */
+template <typename Derived>
+bool MatrixBase<Derived>::isDiagonal(const RealScalar& prec) const {
+  // A non-square matrix is never reported as diagonal.
+  if (rows() != cols()) return false;
+  // Largest magnitude found on the diagonal; starts at -1 so that any non-negative magnitude replaces it.
+  RealScalar diagScale = static_cast<RealScalar>(-1);
+  for (Index k = 0; k < cols(); ++k) {
+    const RealScalar magnitude = numext::abs(coeff(k, k));
+    if (magnitude > diagScale) diagScale = magnitude;
+  }
+  // Each off-diagonal pair (i, j) / (j, i) must be much smaller than that scale, within prec.
+  for (Index j = 0; j < cols(); ++j) {
+    for (Index i = 0; i < j; ++i) {
+      const bool pairNegligible = internal::isMuchSmallerThan(coeff(i, j), diagScale, prec) &&
+                                  internal::isMuchSmallerThan(coeff(j, i), diagScale, prec);
+      if (!pairNegligible) return false;
+    }
+  }
+  return true;
+}
+namespace internal {
+// The DiagonalShape storage kind maps to the DiagonalShape shape.
+template <>
+struct storage_kind_to_shape<DiagonalShape> {
+  using Shape = DiagonalShape;
+};
+// Tag type naming the "diagonal source into dense destination" assignment kind.
+struct Diagonal2Dense {};
+// Assigning a DiagonalShape source to a DenseShape destination uses the Diagonal2Dense kind.
+template <>
+struct AssignmentKind<DenseShape, DiagonalShape> {
+  using Kind = Diagonal2Dense;
+};
+// Diagonal matrix to Dense assignment
+template <typename DstXprType, typename SrcXprType, typename Functor>
+struct Assignment<DstXprType, SrcXprType, Functor, Diagonal2Dense> {
+  /** dst = src: resizes \a dst when its shape differs from \a src, zeroes it, then copies the diagonal. */
+  static EIGEN_DEVICE_FUNC void run(
+      DstXprType& dst, const SrcXprType& src,
+      const internal::assign_op<typename DstXprType::Scalar, typename SrcXprType::Scalar>& /*func*/) {
+    const Index wantedRows = src.rows();
+    const Index wantedCols = src.cols();
+    const bool shapeMatches = (dst.rows() == wantedRows) && (dst.cols() == wantedCols);
+    if (!shapeMatches) dst.resize(wantedRows, wantedCols);
+    dst.setZero();
+    dst.diagonal() = src.diagonal();
+  }
+  /** dst += src: only the diagonal of \a dst is touched; no resizing is performed here. */
+  static EIGEN_DEVICE_FUNC void run(
+      DstXprType& dst, const SrcXprType& src,
+      const internal::add_assign_op<typename DstXprType::Scalar, typename SrcXprType::Scalar>& /*func*/) {
+    dst.diagonal() += src.diagonal();
+  }
+  /** dst -= src: only the diagonal of \a dst is touched; no resizing is performed here. */
+  static EIGEN_DEVICE_FUNC void run(
+      DstXprType& dst, const SrcXprType& src,
+      const internal::sub_assign_op<typename DstXprType::Scalar, typename SrcXprType::Scalar>& /*func*/) {
+    dst.diagonal() -= src.diagonal();
+  }
+};
+}  // namespace internal
+}  // end namespace Eigen
+#endif  // EIGEN_DIAGONALMATRIX_H
--- a/eigen-master/Eigen/src/Core/DiagonalProduct.h
+++ b/eigen-master/Eigen/src/Core/DiagonalProduct.h
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2007-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#ifndef EIGEN_DIAGONALPRODUCT_H
+#define EIGEN_DIAGONALPRODUCT_H
+// IWYU pragma: private
+#include "./InternalHeaderCheck.h"
+namespace Eigen {
+/** \returns the diagonal matrix product of \c *this by the diagonal matrix \a diagonal.
+ */
+template <typename Derived>
+template <typename DiagonalDerived>
+EIGEN_DEVICE_FUNC inline const Product<Derived, DiagonalDerived, LazyProduct> MatrixBase<Derived>::operator*(
+    const DiagonalBase<DiagonalDerived> &a_diagonal) const {
+  // Lazy product expression with *this on the left and the diagonal operand on the right.
+  using ProductType = Product<Derived, DiagonalDerived, LazyProduct>;
+  return ProductType(derived(), a_diagonal.derived());
+}
+}  // end namespace Eigen
+#endif  // EIGEN_DIAGONALPRODUCT_H
--- a/eigen-master/Eigen/src/Core/Dot.h
+++ b/eigen-master/Eigen/src/Core/Dot.h
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2006-2008, 2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#ifndef EIGEN_DOT_H
+#define EIGEN_DOT_H
+// IWYU pragma: private
+#include "./InternalHeaderCheck.h"
+namespace Eigen {
+namespace internal {
+template <typename Derived, typename Scalar = typename traits<Derived>::Scalar>
+struct squared_norm_impl {
+  using Real = typename NumTraits<Scalar>::Real;
+  /** Applies squared_norm_functor to every coefficient of \a a, sums the results, and
+   * returns the real part plus the imaginary part of that sum. */
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Real run(const Derived& a) {
+    Scalar reduced = a.unaryExpr(squared_norm_functor<Scalar>()).sum();
+    return numext::real(reduced) + numext::imag(reduced);
+  }
+};
+template <typename Derived>
+struct squared_norm_impl<Derived, bool> {
+  /** Boolean specialization: the result is whatever \c a.any() reports. */
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(const Derived& a) {
+    const bool anySet = a.any();
+    return anySet;
+  }
+};
+}  // end namespace internal
+/** \fn MatrixBase::dot
+ * \returns the dot product of *this with other.
+ *
+ * \only_for_vectors
+ *
+ * \note If the scalar type is complex numbers, then this function returns the hermitian
+ * (sesquilinear) dot product, conjugate-linear in the first variable and linear in the
+ * second variable.
+ *
+ * \sa squaredNorm(), norm()
+ */
+template <typename Derived>
+template <typename OtherDerived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+    typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,
+                                  typename internal::traits<OtherDerived>::Scalar>::ReturnType
+    MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const {
+  // All the work is delegated to the internal dot_impl helper.
+  using Impl = internal::dot_impl<Derived, OtherDerived>;
+  return Impl::run(derived(), other.derived());
+}
+//---------- implementation of L2 norm and related functions ----------
+/** \returns, for vectors, the squared \em l2 norm of \c *this, and for matrices the squared Frobenius norm.
+ * In both cases, it consists in the sum of the square of all the matrix entries.
+ * For vectors, this is also equal to the dot product of \c *this with itself.
+ *
+ * \sa dot(), norm(), lpNorm()
+ */
+template <typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
+MatrixBase<Derived>::squaredNorm() const {
+  // squared_norm_impl picks the implementation matching the scalar type (e.g. the bool specialization).
+  using Impl = internal::squared_norm_impl<Derived>;
+  return Impl::run(derived());
+}
+/** \returns, for vectors, the \em l2 norm of \c *this, and for matrices the Frobenius norm.
+ * In both cases, it consists in the square root of the sum of the square of all the matrix entries.
+ * For vectors, this is also equal to the square root of the dot product of \c *this with itself.
+ *
+ * \sa lpNorm(), dot(), squaredNorm()
+ */
+template <typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
+MatrixBase<Derived>::norm() const {
+  // The norm is the square root of the squared norm.
+  const RealScalar sumOfSquares = squaredNorm();
+  return numext::sqrt(sumOfSquares);
+}
+/** \returns an expression of the quotient of \c *this by its own norm.
+ *
+ * \warning If the input vector is too small (i.e., this->norm()==0),
+ *          then this function returns a copy of the input.
+ *
+ * \only_for_vectors
+ *
+ * \sa norm(), normalize()
+ */
+template <typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::PlainObject MatrixBase<Derived>::normalized()
+    const {
+  using Nested_ = typename internal::nested_eval<Derived, 2>::type;
+  Nested_ evaluated(derived());
+  const RealScalar sqNorm = evaluated.squaredNorm();
+  // NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU
+  // Anything that is not strictly positive (zero, or a NaN comparison) returns the input unchanged.
+  if (!(sqNorm > RealScalar(0))) return evaluated;
+  return evaluated / numext::sqrt(sqNorm);
+}
+/** Normalizes the vector, i.e. divides it by its own norm.
+ *
+ * \only_for_vectors
+ *
+ * \warning If the input vector is too small (i.e., this->norm()==0), then \c *this is left unchanged.
+ *
+ * \sa norm(), normalized()
+ */
+template <typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void MatrixBase<Derived>::normalize() {
+  const RealScalar sqNorm = squaredNorm();
+  // NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU
+  // Leave *this untouched unless the squared norm is strictly positive.
+  if (!(sqNorm > RealScalar(0))) return;
+  derived() /= numext::sqrt(sqNorm);
+}
+/** \returns an expression of the quotient of \c *this by its own norm while avoiding underflow and overflow.
+ *
+ * \only_for_vectors
+ *
+ * This method is analogous to the normalized() method, but it reduces the risk of
+ * underflow and overflow when computing the norm.
+ *
+ * \warning If the input vector is too small (i.e., this->norm()==0),
+ *          then this function returns a copy of the input.
+ *
+ * \sa stableNorm(), stableNormalize(), normalized()
+ */
+template <typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::PlainObject
+MatrixBase<Derived>::stableNormalized() const {
+  using Nested_ = typename internal::nested_eval<Derived, 3>::type;
+  Nested_ evaluated(derived());
+  // Scale by the largest absolute coefficient before squaring.
+  const RealScalar maxAbs = evaluated.cwiseAbs().maxCoeff();
+  const RealScalar scaledSqNorm = (evaluated / maxAbs).squaredNorm();
+  // Anything that is not strictly positive (zero, or a NaN comparison) returns the input unchanged.
+  if (!(scaledSqNorm > RealScalar(0))) return evaluated;
+  return evaluated / (numext::sqrt(scaledSqNorm) * maxAbs);
+}
+/** Normalizes the vector while avoiding underflow and overflow
+ *
+ * \only_for_vectors
+ *
+ * This method is analogous to the normalize() method, but it reduces the risk of
+ * underflow and overflow when computing the norm.
+ *
+ * \warning If the input vector is too small (i.e., this->norm()==0), then \c *this is left unchanged.
+ *
+ * \sa stableNorm(), stableNormalized(), normalize()
+ */
+template <typename Derived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void MatrixBase<Derived>::stableNormalize() {
+  // Scale by the largest absolute coefficient before squaring.
+  const RealScalar maxAbs = cwiseAbs().maxCoeff();
+  const RealScalar scaledSqNorm = (derived() / maxAbs).squaredNorm();
+  // Leave *this untouched unless the scaled squared norm is strictly positive.
+  if (!(scaledSqNorm > RealScalar(0))) return;
+  derived() /= numext::sqrt(scaledSqNorm) * maxAbs;
+}
+//---------- implementation of other norms ----------
+namespace internal {
+template <typename Derived, int p>
+struct lpNorm_selector {
+  using RealScalar = typename NumTraits<typename traits<Derived>::Scalar>::Real;
+  /** Generic case: sum of the p-th powers of the absolute coefficients, raised to the power 1/p. */
+  EIGEN_DEVICE_FUNC static inline RealScalar run(const MatrixBase<Derived>& m) {
+    EIGEN_USING_STD(pow)
+    return pow(m.cwiseAbs().array().pow(p).sum(), RealScalar(1) / p);
+  }
+};
+template <typename Derived>
+struct lpNorm_selector<Derived, 1> {
+  /** p == 1: the sum of the absolute values of the coefficients. */
+  EIGEN_DEVICE_FUNC static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(
+      const MatrixBase<Derived>& m) {
+    const auto absValues = m.cwiseAbs();
+    return absValues.sum();
+  }
+};
+template <typename Derived>
+struct lpNorm_selector<Derived, 2> {
+  /** p == 2: forwards to MatrixBase::norm(). */
+  EIGEN_DEVICE_FUNC static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(
+      const MatrixBase<Derived>& expr) {
+    return expr.norm();
+  }
+};
+template <typename Derived>
+struct lpNorm_selector<Derived, Infinity> {
+  using RealScalar = typename NumTraits<typename traits<Derived>::Scalar>::Real;
+  /** p == Infinity: the largest absolute coefficient, or 0 for an empty object. */
+  EIGEN_DEVICE_FUNC static inline RealScalar run(const MatrixBase<Derived>& m) {
+    // Empty objects are answered directly with 0, without calling maxCoeff().
+    const bool emptyAtCompileTime = (Derived::SizeAtCompileTime == 0);
+    if (emptyAtCompileTime || (Derived::SizeAtCompileTime == Dynamic && m.size() == 0)) {
+      return RealScalar(0);
+    }
+    return m.cwiseAbs().maxCoeff();
+  }
+};
+}  // end namespace internal
+/** \returns the \b coefficient-wise \f$ \ell^p \f$ norm of \c *this, that is, returns the p-th root of the sum of the
+ * p-th powers of the absolute values of the coefficients of \c *this. If \a p is the special value \a Eigen::Infinity,
+ * this function returns the \f$ \ell^\infty \f$ norm, that is the maximum of the absolute values of the coefficients of
+ * \c *this.
+ *
+ * In all cases, if \c *this is empty, then the value 0 is returned.
+ *
+ * \note For matrices, this function does not compute the <a
+ * href="https://en.wikipedia.org/wiki/Operator_norm">operator-norm</a>. That is, if \c *this is a matrix, then its
+ * coefficients are interpreted as a 1D vector. Nonetheless, you can easily compute the 1-norm and \f$\infty\f$-norm
+ * matrix operator norms using \link TutorialReductionsVisitorsBroadcastingReductionsNorm partial reductions \endlink.
+ *
+ * \sa norm()
+ */
+template <typename Derived>
+template <int p>
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+EIGEN_DEVICE_FUNC inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
+#else
+EIGEN_DEVICE_FUNC MatrixBase<Derived>::RealScalar
+#endif
+MatrixBase<Derived>::lpNorm() const {
+  // The selector specialization matching p (1, 2, Infinity, or the generic case) does the work.
+  using Selector = internal::lpNorm_selector<Derived, p>;
+  return Selector::run(*this);
+}
+//---------- implementation of isOrthogonal / isUnitary ----------
+/** \returns true if *this is approximately orthogonal to \a other,
+ *          within the precision given by \a prec.
+ *
+ * Example: \include MatrixBase_isOrthogonal.cpp
+ * Output: \verbinclude MatrixBase_isOrthogonal.out
+ */
+template <typename Derived>
+template <typename OtherDerived>
+bool MatrixBase<Derived>::isOrthogonal(const MatrixBase<OtherDerived>& other, const RealScalar& prec) const {
+  using LhsNested = typename internal::nested_eval<Derived, 2>::type;
+  using RhsNested = typename internal::nested_eval<OtherDerived, 2>::type;
+  LhsNested lhs(derived());
+  RhsNested rhs(other.derived());
+  // Squared form of the test |<lhs, rhs>| <= prec * ||lhs|| * ||rhs||.
+  return numext::abs2(lhs.dot(rhs)) <= prec * prec * lhs.squaredNorm() * rhs.squaredNorm();
+}
+/** \returns true if *this is approximately a unitary matrix,
+ *          within the precision given by \a prec. In the case where the \a Scalar
+ *          type is real numbers, a unitary matrix is an orthogonal matrix, whence the name.
+ *
+ * \note This can be used to check whether a family of vectors forms an orthonormal basis.
+ *       Indeed, \c m.isUnitary() returns true if and only if the columns (equivalently, the rows) of m form an
+ *       orthonormal basis.
+ *
+ * Example: \include MatrixBase_isUnitary.cpp
+ * Output: \verbinclude MatrixBase_isUnitary.out
+ */
+template <typename Derived>
+bool MatrixBase<Derived>::isUnitary(const RealScalar& prec) const {
+  using SelfNested = typename internal::nested_eval<Derived, 1>::type;
+  SelfNested self(derived());
+  for (Index c = 0; c < cols(); ++c) {
+    // Each column must have a squared norm approximately equal to 1 ...
+    if (!internal::isApprox(self.col(c).squaredNorm(), static_cast<RealScalar>(1), prec)) return false;
+    // ... and a negligible dot product with every earlier column.
+    for (Index earlier = 0; earlier < c; ++earlier) {
+      if (!internal::isMuchSmallerThan(self.col(c).dot(self.col(earlier)), static_cast<Scalar>(1), prec)) return false;
+    }
+  }
+  return true;
+}
+}  // end namespace Eigen
+#endif  // EIGEN_DOT_H
--- a/eigen-master/Eigen/src/Core/EigenBase.h
+++ b/eigen-master/Eigen/src/Core/EigenBase.h
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#ifndef EIGEN_EIGENBASE_H
+#define EIGEN_EIGENBASE_H
+// IWYU pragma: private
+#include "./InternalHeaderCheck.h"
+namespace Eigen {
+/** \class EigenBase
+ * \ingroup Core_Module
+ *
+ * Common base class for all classes T such that MatrixBase has an operator=(T) and a constructor MatrixBase(T).
+ *
+ * In other words, an EigenBase object is an object that can be copied into a MatrixBase.
+ *
+ * Besides MatrixBase-derived classes, this also includes special matrix classes such as diagonal matrices, etc.
+ *
+ * Notice that this class is trivial, it is only used to disambiguate overloaded functions.
+ *
+ * \sa \blank \ref TopicClassHierarchy
+ */
+template <typename Derived>
+struct EigenBase {
+  //   typedef typename internal::plain_matrix_type<Derived>::type PlainObject;
+  /** \brief The interface type of indices
+   * \details To change this, \c \#define the preprocessor symbol \c EIGEN_DEFAULT_DENSE_INDEX_TYPE.
+   * \sa StorageIndex, \ref TopicPreprocessorDirectives.
+   * DEPRECATED: Since Eigen 3.3, its usage is deprecated. Use Eigen::Index instead.
+   * The deprecation is not expressed through a doxygen tag because too many existing usages would be affected by
+   * the deprecation attribute.
+   */
+  using Index = Eigen::Index;
+  // FIXME is it needed?
+  using StorageKind = typename internal::traits<Derived>::StorageKind;
+  /** \returns a reference to \c *this seen as its \c Derived type */
+  EIGEN_DEVICE_FUNC constexpr Derived& derived() { return static_cast<Derived&>(*this); }
+  /** \returns a const reference to \c *this seen as its \c Derived type */
+  EIGEN_DEVICE_FUNC constexpr const Derived& derived() const { return static_cast<const Derived&>(*this); }
+  /** \internal \returns a mutable reference to the derived object even though \c *this is const */
+  EIGEN_DEVICE_FUNC inline constexpr Derived& const_cast_derived() const {
+    return static_cast<Derived&>(const_cast<EigenBase&>(*this));
+  }
+  /** \internal \returns a const reference to the derived object */
+  EIGEN_DEVICE_FUNC inline const Derived& const_derived() const { return static_cast<const Derived&>(*this); }
+  /** \returns the number of rows, as reported by the derived object. \sa cols(), RowsAtCompileTime */
+  EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return derived().rows(); }
+  /** \returns the number of columns, as reported by the derived object. \sa rows(), ColsAtCompileTime*/
+  EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return derived().cols(); }
+  /** \returns the number of coefficients, which is rows()*cols().
+   * \sa rows(), cols(), SizeAtCompileTime. */
+  EIGEN_DEVICE_FUNC constexpr Index size() const noexcept { return derived().rows() * derived().cols(); }
+  /** \internal Don't use it, but do the equivalent: \code dst = *this; \endcode */
+  template <typename Dest>
+  EIGEN_DEVICE_FUNC inline void evalTo(Dest& dst) const {
+    // The derived class is expected to supply the actual evaluation.
+    derived().evalTo(dst);
+  }
+  /** \internal Don't use it, but do the equivalent: \code dst += *this; \endcode */
+  template <typename Dest>
+  EIGEN_DEVICE_FUNC inline void addTo(Dest& dst) const {
+    // Default implementation: evaluate into a temporary, then accumulate.
+    // A derived class may provide a more efficient version.
+    typename Dest::PlainObject evaluated(rows(), cols());
+    this->evalTo(evaluated);
+    dst += evaluated;
+  }
+  /** \internal Don't use it, but do the equivalent: \code dst -= *this; \endcode */
+  template <typename Dest>
+  EIGEN_DEVICE_FUNC inline void subTo(Dest& dst) const {
+    // Default implementation: evaluate into a temporary, then subtract.
+    // A derived class may provide a more efficient version.
+    typename Dest::PlainObject evaluated(rows(), cols());
+    this->evalTo(evaluated);
+    dst -= evaluated;
+  }
+  /** \internal Don't use it, but do the equivalent: \code dst.applyOnTheRight(*this); \endcode */
+  template <typename Dest>
+  EIGEN_DEVICE_FUNC inline void applyThisOnTheRight(Dest& dst) const {
+    // Default implementation via a plain product; a derived class may provide a more efficient version.
+    dst = dst * derived();
+  }
+  /** \internal Don't use it, but do the equivalent: \code dst.applyOnTheLeft(*this); \endcode */
+  template <typename Dest>
+  EIGEN_DEVICE_FUNC inline void applyThisOnTheLeft(Dest& dst) const {
+    // Default implementation via a plain product; a derived class may provide a more efficient version.
+    dst = derived() * dst;
+  }
+  // Declarations only: the definitions of both device() overloads are not part of this header.
+  template <typename Device>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DeviceWrapper<Derived, Device> device(Device& device);
+  template <typename Device>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DeviceWrapper<const Derived, Device> device(Device& device) const;
+};
+/***************************************************************************
+ * Implementation of matrix base methods
+ ***************************************************************************/
+/** \brief Copies the generic expression \a other into *this.
+ *
+ * \details The expression must provide a (templated) evalTo(Derived& dst) const
+ * function which does the actual job. In practice, this allows any user to write
+ * its own special matrix without having to modify MatrixBase
+ *
+ * \returns a reference to *this.
+ */
+template <typename Derived>
+template <typename OtherDerived>
+EIGEN_DEVICE_FUNC Derived& DenseBase<Derived>::operator=(const EigenBase<OtherDerived>& other) {
+  // Resolve the destination once, assign the concrete source into it, and return it to allow chaining.
+  Derived& self = derived();
+  call_assignment(self, other.derived());
+  return self;
+}
+// Adds the generic expression \a other to *this and returns a reference to *this.
+template <typename Derived>
+template <typename OtherDerived>
+EIGEN_DEVICE_FUNC Derived& DenseBase<Derived>::operator+=(const EigenBase<OtherDerived>& other) {
+  Derived& self = derived();
+  // Same path as operator=, with an add-assign functor combining destination and source scalars.
+  call_assignment(self, other.derived(), internal::add_assign_op<Scalar, typename OtherDerived::Scalar>());
+  return self;
+}
+// Subtracts the generic expression \a other from *this and returns a reference to *this.
+template <typename Derived>
+template <typename OtherDerived>
+EIGEN_DEVICE_FUNC Derived& DenseBase<Derived>::operator-=(const EigenBase<OtherDerived>& other) {
+  Derived& self = derived();
+  // Same path as operator=, with a sub-assign functor combining destination and source scalars.
+  call_assignment(self, other.derived(), internal::sub_assign_op<Scalar, typename OtherDerived::Scalar>());
+  return self;
+}
+}  // end namespace Eigen
+#endif  // EIGEN_EIGENBASE_H
--- a/eigen-master/Eigen/src/Core/Fill.h
+++ b/eigen-master/Eigen/src/Core/Fill.h
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2024 Charles Schlosser <cs.schlosser@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#ifndef EIGEN_FILL_H
+#define EIGEN_FILL_H
+// IWYU pragma: private
+#include "./InternalHeaderCheck.h"
+namespace Eigen {
+namespace internal {
+// Compile-time predicate: eigen_fill_helper<Xpr>::value is true only for the expression types accepted by the
+// specializations below. It is one of the two conditions of eigen_memset_helper; presumably it also drives the
+// use_fill flag of eigen_fill_impl (the selection site is not visible here -- TODO confirm).
+// Primary template: every expression type not matched by a specialization is rejected.
+template <typename Xpr>
+struct eigen_fill_helper : std::false_type {};
+// Plain Matrix objects are accepted.
+template <typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
+struct eigen_fill_helper<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols>> : std::true_type {};
+// Plain Array objects are accepted.
+template <typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
+struct eigen_fill_helper<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols>> : std::true_type {};
+// A Block flagged InnerPanel simply inherits the answer of the expression it views.
+template <typename Xpr, int BlockRows, int BlockCols>
+struct eigen_fill_helper<Block<Xpr, BlockRows, BlockCols, /*InnerPanel*/ true>> : eigen_fill_helper<Xpr> {};
+// A Block that is not an inner panel is accepted only if the viewed expression is accepted and the block is, at
+// compile time, a single row (row-major Xpr) or a single column (column-major Xpr).
+template <typename Xpr, int BlockRows, int BlockCols>
+struct eigen_fill_helper<Block<Xpr, BlockRows, BlockCols, /*InnerPanel*/ false>>
+    : std::integral_constant<bool, eigen_fill_helper<Xpr>::value &&
+                                       (Xpr::IsRowMajor ? (BlockRows == 1) : (BlockCols == 1))> {};
+// A Map with Stride<0, 0> inherits the answer of the mapped plain type.
+template <typename Xpr, int Options>
+struct eigen_fill_helper<Map<Xpr, Options, Stride<0, 0>>> : eigen_fill_helper<Xpr> {};
+// A Map with an explicit outer stride and inner stride 0 additionally requires enum_eq_not_dynamic(OuterStride_,
+// Xpr::InnerSizeAtCompileTime) to hold (judging by its name: equal and not Dynamic -- TODO confirm).
+template <typename Xpr, int Options, int OuterStride_>
+struct eigen_fill_helper<Map<Xpr, Options, Stride<OuterStride_, 0>>>
+    : std::integral_constant<bool, eigen_fill_helper<Xpr>::value &&
+                                       enum_eq_not_dynamic(OuterStride_, Xpr::InnerSizeAtCompileTime)> {};
+// An inner stride of 1 is treated exactly like an inner stride of 0.
+template <typename Xpr, int Options, int OuterStride_>
+struct eigen_fill_helper<Map<Xpr, Options, Stride<OuterStride_, 1>>>
+    : eigen_fill_helper<Map<Xpr, Options, Stride<OuterStride_, 0>>> {};
+// InnerStride<N> is forwarded to the Stride<0, N> case; among the specializations shown here only N == 0 and
+// N == 1 match something other than the rejecting primary template.
+template <typename Xpr, int Options, int InnerStride_>
+struct eigen_fill_helper<Map<Xpr, Options, InnerStride<InnerStride_>>>
+    : eigen_fill_helper<Map<Xpr, Options, Stride<0, InnerStride_>>> {};
+// OuterStride<N> is forwarded to the Stride<N, 0> case.
+template <typename Xpr, int Options, int OuterStride_>
+struct eigen_fill_helper<Map<Xpr, Options, OuterStride<OuterStride_>>>
+    : eigen_fill_helper<Map<Xpr, Options, Stride<OuterStride_, 0>>> {};
+// Generic fill: assigns a constant through the regular dense assignment loop.
+template <typename Xpr>
+struct eigen_fill_impl<Xpr, /*use_fill*/ false> {
+  using Scalar = typename Xpr::Scalar;
+  using PlainObject = typename Xpr::PlainObject;
+  using Constant = typename PlainObject::ConstantReturnType;
+  using Func = scalar_constant_op<Scalar>;
+  // Assigns the source expression \a src to \a dst coefficient-wise.
+  template <typename SrcXpr>
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void run(Xpr& dst, const SrcXpr& src) {
+    call_dense_assignment_loop(dst, src, assign_op<Scalar, Scalar>());
+  }
+  // Sets every coefficient of \a dst to \a val by building a constant expression of dst's current size
+  // and delegating to the overload above.
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void run(Xpr& dst, const Scalar& val) {
+    const Constant constantXpr(dst.rows(), dst.cols(), val);
+    run(dst, constantXpr);
+  }
+};
+#if EIGEN_COMP_MSVC || defined(EIGEN_GPU_COMPILE_PHASE)
+// With MSVC or during a GPU compile phase the fill_n path is not used: reuse the generic assignment-loop version.
+template <typename Xpr>
+struct eigen_fill_impl<Xpr, /*use_fill*/ true> : eigen_fill_impl<Xpr, /*use_fill*/ false> {};
+#else
+// Fill through fill_n over dst.data(), treating the destination as one run of dst.size() scalars.
+template <typename Xpr>
+struct eigen_fill_impl<Xpr, /*use_fill*/ true> {
+  using Scalar = typename Xpr::Scalar;
+  // Resizes \a dst to match \a src when permitted, then fills it with the value produced by src's functor.
+  template <typename SrcXpr>
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Xpr& dst, const SrcXpr& src) {
+    resize_if_allowed(dst, src, assign_op<Scalar, Scalar>());
+    // Bind to a named const Scalar& so that the non-template overload below is the one selected.
+    const Scalar& fillValue = src.functor()();
+    run(dst, fillValue);
+  }
+  // Writes \a val into each of the dst.size() scalars starting at dst.data().
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Xpr& dst, const Scalar& val) {
+    using std::fill_n;
+    fill_n(dst.data(), dst.size(), val);
+  }
+};
+#endif
+// Compile-time predicate: true when Xpr's scalar type is trivially copyable and Xpr is accepted by
+// eigen_fill_helper. Presumably selects the use_memset flavour of eigen_zero_impl (the selection site is not
+// visible here -- TODO confirm).
+template <typename Xpr>
+struct eigen_memset_helper {
+  static constexpr bool value =
+      std::is_trivially_copyable<typename Xpr::Scalar>::value && eigen_fill_helper<Xpr>::value;
+};
+// Generic zeroing: assigns a zero expression through the regular dense assignment loop.
+template <typename Xpr>
+struct eigen_zero_impl<Xpr, /*use_memset*/ false> {
+  using Scalar = typename Xpr::Scalar;
+  using PlainObject = typename Xpr::PlainObject;
+  using Zero = typename PlainObject::ZeroReturnType;
+  // Assigns the source expression \a src to \a dst coefficient-wise.
+  template <typename SrcXpr>
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void run(Xpr& dst, const SrcXpr& src) {
+    call_dense_assignment_loop(dst, src, assign_op<Scalar, Scalar>());
+  }
+  // Zeroes \a dst by building a zero expression of dst's current size and delegating to the overload above.
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void run(Xpr& dst) {
+    const Zero zeroXpr(dst.rows(), dst.cols());
+    run(dst, zeroXpr);
+  }
+};
+// Zeroing through memset over dst.data(), treating the destination as one run of dst.size() scalars.
+template <typename Xpr>
+struct eigen_zero_impl<Xpr, /*use_memset*/ true> {
+  using Scalar = typename Xpr::Scalar;
+  // Largest byte count accepted by the debug-only size check below.
+  static constexpr size_t max_bytes = (std::numeric_limits<std::ptrdiff_t>::max)();
+  // Resizes \a dst to match \a src when permitted, then zeroes it.
+  template <typename SrcXpr>
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Xpr& dst, const SrcXpr& src) {
+    resize_if_allowed(dst, src, assign_op<Scalar, Scalar>());
+    run(dst);
+  }
+  // Sets all bytes of dst's storage to zero; an empty destination is left untouched (data() is not even queried).
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Xpr& dst) {
+    const size_t byte_count = dst.size() * sizeof(Scalar);
+    if (byte_count != 0) {
+      void* dst_ptr = static_cast<void*>(dst.data());
+#ifndef EIGEN_NO_DEBUG
+      // Sanity checks, compiled out when EIGEN_NO_DEBUG is defined.
+      if (byte_count > max_bytes) throw_std_bad_alloc();
+      eigen_assert((dst_ptr != nullptr) && "null pointer dereference error!");
+#endif
+      EIGEN_USING_STD(memset);
+      memset(dst_ptr, 0, byte_count);
+    }
+  }
+};
+}  // namespace internal
+}  // namespace Eigen
+#endif  // EIGEN_FILL_H
--- a/eigen-master/Eigen/src/Core/ForceAlignedAccess.h
+++ b/eigen-master/Eigen/src/Core/ForceAlignedAccess.h
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#ifndef EIGEN_FORCEALIGNEDACCESS_H
+#define EIGEN_FORCEALIGNEDACCESS_H
+// IWYU pragma: private
+#include "./InternalHeaderCheck.h"
+namespace Eigen {
+/** \class ForceAlignedAccess
+ * \ingroup Core_Module
+ *
+ * \brief Enforce aligned packet loads and stores regardless of what is requested
+ *
+ * \param ExpressionType the type of the object of which we are forcing aligned packet access
+ *
+ * This class is the return type of MatrixBase::forceAlignedAccess()
+ * and most of the time this is the only way it is used.
+ *
+ * \sa MatrixBase::forceAlignedAccess()
+ */
+namespace internal {
+// ForceAlignedAccess exposes exactly the traits of the expression it wraps.
+template <typename ExpressionType>
+struct traits<ForceAlignedAccess<ExpressionType>> : public traits<ExpressionType> {};
+}  // namespace internal
+// Wrapper that forwards sizes, strides and coefficient access to the wrapped expression unchanged, and forwards
+// packet loads/stores with the Aligned mode regardless of the LoadMode requested by the caller.
+// The wrapped expression is held by const reference (see m_expression), not copied.
+template <typename ExpressionType>
+class ForceAlignedAccess : public internal::dense_xpr_base<ForceAlignedAccess<ExpressionType>>::type {
+ public:
+  typedef typename internal::dense_xpr_base<ForceAlignedAccess>::type Base;
+  EIGEN_DENSE_PUBLIC_INTERFACE(ForceAlignedAccess)
+  // Explicit: a ForceAlignedAccess is never created by implicit conversion from the expression.
+  EIGEN_DEVICE_FUNC explicit inline ForceAlignedAccess(const ExpressionType& matrix) : m_expression(matrix) {}
+  // Dimensions and strides are those of the wrapped expression.
+  EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_expression.rows(); }
+  EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_expression.cols(); }
+  EIGEN_DEVICE_FUNC constexpr Index outerStride() const noexcept { return m_expression.outerStride(); }
+  EIGEN_DEVICE_FUNC constexpr Index innerStride() const noexcept { return m_expression.innerStride(); }
+  // Read access by (row, col), forwarded as is.
+  EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index row, Index col) const {
+    return m_expression.coeff(row, col);
+  }
+  // Write access by (row, col); goes through const_cast_derived() because the expression is stored as const.
+  EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col) {
+    return m_expression.const_cast_derived().coeffRef(row, col);
+  }
+  // Linear-index variants of coeff()/coeffRef().
+  EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index index) const { return m_expression.coeff(index); }
+  EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index) { return m_expression.const_cast_derived().coeffRef(index); }
+  // Packet load at (row, col): LoadMode is ignored, the load is always requested as Aligned.
+  template <int LoadMode>
+  inline const PacketScalar packet(Index row, Index col) const {
+    return m_expression.template packet<Aligned>(row, col);
+  }
+  // Packet store at (row, col): LoadMode is ignored, the store is always requested as Aligned.
+  template <int LoadMode>
+  inline void writePacket(Index row, Index col, const PacketScalar& x) {
+    m_expression.const_cast_derived().template writePacket<Aligned>(row, col, x);
+  }
+  // Linear-index packet load, always Aligned.
+  template <int LoadMode>
+  inline const PacketScalar packet(Index index) const {
+    return m_expression.template packet<Aligned>(index);
+  }
+  // Linear-index packet store, always Aligned.
+  template <int LoadMode>
+  inline void writePacket(Index index, const PacketScalar& x) {
+    m_expression.const_cast_derived().template writePacket<Aligned>(index, x);
+  }
+  // Implicit conversion back to the wrapped expression.
+  EIGEN_DEVICE_FUNC operator const ExpressionType&() const { return m_expression; }
+ protected:
+  // Reference to the wrapped expression; the wrapper must not outlive the object it refers to.
+  const ExpressionType& m_expression;
+ private:
+  // Declared private and not defined in this header: copy-assignment of the wrapper is not available to users.
+  ForceAlignedAccess& operator=(const ForceAlignedAccess&);
+};
+/** \returns an expression of *this with forced aligned access
+ *
+ * The returned wrapper stores a reference to \c *this rather than a copy, so it must not outlive \c *this.
+ *
+ * \sa forceAlignedAccessIf(),class ForceAlignedAccess
+ */
+template <typename Derived>
+inline const ForceAlignedAccess<Derived> MatrixBase<Derived>::forceAlignedAccess() const {
+  return ForceAlignedAccess<Derived>(derived());
+}
+/** \returns an expression of *this with forced aligned access (non-const overload)
+ *
+ * The returned wrapper stores a reference to \c *this rather than a copy, so it must not outlive \c *this.
+ *
+ * \sa forceAlignedAccessIf(), class ForceAlignedAccess
+ */
+template <typename Derived>
+inline ForceAlignedAccess<Derived> MatrixBase<Derived>::forceAlignedAccess() {
+  return ForceAlignedAccess<Derived>(derived());
+}
+/** \returns an expression of *this with forced aligned access if \a Enable is true,
+ * and a reference to *this itself otherwise.
+ * \sa forceAlignedAccess(), class ForceAlignedAccess
+ */
+template <typename Derived>
+template <bool Enable>
+inline add_const_on_value_type_t<std::conditional_t<Enable, ForceAlignedAccess<Derived>, Derived&>>
+MatrixBase<Derived>::forceAlignedAccessIf() const {
+  typedef add_const_on_value_type_t<std::conditional_t<Enable, ForceAlignedAccess<Derived>, Derived&>> ReturnType;
+  // The ForceAlignedAccess constructor is explicit, so a plain `return derived();` cannot build the wrapper when
+  // Enable is true. static_cast direct-initializes the wrapper in that case and is a no-op reference cast when
+  // Enable is false.
+  return static_cast<ReturnType>(derived());
+}
+/** \returns an expression of *this with forced aligned access if \a Enable is true,
+ * and a reference to *this itself otherwise (non-const overload).
+ * \sa forceAlignedAccess(), class ForceAlignedAccess
+ */
+template <typename Derived>
+template <bool Enable>
+inline std::conditional_t<Enable, ForceAlignedAccess<Derived>, Derived&> MatrixBase<Derived>::forceAlignedAccessIf() {
+  typedef std::conditional_t<Enable, ForceAlignedAccess<Derived>, Derived&> ReturnType;
+  // The ForceAlignedAccess constructor is explicit, so a plain `return derived();` cannot build the wrapper when
+  // Enable is true. static_cast direct-initializes the wrapper in that case and is a no-op reference cast when
+  // Enable is false.
+  return static_cast<ReturnType>(derived());
+}
+}  // end namespace Eigen
+#endif  // EIGEN_FORCEALIGNEDACCESS_H
--- a/eigen-master/Eigen/src/Core/Fuzzy.h
+++ b/eigen-master/Eigen/src/Core/Fuzzy.h
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#ifndef EIGEN_FUZZY_H
+#define EIGEN_FUZZY_H
+// IWYU pragma: private
+#include "./InternalHeaderCheck.h"
+namespace Eigen {
+namespace internal {
+// Fuzzy equality for non-integer scalars: the squared norm of the difference must not exceed prec^2 times the
+// smaller of the two squared norms.
+template <typename Derived, typename OtherDerived, bool is_integer = NumTraits<typename Derived::Scalar>::IsInteger>
+struct isApprox_selector {
+  EIGEN_DEVICE_FUNC static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar& prec) {
+    using LhsNested = typename internal::nested_eval<Derived, 2>::type;
+    using RhsNested = typename internal::nested_eval<OtherDerived, 2>::type;
+    // Each operand is read twice below, hence the nested_eval<..., 2> wrappers.
+    LhsNested lhs(x);
+    RhsNested rhs(y);
+    return (lhs.matrix() - rhs.matrix()).cwiseAbs2().sum() <=
+           prec * prec * numext::mini(lhs.cwiseAbs2().sum(), rhs.cwiseAbs2().sum());
+  }
+};
+// Integer scalars: the precision is ignored and the comparison is exact.
+template <typename Derived, typename OtherDerived>
+struct isApprox_selector<Derived, OtherDerived, true> {
+  EIGEN_DEVICE_FUNC static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar&) {
+    return x.matrix() == y.matrix();
+  }
+};
+// "x is much smaller than y" for two objects with non-integer scalars: the sum of squared magnitudes of x must
+// not exceed prec^2 times the sum of squared magnitudes of y.
+template <typename Derived, typename OtherDerived, bool is_integer = NumTraits<typename Derived::Scalar>::IsInteger>
+struct isMuchSmallerThan_object_selector {
+  EIGEN_DEVICE_FUNC static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar& prec) {
+    return x.cwiseAbs2().sum() <= numext::abs2(prec) * y.cwiseAbs2().sum();
+  }
+};
+// Integer scalars: the reference object and the precision are ignored; x must be exactly zero.
+template <typename Derived, typename OtherDerived>
+struct isMuchSmallerThan_object_selector<Derived, OtherDerived, true> {
+  EIGEN_DEVICE_FUNC static bool run(const Derived& x, const OtherDerived&, const typename Derived::RealScalar&) {
+    return x.matrix() == Derived::Zero(x.rows(), x.cols()).matrix();
+  }
+};
+// "x is much smaller than the scalar y" for non-integer scalars: the sum of squared magnitudes of x must not
+// exceed (prec * y)^2.
+template <typename Derived, bool is_integer = NumTraits<typename Derived::Scalar>::IsInteger>
+struct isMuchSmallerThan_scalar_selector {
+  EIGEN_DEVICE_FUNC static bool run(const Derived& x, const typename Derived::RealScalar& y,
+                                    const typename Derived::RealScalar& prec) {
+    return x.cwiseAbs2().sum() <= numext::abs2(prec * y);
+  }
+};
+// Integer scalars: the reference scalar and the precision are ignored; x must be exactly zero.
+template <typename Derived>
+struct isMuchSmallerThan_scalar_selector<Derived, true> {
+  EIGEN_DEVICE_FUNC static bool run(const Derived& x, const typename Derived::RealScalar&,
+                                    const typename Derived::RealScalar&) {
+    return x.matrix() == Derived::Zero(x.rows(), x.cols()).matrix();
+  }
+};
+}  // end namespace internal
+/** \returns \c true if \c *this is approximately equal to \a other, within the precision
+ * determined by \a prec.
+ *
+ * \note The fuzzy compares are done multiplicatively. Two vectors \f$ v \f$ and \f$ w \f$
+ * are considered to be approximately equal within precision \f$ p \f$ if
+ * \f[ \Vert v - w \Vert \leqslant p\,\min(\Vert v\Vert, \Vert w\Vert). \f]
+ * For matrices, the comparison is done using the Hilbert-Schmidt norm (aka Frobenius norm or
+ * L2 norm).
+ *
+ * \note Because of the multiplicativeness of this comparison, one can't use this function
+ * to check whether \c *this is approximately equal to the zero matrix or vector.
+ * Indeed, \c isApprox(zero) returns false unless \c *this itself is exactly the zero matrix
+ * or vector. If you want to test whether \c *this is zero, use internal::isMuchSmallerThan(const
+ * RealScalar&, RealScalar) instead.
+ *
+ * \sa internal::isMuchSmallerThan(const RealScalar&, RealScalar) const
+ */
+template <typename Derived>
+template <typename OtherDerived>
+EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isApprox(const DenseBase<OtherDerived>& other,
+                                                    const RealScalar& prec) const {
+  // The selector chooses between the fuzzy and the exact (integer) comparison at compile time.
+  using Selector = internal::isApprox_selector<Derived, OtherDerived>;
+  return Selector::run(derived(), other.derived(), prec);
+}
+/** \returns \c true if the norm of \c *this is much smaller than \a other,
+ * within the precision determined by \a prec.
+ *
+ * \note The fuzzy compares are done multiplicatively. A vector \f$ v \f$ is
+ * considered to be much smaller than \f$ x \f$ within precision \f$ p \f$ if
+ * \f[ \Vert v \Vert \leqslant p\,\vert x\vert. \f]
+ *
+ * For matrices, the comparison is done using the Hilbert-Schmidt norm. For this reason,
+ * the value of the reference scalar \a other should come from the Hilbert-Schmidt norm
+ * of a reference matrix of same dimensions.
+ *
+ * \sa isApprox(), isMuchSmallerThan(const DenseBase<OtherDerived>&, RealScalar) const
+ */
+template <typename Derived>
+EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isMuchSmallerThan(const typename NumTraits<Scalar>::Real& other,
+                                                             const RealScalar& prec) const {
+  // The selector chooses between the fuzzy and the exact (integer) comparison at compile time.
+  using Selector = internal::isMuchSmallerThan_scalar_selector<Derived>;
+  return Selector::run(derived(), other, prec);
+}
+/** \returns \c true if the norm of \c *this is much smaller than the norm of \a other,
+ * within the precision determined by \a prec.
+ *
+ * \note The fuzzy compares are done multiplicatively. A vector \f$ v \f$ is
+ * considered to be much smaller than a vector \f$ w \f$ within precision \f$ p \f$ if
+ * \f[ \Vert v \Vert \leqslant p\,\Vert w\Vert. \f]
+ * For matrices, the comparison is done using the Hilbert-Schmidt norm.
+ *
+ * \sa isApprox(), isMuchSmallerThan(const RealScalar&, RealScalar) const
+ */
+template <typename Derived>
+template <typename OtherDerived>
+EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isMuchSmallerThan(const DenseBase<OtherDerived>& other,
+                                                             const RealScalar& prec) const {
+  // The selector chooses between the fuzzy and the exact (integer) comparison at compile time.
+  using Selector = internal::isMuchSmallerThan_object_selector<Derived, OtherDerived>;
+  return Selector::run(derived(), other.derived(), prec);
+}
+}  // end namespace Eigen
+#endif  // EIGEN_FUZZY_H
--- a/eigen-master/Eigen/src/Core/GeneralProduct.h
+++ b/eigen-master/Eigen/src/Core/GeneralProduct.h
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2008-2011 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#ifndef EIGEN_GENERAL_PRODUCT_H
+#define EIGEN_GENERAL_PRODUCT_H
+// IWYU pragma: private
+#include "./InternalHeaderCheck.h"
+namespace Eigen {
+// Size-category tags. product_size_category classifies a dimension as 1, Small or Large, and
+// product_type_selector is specialized on triples of these values.
+enum { Large = 2, Small = 3 };
+// Define the threshold value to fallback from the generic matrix-matrix product
+// implementation (heavy) to the lightweight coeff-based product one.
+// See generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
+// in products/GeneralMatrixMatrix.h for more details.
+// TODO This threshold should also be used in the compile-time selector below.
+// The #ifndef guard lets a value defined before this header is included take precedence.
+#ifndef EIGEN_GEMM_TO_COEFFBASED_THRESHOLD
+// This default value has been obtained on a Haswell architecture.
+#define EIGEN_GEMM_TO_COEFFBASED_THRESHOLD 20
+#endif
+namespace internal {
+// Maps a (rows, cols, depth) triple of size categories to a product kind; the specializations follow
+// product_type below.
+template <int Rows, int Cols, int Depth>
+struct product_type_selector;
+// Classifies one dimension, given its compile-time size and maximum size, as 1, Small or Large.
+template <int Size, int MaxSize>
+struct product_size_category {
+  enum {
+#ifndef EIGEN_GPU_COMPILE_PHASE
+    // Large when the maximum size is unbounded (Dynamic), when the fixed size reaches the cache-friendly
+    // threshold, or when the size is Dynamic but its bound reaches that threshold.
+    is_large = MaxSize == Dynamic || Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ||
+               (Size == Dynamic && MaxSize >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD),
+#else
+    // During a GPU compile phase no dimension is ever classified as Large.
+    is_large = 0,
+#endif
+    // Large takes priority; otherwise a size of exactly 1 maps to 1 and everything else to Small.
+    value = is_large    ? Large
+            : Size == 1 ? 1
+                        : Small
+  };
+};
+// Determines, at compile time, the kind of product to use for Lhs * Rhs: the result rows come from Lhs, the
+// result columns from Rhs, and the depth from the shared inner dimension. Each of the three is classified with
+// product_size_category and the resulting triple is looked up in product_type_selector.
+template <typename Lhs, typename Rhs>
+struct product_type {
+  typedef remove_all_t<Lhs> Lhs_;
+  typedef remove_all_t<Rhs> Rhs_;
+  enum {
+    MaxRows = traits<Lhs_>::MaxRowsAtCompileTime,
+    Rows = traits<Lhs_>::RowsAtCompileTime,
+    MaxCols = traits<Rhs_>::MaxColsAtCompileTime,
+    Cols = traits<Rhs_>::ColsAtCompileTime,
+    // The inner dimension is available from both operands (Lhs columns and Rhs rows); min_size_prefer_fixed
+    // combines the two (judging by its name, favouring a fixed size over Dynamic -- TODO confirm).
+    MaxDepth = min_size_prefer_fixed(traits<Lhs_>::MaxColsAtCompileTime, traits<Rhs_>::MaxRowsAtCompileTime),
+    Depth = min_size_prefer_fixed(traits<Lhs_>::ColsAtCompileTime, traits<Rhs_>::RowsAtCompileTime)
+  };
+  // the splitting into different lines of code here, introducing the _select enums and the typedef below,
+  // is to work around an internal compiler error with gcc 4.1 and 4.2.
+ private:
+  enum {
+    rows_select = product_size_category<Rows, MaxRows>::value,
+    cols_select = product_size_category<Cols, MaxCols>::value,
+    depth_select = product_size_category<Depth, MaxDepth>::value
+  };
+  typedef product_type_selector<rows_select, cols_select, depth_select> selector;
+ public:
+  // The selected product kind, exposed under two names (value and ret).
+  enum { value = selector::ret, ret = selector::ret };
+#ifdef EIGEN_DEBUG_PRODUCT
+  // Debug helper, only compiled when EIGEN_DEBUG_PRODUCT is defined: dumps the sizes and the categories chosen.
+  static void debug() {
+    EIGEN_DEBUG_VAR(Rows);
+    EIGEN_DEBUG_VAR(Cols);
+    EIGEN_DEBUG_VAR(Depth);
+    EIGEN_DEBUG_VAR(rows_select);
+    EIGEN_DEBUG_VAR(cols_select);
+    EIGEN_DEBUG_VAR(depth_select);
+    EIGEN_DEBUG_VAR(value);
+  }
+#endif
+};
+/* The following specializations select the kind of product at compile time
+ * based on the three dimensions of the product, given in the order <rows, cols, depth>.
+ * This is a compile time mapping from {1,Small,Large}^3 -> {product types} */
+// FIXME I'm not sure the current mapping is the ideal one.
+// --- Depth of 1 -------------------------------------------------------------
+// General case with depth 1: outer product.
+template <int M, int N>
+struct product_type_selector<M, N, 1> {
+  enum { ret = OuterProduct };
+};
+// Depth 1 and a single result column: lazy coefficient-based product.
+template <int M>
+struct product_type_selector<M, 1, 1> {
+  enum { ret = LazyCoeffBasedProductMode };
+};
+// Depth 1 and a single result row: lazy coefficient-based product.
+template <int N>
+struct product_type_selector<1, N, 1> {
+  enum { ret = LazyCoeffBasedProductMode };
+};
+// --- 1x1 result -------------------------------------------------------------
+// Single row times single column, any depth: inner product.
+template <int Depth>
+struct product_type_selector<1, 1, Depth> {
+  enum { ret = InnerProduct };
+};
+// Full specialization for <1, 1, 1>, which would otherwise match several of the partial specializations above.
+template <>
+struct product_type_selector<1, 1, 1> {
+  enum { ret = InnerProduct };
+};
+// --- Explicit entries of the mapping table ------------------------------------
+template <>
+struct product_type_selector<Small, 1, Small> {
+  enum { ret = CoeffBasedProductMode };
+};
+template <>
+struct product_type_selector<1, Small, Small> {
+  enum { ret = CoeffBasedProductMode };
+};
+template <>
+struct product_type_selector<Small, Small, Small> {
+  enum { ret = CoeffBasedProductMode };
+};
+// The next three full specializations override the generic <M, N, 1> outer-product entry.
+template <>
+struct product_type_selector<Small, Small, 1> {
+  enum { ret = LazyCoeffBasedProductMode };
+};
+template <>
+struct product_type_selector<Small, Large, 1> {
+  enum { ret = LazyCoeffBasedProductMode };
+};
+template <>
+struct product_type_selector<Large, Small, 1> {
+  enum { ret = LazyCoeffBasedProductMode };
+};
+// Single result row (row-vector times matrix).
+template <>
+struct product_type_selector<1, Large, Small> {
+  enum { ret = CoeffBasedProductMode };
+};
+template <>
+struct product_type_selector<1, Large, Large> {
+  enum { ret = GemvProduct };
+};
+template <>
+struct product_type_selector<1, Small, Large> {
+  enum { ret = CoeffBasedProductMode };
+};
+// Single result column (matrix times column-vector).
+template <>
+struct product_type_selector<Large, 1, Small> {
+  enum { ret = CoeffBasedProductMode };
+};
+template <>
+struct product_type_selector<Large, 1, Large> {
+  enum { ret = GemvProduct };
+};
+template <>
+struct product_type_selector<Small, 1, Large> {
+  enum { ret = CoeffBasedProductMode };
+};
+// Matrix-matrix products with a Large depth: always GEMM.
+template <>
+struct product_type_selector<Small, Small, Large> {
+  enum { ret = GemmProduct };
+};
+template <>
+struct product_type_selector<Large, Small, Large> {
+  enum { ret = GemmProduct };
+};
+template <>
+struct product_type_selector<Small, Large, Large> {
+  enum { ret = GemmProduct };
+};
+template <>
+struct product_type_selector<Large, Large, Large> {
+  enum { ret = GemmProduct };
+};
+// Matrix-matrix products with a Small depth: GEMM only when both result dimensions are Large.
+template <>
+struct product_type_selector<Large, Small, Small> {
+  enum { ret = CoeffBasedProductMode };
+};
+template <>
+struct product_type_selector<Small, Large, Small> {
+  enum { ret = CoeffBasedProductMode };
+};
+template <>
+struct product_type_selector<Large, Large, Small> {
+  enum { ret = GemmProduct };
+};
+}  // end namespace internal
+/***********************************************************************
+ *  Implementation of Inner Vector Vector Product
+ ***********************************************************************/
+// FIXME : maybe the "inner product" could return a Scalar
+// instead of a 1x1 matrix ??
+// Pro: more natural for the user
+// Cons: this could be a problem if in a meta unrolled algorithm a matrix-matrix
+// product ends up to a row-vector times col-vector product... To tackle this use
+// case, we could have a specialization for Block<MatrixType,1,1> with: operator=(Scalar x);
+/***********************************************************************
+ *  Implementation of Outer Vector Vector Product
+ ***********************************************************************/
+/***********************************************************************
+ *  Implementation of General Matrix Vector Product
+ ***********************************************************************/
+/*  According to the shape/flags of the matrix we have to distinguish 3 different cases:
+ *   1 - the matrix is col-major, BLAS compatible and M is large => call fast BLAS-like colmajor routine
+ *   2 - the matrix is row-major, BLAS compatible and N is large => call fast BLAS-like rowmajor routine
+ *   3 - all other cases are handled using a simple loop along the outer-storage direction.
+ *  Therefore we need a lower level meta selector.
+ *  Furthermore, if the matrix is the rhs, then the product has to be transposed.
+ */
+namespace internal {
+template <int Side, int StorageOrder, bool BlasCompatible>
+struct gemv_dense_selector;
+}  // end namespace internal
+namespace internal {
+// Optional statically-sized scratch buffer of Scalars. Whether storage is actually embedded depends on Cond and
+// on MaxSize; data() returns a pointer to the buffer, or null when there is none.
+template <typename Scalar, int Size, int MaxSize, bool Cond>
+struct gemv_static_vector_if;
+// Cond == false: no storage. data() is not expected to be called (internal assertion) and returns null.
+template <typename Scalar, int Size, int MaxSize>
+struct gemv_static_vector_if<Scalar, Size, MaxSize, false> {
+  EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr Scalar* data() {
+    eigen_internal_assert(false && "should never be called");
+    return 0;
+  }
+};
+// Cond == true but MaxSize is Dynamic: no storage is embedded; data() returns null.
+template <typename Scalar, int Size>
+struct gemv_static_vector_if<Scalar, Size, Dynamic, true> {
+  EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr Scalar* data() { return 0; }
+};
+// Cond == true with a MaxSize other than Dynamic: embeds a plain_array of min_size_prefer_fixed(Size, MaxSize)
+// elements.
+template <typename Scalar, int Size, int MaxSize>
+struct gemv_static_vector_if<Scalar, Size, MaxSize, true> {
+#if EIGEN_MAX_STATIC_ALIGN_BYTES != 0
+  // Static alignment is available: request AlignedMax on the array and hand out its start.
+  internal::plain_array<Scalar, internal::min_size_prefer_fixed(Size, MaxSize), 0, AlignedMax> m_data;
+  EIGEN_STRONG_INLINE constexpr Scalar* data() { return m_data.array; }
+#else
+  // Some architectures cannot align on the stack,
+  // => let's manually enforce alignment by allocating more data and return the address of the first aligned element.
+  // Note that the padding is EIGEN_MAX_ALIGN_BYTES extra *elements* (Scalars), not bytes.
+  internal::plain_array<Scalar, internal::min_size_prefer_fixed(Size, MaxSize) + EIGEN_MAX_ALIGN_BYTES, 0> m_data;
+  EIGEN_STRONG_INLINE constexpr Scalar* data() {
+    // Round the array address down to a multiple of EIGEN_MAX_ALIGN_BYTES (the mask assumes a power of two),
+    // then step forward by EIGEN_MAX_ALIGN_BYTES so that the result lies past the start of the array.
+    return reinterpret_cast<Scalar*>((std::uintptr_t(m_data.array) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES - 1))) +
+                                     EIGEN_MAX_ALIGN_BYTES);
+  }
+#endif
+};
+// Vector on the left: the product is rewritten as its transpose, so that the vector ends up on the right.
+template <int StorageOrder, bool BlasCompatible>
+struct gemv_dense_selector<OnTheLeft, StorageOrder, BlasCompatible> {
+  template <typename Lhs, typename Rhs, typename Dest>
+  static void run(const Lhs& lhs, const Rhs& rhs, Dest& dest, const typename Dest::Scalar& alpha) {
+    // Transposing swaps the roles of row-major and column-major storage.
+    enum { FlippedStorageOrder = (StorageOrder == RowMajor) ? ColMajor : RowMajor };
+    using TransposedSelector = gemv_dense_selector<OnTheRight, FlippedStorageOrder, BlasCompatible>;
+    // dest^T receives rhs^T * lhs^T; writing through the transposed view updates dest itself.
+    Transpose<Dest> transposedDest(dest);
+    TransposedSelector::run(rhs.transpose(), lhs.transpose(), transposedDest, alpha);
+  }
+};
+template <>
+struct gemv_dense_selector<OnTheRight, ColMajor, true> {  // matrix * vector, column-major lhs, BlasCompatible == true
+  template <typename Lhs, typename Rhs, typename Dest>
+  static inline void run(const Lhs& lhs, const Rhs& rhs, Dest& dest, const typename Dest::Scalar& alpha) {
+    typedef typename Lhs::Scalar LhsScalar;
+    typedef typename Rhs::Scalar RhsScalar;
+    typedef typename Dest::Scalar ResScalar;
+    typedef internal::blas_traits<Lhs> LhsBlasTraits;
+    typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
+    typedef internal::blas_traits<Rhs> RhsBlasTraits;
+    typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
+    typedef Map<Matrix<ResScalar, Dynamic, 1>, plain_enum_min(AlignedMax, internal::packet_traits<ResScalar>::size)>
+        MappedDest;
+    ActualLhsType actualLhs = LhsBlasTraits::extract(lhs);
+    ActualRhsType actualRhs = RhsBlasTraits::extract(rhs);
+    ResScalar actualAlpha = combine_scalar_factors(alpha, lhs, rhs);
+    // make sure Dest is a compile-time vector type (bug 1166)
+    typedef std::conditional_t<Dest::IsVectorAtCompileTime, Dest, typename Dest::ColXpr> ActualDest;
+    enum {
+      // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
+      // on the other hand, it is good for the cache to pack the vector anyway...
+      EvalToDestAtCompileTime = (ActualDest::InnerStrideAtCompileTime == 1),  // dest has unit inner stride
+      ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
+      MightCannotUseDest = ((!EvalToDestAtCompileTime) || ComplexByReal) && (ActualDest::MaxSizeAtCompileTime != 0)
+    };
+    typedef const_blas_data_mapper<LhsScalar, Index, ColMajor> LhsMapper;
+    typedef const_blas_data_mapper<RhsScalar, Index, RowMajor> RhsMapper;
+    RhsScalar compatibleAlpha = get_factor<ResScalar, RhsScalar>::run(actualAlpha);
+    if (!MightCannotUseDest) {
+      // shortcut if we are sure to be able to use dest directly,
+      // this helps the compiler generate cleaner and more optimized code for the most common cases
+      general_matrix_vector_product<Index, LhsScalar, LhsMapper, ColMajor, LhsBlasTraits::NeedToConjugate, RhsScalar,
+                                    RhsMapper, RhsBlasTraits::NeedToConjugate>::run(actualLhs.rows(), actualLhs.cols(),
+                                                                                    LhsMapper(actualLhs.data(),
+                                                                                              actualLhs.outerStride()),
+                                                                                    RhsMapper(actualRhs.data(),
+                                                                                              actualRhs.innerStride()),
+                                                                                    dest.data(), 1, compatibleAlpha);
+    } else {
+      gemv_static_vector_if<ResScalar, ActualDest::SizeAtCompileTime, ActualDest::MaxSizeAtCompileTime,
+                            MightCannotUseDest>
+          static_dest;
+      const bool alphaIsCompatible = (!ComplexByReal) || (numext::is_exactly_zero(numext::imag(actualAlpha)));  // false only for complex-by-real products whose alpha has a non-zero imaginary part
+      const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
+      ei_declare_aligned_stack_constructed_variable(ResScalar, actualDestPtr, dest.size(),
+                                                    evalToDest ? dest.data() : static_dest.data());
+      if (!evalToDest) {
+#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+        constexpr int Size = Dest::SizeAtCompileTime;
+        Index size = dest.size();
+        EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+#endif
+        if (!alphaIsCompatible) {
+          MappedDest(actualDestPtr, dest.size()).setZero();  // the kernel result is scaled by actualAlpha and added to dest afterwards
+          compatibleAlpha = RhsScalar(1);
+        } else
+          MappedDest(actualDestPtr, dest.size()) = dest;  // copy dest into the temporary; it is copied back after the kernel call
+      }
+      general_matrix_vector_product<Index, LhsScalar, LhsMapper, ColMajor, LhsBlasTraits::NeedToConjugate, RhsScalar,
+                                    RhsMapper, RhsBlasTraits::NeedToConjugate>::run(actualLhs.rows(), actualLhs.cols(),
+                                                                                    LhsMapper(actualLhs.data(),
+                                                                                              actualLhs.outerStride()),
+                                                                                    RhsMapper(actualRhs.data(),
+                                                                                              actualRhs.innerStride()),
+                                                                                    actualDestPtr, 1, compatibleAlpha);
+      if (!evalToDest) {  // the kernel wrote into the temporary: transfer the result to dest
+        if (!alphaIsCompatible)
+          dest.matrix() += actualAlpha * MappedDest(actualDestPtr, dest.size());
+        else
+          dest = MappedDest(actualDestPtr, dest.size());
+      }
+    }
+  }
+};
+template <>
+struct gemv_dense_selector<OnTheRight, RowMajor, true> {  // matrix * vector, row-major lhs, BlasCompatible == true
+  template <typename Lhs, typename Rhs, typename Dest>
+  static void run(const Lhs& lhs, const Rhs& rhs, Dest& dest, const typename Dest::Scalar& alpha) {
+    typedef typename Lhs::Scalar LhsScalar;
+    typedef typename Rhs::Scalar RhsScalar;
+    typedef typename Dest::Scalar ResScalar;
+    typedef internal::blas_traits<Lhs> LhsBlasTraits;
+    typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
+    typedef internal::blas_traits<Rhs> RhsBlasTraits;
+    typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
+    typedef internal::remove_all_t<ActualRhsType> ActualRhsTypeCleaned;
+    std::add_const_t<ActualLhsType> actualLhs = LhsBlasTraits::extract(lhs);
+    std::add_const_t<ActualRhsType> actualRhs = RhsBlasTraits::extract(rhs);
+    ResScalar actualAlpha = combine_scalar_factors(alpha, lhs, rhs);
+    enum {
+      // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
+      // on the other hand, it is good for the cache to pack the vector anyway...
+      DirectlyUseRhs =
+          ActualRhsTypeCleaned::InnerStrideAtCompileTime == 1 || ActualRhsTypeCleaned::MaxSizeAtCompileTime == 0  // rhs has unit inner stride, or its maximal size is statically zero
+    };
+    gemv_static_vector_if<RhsScalar, ActualRhsTypeCleaned::SizeAtCompileTime,
+                          ActualRhsTypeCleaned::MaxSizeAtCompileTime, !DirectlyUseRhs>
+        static_rhs;
+    ei_declare_aligned_stack_constructed_variable(
+        RhsScalar, actualRhsPtr, actualRhs.size(),
+        DirectlyUseRhs ? const_cast<RhsScalar*>(actualRhs.data()) : static_rhs.data());
+    if (!DirectlyUseRhs) {
+#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+      constexpr int Size = ActualRhsTypeCleaned::SizeAtCompileTime;
+      Index size = actualRhs.size();
+      EIGEN_DENSE_STORAGE_CTOR_PLUGIN
+#endif
+      Map<typename ActualRhsTypeCleaned::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;  // copy rhs into the temporary buffer, which is read below with stride 1
+    }
+    typedef const_blas_data_mapper<LhsScalar, Index, RowMajor> LhsMapper;
+    typedef const_blas_data_mapper<RhsScalar, Index, ColMajor> RhsMapper;
+    general_matrix_vector_product<Index, LhsScalar, LhsMapper, RowMajor, LhsBlasTraits::NeedToConjugate, RhsScalar,
+                                  RhsMapper, RhsBlasTraits::NeedToConjugate>::
+        run(actualLhs.rows(), actualLhs.cols(), LhsMapper(actualLhs.data(), actualLhs.outerStride()),
+            RhsMapper(actualRhsPtr, 1), dest.data(),
+            dest.col(0).innerStride(),  // NOTE  if dest is not a vector at compile-time, then dest.innerStride() might
+                                        // be wrong. (bug 1166)
+            actualAlpha);
+  }
+};
+// Fallback for a column-major lhs when the BLAS-style kernel is not selected:
+// dest is updated one lhs column at a time.
+template <>
+struct gemv_dense_selector<OnTheRight, ColMajor, false> {
+  template <typename Lhs, typename Rhs, typename Dest>
+  static void run(const Lhs& lhs, const Rhs& rhs, Dest& dest, const typename Dest::Scalar& alpha) {
+    EIGEN_STATIC_ASSERT((!nested_eval<Lhs, 1>::Evaluate),
+                        EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE);
+    // TODO if rhs is large enough it might be beneficial to make sure that dest is sequentially stored in memory,
+    // otherwise use a temp
+    typedef typename nested_eval<Rhs, 1>::type NestedRhs;
+    NestedRhs nestedRhs(rhs);
+    const Index depth = rhs.rows();
+    Index col = 0;
+    while (col < depth) {
+      // Add the col-th lhs column, scaled by alpha times the matching rhs coefficient.
+      dest += (alpha * nestedRhs.coeff(col)) * lhs.col(col);
+      ++col;
+    }
+  }
+};
+// Fallback for a row-major lhs when the BLAS-style kernel is not selected:
+// each dest coefficient is updated with the product of one lhs row and rhs.
+template <>
+struct gemv_dense_selector<OnTheRight, RowMajor, false> {
+  template <typename Lhs, typename Rhs, typename Dest>
+  static void run(const Lhs& lhs, const Rhs& rhs, Dest& dest, const typename Dest::Scalar& alpha) {
+    EIGEN_STATIC_ASSERT((!nested_eval<Lhs, 1>::Evaluate),
+                        EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE);
+    typedef typename nested_eval<Rhs, Lhs::RowsAtCompileTime>::type NestedRhs;
+    NestedRhs nestedRhs(rhs);
+    const Index rowCount = dest.rows();
+    Index row = 0;
+    while (row < rowCount) {
+      dest.coeffRef(row) += alpha * (lhs.row(row).cwiseProduct(nestedRhs.transpose())).sum();
+      ++row;
+    }
+  }
+};
+}  // end namespace internal
+/***************************************************************************
+ * Implementation of matrix base methods
+ ***************************************************************************/
+/** \returns the matrix product of \c *this and \a other.
+ *
+ * Operand sizes that are known at compile time are checked with static assertions.
+ *
+ * \note If instead of the matrix product you want the coefficient-wise product, see Cwise::operator*().
+ *
+ * \sa lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*()
+ */
+template <typename Derived>
+template <typename OtherDerived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Product<Derived, OtherDerived> MatrixBase<Derived>::operator*(
+    const MatrixBase<OtherDerived>& other) const {
+  // About the declaration: MSVC will sometimes not inline this function, because
+  // DenseStorage is an unwindable object for dynamic matrices and product types
+  // hold a member to store the result. Tagging the function with
+  // EIGEN_STRONG_INLINE therefore does not help there.
+  enum {
+    InnerDimsAgree = Derived::ColsAtCompileTime == Dynamic || OtherDerived::RowsAtCompileTime == Dynamic ||
+                     int(Derived::ColsAtCompileTime) == int(OtherDerived::RowsAtCompileTime),
+    BothAreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
+    ShapesCoincide = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived, OtherDerived)
+  };
+  // note to the lost user:
+  //    * for a dot product use: v1.dot(v2)
+  //    * for a coeff-wise product use: v1.cwiseProduct(v2)
+  EIGEN_STATIC_ASSERT(
+      InnerDimsAgree || !BothAreVectors || !ShapesCoincide,
+      INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
+  EIGEN_STATIC_ASSERT(InnerDimsAgree || BothAreVectors || !ShapesCoincide,
+                      INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
+  EIGEN_STATIC_ASSERT(InnerDimsAgree || ShapesCoincide, INVALID_MATRIX_PRODUCT)
+#ifdef EIGEN_DEBUG_PRODUCT
+  internal::product_type<Derived, OtherDerived>::debug();
+#endif
+  typedef Product<Derived, OtherDerived> ProductXpr;
+  return ProductXpr(derived(), other.derived());
+}
+/** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation.
+ *
+ * The returned product behaves like any other expression: its coefficients are
+ * computed one at a time, as they are requested. This might be useful in some extremely rare cases
+ * when only a small and non-coherent fraction of the result's coefficients has to be computed.
+ *
+ * \warning This version of the matrix product can be much much slower. So use it only if you know
+ * what you are doing and that you measured a true speed improvement.
+ *
+ * \sa operator*(const MatrixBase&)
+ */
+template <typename Derived>
+template <typename OtherDerived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Product<Derived, OtherDerived, LazyProduct>
+MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived>& other) const {
+  enum {
+    InnerDimsAgree = Derived::ColsAtCompileTime == Dynamic || OtherDerived::RowsAtCompileTime == Dynamic ||
+                     int(Derived::ColsAtCompileTime) == int(OtherDerived::RowsAtCompileTime),
+    BothAreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime,
+    ShapesCoincide = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived, OtherDerived)
+  };
+  // note to the lost user:
+  //    * for a dot product use: v1.dot(v2)
+  //    * for a coeff-wise product use: v1.cwiseProduct(v2)
+  EIGEN_STATIC_ASSERT(
+      InnerDimsAgree || !BothAreVectors || !ShapesCoincide,
+      INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS)
+  EIGEN_STATIC_ASSERT(InnerDimsAgree || BothAreVectors || !ShapesCoincide,
+                      INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION)
+  EIGEN_STATIC_ASSERT(InnerDimsAgree || ShapesCoincide, INVALID_MATRIX_PRODUCT)
+  typedef Product<Derived, OtherDerived, LazyProduct> LazyProductXpr;
+  return LazyProductXpr(derived(), other.derived());
+}
+}  // end namespace Eigen
+#endif  // EIGEN_PRODUCT_H
--- a/eigen-master/Eigen/src/Core/GenericPacketMath.h
+++ b/eigen-master/Eigen/src/Core/GenericPacketMath.h
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#ifndef EIGEN_GENERIC_PACKET_MATH_H
+#define EIGEN_GENERIC_PACKET_MATH_H
+// IWYU pragma: private
+#include "./InternalHeaderCheck.h"
+namespace Eigen {
+namespace internal {
+/** \internal
+ * \file GenericPacketMath.h
+ *
+ * Default implementation for types not supported by the vectorization.
+ * In practice, these functions are provided to make it easier to write
+ * generic vectorized code.
+ */
+#ifndef EIGEN_DEBUG_ALIGNED_LOAD  // each of the four debug hooks below defaults to an empty macro unless already defined
+#define EIGEN_DEBUG_ALIGNED_LOAD
+#endif
+#ifndef EIGEN_DEBUG_UNALIGNED_LOAD
+#define EIGEN_DEBUG_UNALIGNED_LOAD
+#endif
+#ifndef EIGEN_DEBUG_ALIGNED_STORE
+#define EIGEN_DEBUG_ALIGNED_STORE
+#endif
+#ifndef EIGEN_DEBUG_UNALIGNED_STORE
+#define EIGEN_DEBUG_UNALIGNED_STORE
+#endif
+struct default_packet_traits {  // baseline Has* feature flags; the generic packet_traits derives from this struct
+  enum {
+    // Ops that are implemented for most types.
+    HasAdd = 1,
+    HasSub = 1,
+    HasShift = 1,
+    HasMul = 1,
+    HasNegate = 1,
+    HasAbs = 1,
+    HasAbs2 = 1,
+    HasMin = 1,
+    HasMax = 1,
+    HasConj = 1,
+    HasSetLinear = 1,
+    HasSign = 1,
+    // By default, the nearest integer functions (rint, round, floor, ceil, trunc) are enabled for all scalar and packet
+    // types
+    HasRound = 1,
+    HasArg = 0,  // every flag from here on defaults to 0
+    HasAbsDiff = 0,
+    HasBlend = 0,
+    // This flag is used to indicate whether packet comparison is supported.
+    // pcmp_eq, pcmp_lt and pcmp_le should be defined for it to be true.
+    HasCmp = 0,
+    HasDiv = 0,
+    HasReciprocal = 0,
+    HasSqrt = 0,
+    HasRsqrt = 0,
+    HasCbrt = 0,
+    HasExp = 0,
+    HasExpm1 = 0,
+    HasLog = 0,
+    HasLog1p = 0,
+    HasLog10 = 0,
+    HasPow = 0,
+    HasSin = 0,
+    HasCos = 0,
+    HasTan = 0,
+    HasASin = 0,
+    HasACos = 0,
+    HasATan = 0,
+    HasATanh = 0,
+    HasSinh = 0,
+    HasCosh = 0,
+    HasTanh = 0,
+    HasLGamma = 0,
+    HasDiGamma = 0,
+    HasZeta = 0,
+    HasPolygamma = 0,
+    HasErf = 0,
+    HasErfc = 0,
+    HasNdtri = 0,
+    HasBessel = 0,
+    HasIGamma = 0,
+    HasIGammaDerA = 0,
+    HasGammaSampleDerAlpha = 0,
+    HasIGammac = 0,
+    HasBetaInc = 0
+  };
+};
+template <typename T>
+struct packet_traits : default_packet_traits {  // generic, non-vectorized traits: the packet type is the scalar T itself
+  typedef T type;
+  typedef T half;
+  enum {
+    Vectorizable = 0,
+    size = 1,
+    AlignedOnScalar = 0,
+  };
+  enum {  // these enumerators hide the same-named flags inherited from default_packet_traits
+    HasAdd = 0,
+    HasSub = 0,
+    HasMul = 0,
+    HasNegate = 0,
+    HasAbs = 0,
+    HasAbs2 = 0,
+    HasMin = 0,
+    HasMax = 0,
+    HasConj = 0,
+    HasSetLinear = 0
+  };
+};
+template <typename T>
+struct packet_traits<const T> : packet_traits<T> {};  // a const-qualified type reuses the traits of T
+template <typename T>
+struct unpacket_traits {  // generic case: T is treated as a one-element, non-vectorizable packet of itself
+  typedef T type;
+  typedef T half;
+  typedef typename numext::get_integer_by_size<sizeof(T)>::signed_type integer_packet;  // signed integer type selected by sizeof(T)
+  enum {
+    size = 1,
+    alignment = alignof(T),
+    vectorizable = false,
+    masked_load_available = false,
+    masked_store_available = false
+  };
+};
+template <typename T>
+struct unpacket_traits<const T> : unpacket_traits<T> {};  // a const-qualified type reuses the traits of T
+/** \internal A convenience utility for determining if the type is a scalar.
+ * This is used to enable some generic packet implementations.
+ *
+ * \c value is non-zero exactly when \c Packet is the same type as
+ * \c unpacket_traits<Packet>::type.
+ */
+template <typename Packet>
+struct is_scalar {
+  using Scalar = typename unpacket_traits<Packet>::type;
+  enum { value = internal::is_same<Packet, Scalar>::value };
+};
+// automatically and succinctly define combinations of pcast<SrcPacket,TgtPacket> when
+// 1) the packets are the same type, or
+// 2) the packets differ only in sign.
+// In both of these cases, preinterpret (bit_cast) is equivalent to pcast (static_cast)
+template <typename SrcPacket, typename TgtPacket,
+          bool Scalar = is_scalar<SrcPacket>::value && is_scalar<TgtPacket>::value>
+struct is_degenerate_helper : is_same<SrcPacket, TgtPacket> {};  // case 1: true only for identical types
+template <>
+struct is_degenerate_helper<int8_t, uint8_t, true> : std::true_type {};  // case 2: signed -> unsigned integer of the same width
+template <>
+struct is_degenerate_helper<int16_t, uint16_t, true> : std::true_type {};
+template <>
+struct is_degenerate_helper<int32_t, uint32_t, true> : std::true_type {};
+template <>
+struct is_degenerate_helper<int64_t, uint64_t, true> : std::true_type {};
+template <typename SrcPacket, typename TgtPacket>
+struct is_degenerate_helper<SrcPacket, TgtPacket, false> {  // not both scalars: compare the underlying scalar types and the packet sizes
+  using SrcScalar = typename unpacket_traits<SrcPacket>::type;
+  static constexpr int SrcSize = unpacket_traits<SrcPacket>::size;
+  using TgtScalar = typename unpacket_traits<TgtPacket>::type;
+  static constexpr int TgtSize = unpacket_traits<TgtPacket>::size;
+  static constexpr bool value = is_degenerate_helper<SrcScalar, TgtScalar, true>::value && (SrcSize == TgtSize);
+};
+// Symmetric form of is_degenerate_helper: is_degenerate<T1,T2>::value == is_degenerate<T2,T1>::value
+template <typename SrcPacket, typename TgtPacket>
+struct is_degenerate {
+  static constexpr bool value =
+      is_degenerate_helper<SrcPacket, TgtPacket>::value || is_degenerate_helper<TgtPacket, SrcPacket>::value;  // the helper is tried in both directions
+};
+template <typename Packet>
+struct is_half {  // detects packets that are smaller than the default packet of their scalar type
+  using Scalar = typename unpacket_traits<Packet>::type;
+  static constexpr int Size = unpacket_traits<Packet>::size;
+  using DefaultPacket = typename packet_traits<Scalar>::type;
+  static constexpr int DefaultSize = unpacket_traits<DefaultPacket>::size;
+  static constexpr bool value = Size != 1 && Size < DefaultSize;  // size-1 packets are never reported as half packets
+};
+template <typename Src, typename Tgt>
+struct type_casting_traits {  // default cast traits: vectorized only for degenerate casts between two vectorizable types
+  enum {
+    VectorizedCast =
+        is_degenerate<Src, Tgt>::value && packet_traits<Src>::Vectorizable && packet_traits<Tgt>::Vectorizable,
+    SrcCoeffRatio = 1,
+    TgtCoeffRatio = 1
+  };
+};
+// provides a succinct template to define vectorized casting traits with respect to the largest accessible packet types
+template <typename Src, typename Tgt>
+struct vectorized_type_casting_traits {
+  enum : int {
+    DefaultSrcPacketSize = packet_traits<Src>::size,
+    DefaultTgtPacketSize = packet_traits<Tgt>::size,
+    VectorizedCast = 1,
+    SrcCoeffRatio = plain_enum_max(DefaultTgtPacketSize / DefaultSrcPacketSize, 1),  // integer ratio of the default packet sizes, never below 1
+    TgtCoeffRatio = plain_enum_max(DefaultSrcPacketSize / DefaultTgtPacketSize, 1)   // the inverse ratio, never below 1
+  };
+};
+/** \internal Wrapper to ensure that multiple packet types can map to the
+    same underlying vector type. */
+template <typename T, int unique_id = 0>  // unique_id is not used in the body; it only makes wrappers of the same T distinct types
+struct eigen_packet_wrapper {
+  EIGEN_ALWAYS_INLINE operator T&() { return m_val; }  // implicit access to the wrapped value
+  EIGEN_ALWAYS_INLINE operator const T&() const { return m_val; }
+  EIGEN_ALWAYS_INLINE eigen_packet_wrapper() = default;
+  EIGEN_ALWAYS_INLINE eigen_packet_wrapper(const T& v) : m_val(v) {}  // implicit construction from the wrapped type
+  EIGEN_ALWAYS_INLINE eigen_packet_wrapper& operator=(const T& v) {
+    m_val = v;
+    return *this;
+  }
+  T m_val;
+};
+template <typename Target, typename Packet, bool IsSame = is_same<Target, Packet>::value>
+struct preinterpret_generic;  // implementation of preinterpret, selected on whether Target and Packet are the same type
+template <typename Target, typename Packet>
+struct preinterpret_generic<Target, Packet, false> {
+  // the packets are not the same, attempt scalar bit_cast
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Target run(const Packet& a) {
+    return numext::bit_cast<Target, Packet>(a);
+  }
+};
+template <typename Packet>
+struct preinterpret_generic<Packet, Packet, true> {
+  // the packets are the same type: do nothing
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& a) { return a; }
+};
+/** \internal \returns reinterpret_cast<Target>(a)
+ *
+ * The work is delegated to preinterpret_generic<Target, Packet>.
+ */
+template <typename Target, typename Packet>
+EIGEN_DEVICE_FUNC inline Target preinterpret(const Packet& a) {
+  typedef preinterpret_generic<Target, Packet> Impl;
+  return Impl::run(a);
+}
+template <typename SrcPacket, typename TgtPacket, bool Degenerate = is_degenerate<SrcPacket, TgtPacket>::value,
+          bool TgtIsHalf = is_half<TgtPacket>::value>
+struct pcast_generic;  // implementation of pcast, selected on the (Degenerate, TgtIsHalf) pair
+template <typename SrcPacket, typename TgtPacket>
+struct pcast_generic<SrcPacket, TgtPacket, false, false> {
+  // the packets are not degenerate: attempt scalar static_cast
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket run(const SrcPacket& a) {
+    return cast_impl<SrcPacket, TgtPacket>::run(a);
+  }
+};
+template <typename Packet>
+struct pcast_generic<Packet, Packet, true, false> {
+  // the packets are the same: do nothing
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& a) { return a; }
+};
+template <typename SrcPacket, typename TgtPacket, bool TgtIsHalf>
+struct pcast_generic<SrcPacket, TgtPacket, true, TgtIsHalf> {
+  // the packets are degenerate: preinterpret is equivalent to pcast
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket run(const SrcPacket& a) { return preinterpret<TgtPacket>(a); }
+};
+/** \internal \returns static_cast<TgtType>(a) (coeff-wise) */
+template <typename SrcPacket, typename TgtPacket>
+EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a) {
+  typedef pcast_generic<SrcPacket, TgtPacket> Caster;
+  return Caster::run(a);
+}
+/** \internal Two-source overload: forwards \a a and \a b to pcast_generic<SrcPacket, TgtPacket>::run. */
+template <typename SrcPacket, typename TgtPacket>
+EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a, const SrcPacket& b) {
+  typedef pcast_generic<SrcPacket, TgtPacket> Caster;
+  return Caster::run(a, b);
+}
+/** \internal Four-source overload: forwards \a a .. \a d to pcast_generic<SrcPacket, TgtPacket>::run. */
+template <typename SrcPacket, typename TgtPacket>
+EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a, const SrcPacket& b, const SrcPacket& c,
+                                         const SrcPacket& d) {
+  typedef pcast_generic<SrcPacket, TgtPacket> Caster;
+  return Caster::run(a, b, c, d);
+}
+/** \internal Eight-source overload: forwards \a a .. \a h to pcast_generic<SrcPacket, TgtPacket>::run. */
+template <typename SrcPacket, typename TgtPacket>
+EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a, const SrcPacket& b, const SrcPacket& c, const SrcPacket& d,
+                                         const SrcPacket& e, const SrcPacket& f, const SrcPacket& g,
+                                         const SrcPacket& h) {
+  typedef pcast_generic<SrcPacket, TgtPacket> Caster;
+  return Caster::run(a, b, c, d, e, f, g, h);
+}
+template <typename SrcPacket, typename TgtPacket>
+struct pcast_generic<SrcPacket, TgtPacket, false, true> {
+  // TgtPacket is a half packet of some other type:
+  // cast to the default (full) target packet first, then reinterpret the result as TgtPacket.
+  using DefaultTgtPacket = typename is_half<TgtPacket>::DefaultPacket;
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket run(const SrcPacket& a) {
+    const DefaultTgtPacket fullPacket = pcast<SrcPacket, DefaultTgtPacket>(a);
+    return preinterpret<TgtPacket>(fullPacket);
+  }
+};
+/** \internal \returns a + b (coeff-wise)
+ *
+ * Generic fallback that relies on \c operator+ of \c Packet.
+ */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet padd(const Packet& a, const Packet& b) {
+  return a + b;
+}
+// bool specialization: logical OR is used for the sum, which avoids compiler warnings about boolean algebra.
+template <>
+EIGEN_DEVICE_FUNC inline bool padd(const bool& a, const bool& b) {
+  return a || b;
+}
+/** \internal \returns the sum of \a a and \a b computed under the mask \a umask (masked add).
+ * NOTE(review): how \a umask affects each lane is defined by the specializations, not here - confirm there.
+ * This overload is only declared when unpacket_traits<Packet>::masked_fpops_available is true.
+ * There is no generic implementation. We only have implementations for specialized
+ * cases. Generic case should not be called.
+ */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline std::enable_if_t<unpacket_traits<Packet>::masked_fpops_available, Packet> padd(
+    const Packet& a, const Packet& b, typename unpacket_traits<Packet>::mask_t umask);
+/** \internal \returns a - b (coeff-wise)
+ *
+ * Generic fallback that relies on \c operator- of \c Packet.
+ */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet psub(const Packet& a, const Packet& b) {
+  return a - b;
+}
+/** \internal \returns -a (coeff-wise)
+ *
+ * Packets whose scalar type is \c bool are rejected at compile time;
+ * otherwise the call is forwarded to numext::negate.
+ */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet pnegate(const Packet& a) {
+  EIGEN_STATIC_ASSERT((!is_same<typename unpacket_traits<Packet>::type, bool>::value),
+                      NEGATE IS NOT DEFINED FOR BOOLEAN TYPES)
+  return numext::negate(a);
+}
+/** \internal \returns conj(a) (coeff-wise)
+ *
+ * Generic fallback that forwards to numext::conj.
+ */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet pconj(const Packet& a) {
+  return numext::conj(a);
+}
+/** \internal \returns a * b (coeff-wise)
+ *
+ * Generic fallback that relies on \c operator* of \c Packet.
+ */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet pmul(const Packet& a, const Packet& b) {
+  return a * b;
+}
+// bool specialization: logical AND is used for the product, which avoids compiler warnings about boolean algebra.
+template <>
+EIGEN_DEVICE_FUNC inline bool pmul(const bool& a, const bool& b) {
+  return a && b;
+}
+/** \internal \returns a / b (coeff-wise)
+ *
+ * Generic fallback that relies on \c operator/ of \c Packet.
+ */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet pdiv(const Packet& a, const Packet& b) {
+  return a / b;
+}
+// bool specialization: logical AND is used in place of a / b, which avoids compiler warnings about boolean algebra.
+template <>
+EIGEN_DEVICE_FUNC inline bool pdiv(const bool& a, const bool& b) {
+  return a && b;
+}
+// Generic case: fill the whole object representation of the packet with 0xff bytes.
+template <typename Packet, typename EnableIf = void>
+struct ptrue_impl {
+  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/) {
+    Packet allOnes;
+    void* const rawBytes = static_cast<void*>(&allOnes);
+    memset(rawBytes, 0xff, sizeof(Packet));
+    return allOnes;
+  }
+};
+// For booleans, we can only directly set a valid `bool` value to avoid UB, so `true` is returned.
+template <>
+struct ptrue_impl<bool, void> {
+  static EIGEN_DEVICE_FUNC inline bool run(const bool& /*a*/) { return true; }
+};
+// For non-trivial scalars (scalar types with NumTraits<T>::RequireInitialization), set to Scalar(1) (i.e. a non-zero value).
+// Although this is technically not a valid bitmask, the scalar path for pselect
+// uses a comparison to zero, so this should still work in most cases. We don't
+// have another option, since the scalar type requires initialization.
+template <typename T>
+struct ptrue_impl<T, std::enable_if_t<is_scalar<T>::value && NumTraits<T>::RequireInitialization>> {
+  static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/) { return T(1); }
+};
+/** \internal \returns one bits.
+ *
+ * The value is produced by ptrue_impl<Packet>.
+ */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet ptrue(const Packet& a) {
+  typedef ptrue_impl<Packet> Impl;
+  return Impl::run(a);
+}
+// General case: fill the whole object representation of the packet with zero bytes.
+template <typename Packet, typename EnableIf = void>
+struct pzero_impl {
+  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/) {
+    Packet allZeros;
+    void* const rawBytes = static_cast<void*>(&allZeros);
+    memset(rawBytes, 0x00, sizeof(Packet));
+    return allZeros;
+  }
+};
+// For scalars (is_scalar<T>::value), explicitly set to Scalar(0), since the underlying representation
+// for zero may not consist of all-zero bits.
+template <typename T>
+struct pzero_impl<T, std::enable_if_t<is_scalar<T>::value>> {
+  static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/) { return T(0); }
+};
+/** \internal \returns packet of zeros
+ *
+ * The value is produced by pzero_impl<Packet>.
+ */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet pzero(const Packet& a) {
+  typedef pzero_impl<Packet> Impl;
+  return Impl::run(a);
+}
+/** \internal \returns a <= b as a bit mask */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet pcmp_le(const Packet& a, const Packet& b) {
+  if (a <= b) {
+    return ptrue(a);
+  }
+  return pzero(a);
+}
+/** \internal \returns a < b as a bit mask */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet pcmp_lt(const Packet& a, const Packet& b) {
+  if (a < b) {
+    return ptrue(a);
+  }
+  return pzero(a);
+}
+/** \internal \returns a == b as a bit mask */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet pcmp_eq(const Packet& a, const Packet& b) {
+  if (a == b) {
+    return ptrue(a);
+  }
+  return pzero(a);
+}
+/** \internal \returns a < b or a==NaN or b==NaN as a bit mask */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet pcmp_lt_or_nan(const Packet& a, const Packet& b) {
+  // Testing a >= b and returning zeros for it leaves every other case,
+  // including failed comparisons, on the "true" path.
+  if (a >= b) {
+    return pzero(a);
+  }
+  return ptrue(a);
+}
+template <typename T>
+struct bit_and {  // functor returning a & b
+  EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const { return a & b; }
+};
+template <typename T>
+struct bit_or {  // functor returning a | b
+  EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const { return a | b; }
+};
+template <typename T>
+struct bit_xor {  // functor returning a ^ b
+  EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const { return a ^ b; }
+};
+template <typename T>
+struct bit_not {  // functor returning ~a
+  EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T operator()(const T& a) const { return ~a; }
+};
+template <>
+struct bit_and<bool> {  // bool: logical && replaces &
+  EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const { return a && b; }
+};
+template <>
+struct bit_or<bool> {  // bool: logical || replaces |
+  EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const { return a || b; }
+};
+template <>
+struct bit_xor<bool> {  // bool: inequality replaces ^
+  EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const { return a != b; }
+};
+template <>
+struct bit_not<bool> {  // bool: logical ! replaces ~
+  EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE bool operator()(const bool& a) const { return !a; }
+};
+// Bitwise helper built on the operators &, |, ^, ~ (through the bit_* functors).
+template <typename T>
+struct operator_bitwise_helper {
+  EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) {
+    const bit_and<T> functor = bit_and<T>();
+    return functor(a, b);
+  }
+  EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) {
+    const bit_or<T> functor = bit_or<T>();
+    return functor(a, b);
+  }
+  EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) {
+    const bit_xor<T> functor = bit_xor<T>();
+    return functor(a, b);
+  }
+  EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) {
+    const bit_not<T> functor = bit_not<T>();
+    return functor(a);
+  }
+};
+// Bitwise helper that works on the object representation, one byte at a time.
+template <typename T>
+struct bytewise_bitwise_helper {
+  EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) {
+    return binary(a, b, bit_and<unsigned char>());
+  }
+  EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) {
+    return binary(a, b, bit_or<unsigned char>());
+  }
+  EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) {
+    return binary(a, b, bit_xor<unsigned char>());
+  }
+  EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) {
+    return unary(a, bit_not<unsigned char>());
+  }
+
+ private:
+  // Applies op to every byte of a and returns the resulting object.
+  template <typename Op>
+  EIGEN_DEVICE_FUNC static inline T unary(const T& a, Op op) {
+    const unsigned char* const srcBytes = reinterpret_cast<const unsigned char*>(&a);
+    T result;
+    unsigned char* const dstBytes = reinterpret_cast<unsigned char*>(&result);
+    for (size_t byte = 0; byte != sizeof(T); ++byte) {
+      dstBytes[byte] = op(srcBytes[byte]);
+    }
+    return result;
+  }
+  // Applies op to every pair of corresponding bytes of a and b and returns the resulting object.
+  template <typename Op>
+  EIGEN_DEVICE_FUNC static inline T binary(const T& a, const T& b, Op op) {
+    const unsigned char* const lhsBytes = reinterpret_cast<const unsigned char*>(&a);
+    const unsigned char* const rhsBytes = reinterpret_cast<const unsigned char*>(&b);
+    T result;
+    unsigned char* const dstBytes = reinterpret_cast<unsigned char*>(&result);
+    for (size_t byte = 0; byte != sizeof(T); ++byte) {
+      dstBytes[byte] = op(lhsBytes[byte], rhsBytes[byte]);
+    }
+    return result;
+  }
+};
+// In the general case, use byte-by-byte manipulation (bytewise_bitwise_helper).
+template <typename T, typename EnableIf = void>
+struct bitwise_helper : public bytewise_bitwise_helper<T> {};
+// For integers or non-trivial scalars, use binary operators (operator_bitwise_helper).
+template <typename T>
+struct bitwise_helper<T, typename std::enable_if_t<is_scalar<T>::value &&
+                                                   (NumTraits<T>::IsInteger || NumTraits<T>::RequireInitialization)>>
+    : public operator_bitwise_helper<T> {};
+/** \internal \returns the bitwise and of \a a and \a b */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet pand(const Packet& a, const Packet& b) {
+  typedef bitwise_helper<Packet> Helper;
+  return Helper::bitwise_and(a, b);
+}
+/** \internal \returns the bitwise or of \a a and \a b */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet por(const Packet& a, const Packet& b) {
+  typedef bitwise_helper<Packet> Helper;
+  return Helper::bitwise_or(a, b);
+}
+/** \internal \returns the bitwise xor of \a a and \a b */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet pxor(const Packet& a, const Packet& b) {
+  typedef bitwise_helper<Packet> Helper;
+  return Helper::bitwise_xor(a, b);
+}
+/** \internal \returns the bitwise not of \a a */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet pnot(const Packet& a) {
+  typedef bitwise_helper<Packet> Helper;
+  return Helper::bitwise_not(a);
+}
+/** \internal \returns the bitwise and of \a a with the bitwise not of \a b, i.e. a & ~b */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet pandnot(const Packet& a, const Packet& b) {
+  const Packet inverted_b = pnot(b);
+  return pand(a, inverted_b);
+}
+// Generic (non-scalar) selection, expressed with bitwise operations:
+// result = (a & mask) | (b & ~mask).
+template <typename Packet, bool is_scalar = is_scalar<Packet>::value>
+struct pselect_impl {
+  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) {
+    const Packet picked_from_a = pand(a, mask);
+    const Packet picked_from_b = pandnot(b, mask);
+    return por(picked_from_a, picked_from_b);
+  }
+};
+// For scalars, use ternary select.
+// Specialization chosen when is_scalar<Packet>::value is true; forwards to numext::select(mask, a, b).
+template <typename Packet>
+struct pselect_impl<Packet, true> {
+  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) {
+    return numext::select(mask, a, b);
+  }
+};
+/** \internal \returns \a a or \a b for each field in the packet according to \a mask.
+ * Dispatches to pselect_impl<Packet>::run. */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet pselect(const Packet& mask, const Packet& a, const Packet& b) {
+  return pselect_impl<Packet>::run(mask, a, b);
+}
+/** \internal Specialization of pselect for bool: \returns \a a when \a cond is true, \a b otherwise. */
+template <>
+EIGEN_DEVICE_FUNC inline bool pselect<bool>(const bool& cond, const bool& a, const bool& b) {
+  if (cond) {
+    return a;
+  }
+  return b;
+}
+/** \internal \returns the min or max of \a a and \a b (coeff-wise)
+    If either \a a or \a b are NaN, the result is implementation defined.
+    Primary template: applies \a op directly, with no NaN handling of its own. */
+template <int NaNPropagation>
+struct pminmax_impl {
+  template <typename Packet, typename Op>
+  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
+    return op(a, b);
+  }
+};
+/** \internal NaN-propagating min/max of \a a and \a b (coeff-wise):
+    wherever \a a or \a b is NaN the result is NaN, otherwise it is op(a, b). */
+template <>
+struct pminmax_impl<PropagateNaN> {
+  template <typename Packet, typename Op>
+  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
+    // A value compares equal to itself unless it is NaN, so these masks flag the non-NaN lanes.
+    const Packet a_is_number = pcmp_eq(a, a);
+    const Packet b_is_number = pcmp_eq(b, b);
+    // Lanes where a is a number: op(a, b) if b is a number as well, else b (the NaN).
+    const Packet when_a_is_number = pselect(b_is_number, op(a, b), b);
+    // Lanes where a is NaN yield a itself.
+    return pselect(a_is_number, when_a_is_number, a);
+  }
+};
+/** \internal Number-propagating min/max of \a a and \a b (coeff-wise):
+    NaN is returned only where both \a a and \a b are NaN; if exactly one of them is NaN
+    the other one is returned. Equivalent to std::fmin(a, b) for the min case. */
+template <>
+struct pminmax_impl<PropagateNumbers> {
+  template <typename Packet, typename Op>
+  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
+    // A value compares equal to itself unless it is NaN, so these masks flag the non-NaN lanes.
+    const Packet a_is_number = pcmp_eq(a, a);
+    const Packet b_is_number = pcmp_eq(b, b);
+    // Lanes where a is a number: op(a, b) if b is a number as well, else a.
+    const Packet when_a_is_number = pselect(b_is_number, op(a, b), a);
+    // Lanes where a is NaN yield b (which may itself be NaN).
+    return pselect(a_is_number, when_a_is_number, b);
+  }
+};
+// Expands to a capture-less lambda taking two `const Type&` arguments and returning Func(a, b).
+// Call sites pass Func wrapped in parentheses, e.g. (pmin<PropagateFast, Scalar>) - presumably so that
+// the comma inside the template argument list is not treated as a macro argument separator.
+#define EIGEN_BINARY_OP_NAN_PROPAGATION(Type, Func) [](const Type& a, const Type& b) { return Func(a, b); }
+/** \internal \returns the min of \a a and \a b  (coeff-wise).
+    If \a a or \a b is NaN, the return value is implementation defined.
+    Primary template: forwards to numext::mini. */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet pmin(const Packet& a, const Packet& b) {
+  return numext::mini(a, b);
+}
+/** \internal \returns the min of \a a and \a b  (coeff-wise).
+    NaNPropagation determines the NaN propagation semantics.
+    The plain pmin<Packet> is wrapped in a lambda and handed to pminmax_impl<NaNPropagation>::run,
+    which is where the NaN policy is applied. */
+template <int NaNPropagation, typename Packet>
+EIGEN_DEVICE_FUNC inline Packet pmin(const Packet& a, const Packet& b) {
+  return pminmax_impl<NaNPropagation>::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet, (pmin<Packet>)));
+}
+/** \internal \returns the max of \a a and \a b  (coeff-wise)
+    If \a a or \a b is NaN, the return value is implementation defined.
+    Primary template: forwards to numext::maxi. */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet pmax(const Packet& a, const Packet& b) {
+  return numext::maxi(a, b);
+}
+/** \internal \returns the max of \a a and \a b  (coeff-wise).
+    NaNPropagation determines the NaN propagation semantics.
+    The plain pmax<Packet> is wrapped in a lambda and handed to pminmax_impl<NaNPropagation>::run,
+    which is where the NaN policy is applied. */
+template <int NaNPropagation, typename Packet>
+EIGEN_DEVICE_FUNC inline Packet pmax(const Packet& a, const Packet& b) {
+  return pminmax_impl<NaNPropagation>::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet, (pmax<Packet>)));
+}
+/** \internal \returns the absolute value of \a a.
+ * Primary template: forwards to numext::abs. */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet pabs(const Packet& a) {
+  return numext::abs(a);
+}
+/** \internal pabs specialization for unsigned int: the value is returned unchanged. */
+template <>
+EIGEN_DEVICE_FUNC inline unsigned int pabs(const unsigned int& a) {
+  return a;
+}
+/** \internal pabs specialization for unsigned long: the value is returned unchanged. */
+template <>
+EIGEN_DEVICE_FUNC inline unsigned long pabs(const unsigned long& a) {
+  return a;
+}
+/** \internal pabs specialization for unsigned long long: the value is returned unchanged. */
+template <>
+EIGEN_DEVICE_FUNC inline unsigned long long pabs(const unsigned long long& a) {
+  return a;
+}
+/** \internal \returns the addsub value of \a a and \a b: padd(a, b) in the fields selected by
+ * peven_mask(a), psub(a, b) in the remaining fields */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet paddsub(const Packet& a, const Packet& b) {
+  const Packet even_fields = peven_mask(a);
+  const Packet sum = padd(a, b);
+  const Packet difference = psub(a, b);
+  return pselect(even_fields, sum, difference);
+}
+/** \internal \returns the phase angle of \a a.
+ * Primary template: brings numext::arg into scope and calls arg unqualified. */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet parg(const Packet& a) {
+  using numext::arg;
+  return arg(a);
+}
+/** \internal \returns \a a arithmetically shifted by N bits to the right.
+ * N is a compile-time template argument; forwards to numext::arithmetic_shift_right(a, N). */
+template <int N, typename T>
+EIGEN_DEVICE_FUNC inline T parithmetic_shift_right(const T& a) {
+  return numext::arithmetic_shift_right(a, N);
+}
+/** \internal \returns \a a logically shifted by N bits to the right.
+ * N is a compile-time template argument; forwards to numext::logical_shift_right(a, N). */
+template <int N, typename T>
+EIGEN_DEVICE_FUNC inline T plogical_shift_right(const T& a) {
+  return numext::logical_shift_right(a, N);
+}
+/** \internal \returns \a a shifted by N bits to the left.
+ * N is a compile-time template argument; forwards to numext::logical_shift_left(a, N). */
+template <int N, typename T>
+EIGEN_DEVICE_FUNC inline T plogical_shift_left(const T& a) {
+  return numext::logical_shift_left(a, N);
+}
+/** \internal Splits \a a into significand and exponent.
+ * \returns the significand; the exponent is written to \a exponent, converted to Packet.
+ * See https://en.cppreference.com/w/cpp/numeric/math/frexp
+ */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet pfrexp(const Packet& a, Packet& exponent) {
+  EIGEN_USING_STD(frexp);
+  int int_exponent;
+  Packet significand = static_cast<Packet>(frexp(a, &int_exponent));
+  // The output parameter is only written after frexp has consumed a.
+  exponent = static_cast<Packet>(int_exponent);
+  return significand;
+}
+/** \internal \returns a * 2^((int)exponent); \a exponent is converted to int before the call to ldexp.
+ * See https://en.cppreference.com/w/cpp/numeric/math/ldexp
+ */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet pldexp(const Packet& a, const Packet& exponent) {
+  EIGEN_USING_STD(ldexp)
+  const int int_exponent = static_cast<int>(exponent);
+  return static_cast<Packet>(ldexp(a, int_exponent));
+}
+/** \internal \returns the absolute difference of \a a and \a b (coeff-wise):
+ * b - a where a < b, and a - b elsewhere */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet pabsdiff(const Packet& a, const Packet& b) {
+  const Packet a_below_b = pcmp_lt(a, b);
+  return pselect(a_below_b, psub(b, a), psub(a, b));
+}
+/** \internal \returns a packet version of \a *from, from must be properly aligned.
+ * Primary template: simply dereferences \a from. */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet pload(const typename unpacket_traits<Packet>::type* from) {
+  return *from;
+}
+/** \internal \returns n elements of a packet version of \a *from, from must be properly aligned.
+ * \a offset is the index of the first packet element to fill, and
+ * offset + n <= unpacket_traits::size must hold.
+ * All elements before offset and after the last element loaded are initialized with zero. */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet pload_partial(const typename unpacket_traits<Packet>::type* from, const Index n,
+                                              const Index offset = 0) {
+  using Scalar = typename unpacket_traits<Packet>::type;
+  const Index packet_size = unpacket_traits<Packet>::size;
+  eigen_assert(n + offset <= packet_size && "number of elements plus offset will read past end of packet");
+  // Zero-filled staging buffer; only the slots [offset, offset + n) receive data from `from`.
+  EIGEN_ALIGN_MAX Scalar staging[packet_size] = {Scalar(0)};
+  const Index stop = numext::mini(n + offset, packet_size);
+  for (Index src = 0, dst = offset; dst < stop; ++src, ++dst) {
+    staging[dst] = from[src];
+  }
+  return pload<Packet>(staging);
+}
+/** \internal \returns a packet version of \a *from, (un-aligned load).
+ * Primary template: simply dereferences \a from. */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet ploadu(const typename unpacket_traits<Packet>::type* from) {
+  return *from;
+}
+/** \internal \returns n elements of a packet version of \a *from, (un-aligned load).
+ * \a offset is the index of the first packet element to fill, and
+ * offset + n <= unpacket_traits::size must hold.
+ * All elements that are not loaded are initialized with zero. */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet ploadu_partial(const typename unpacket_traits<Packet>::type* from, const Index n,
+                                               const Index offset = 0) {
+  using Scalar = typename unpacket_traits<Packet>::type;
+  const Index packet_size = unpacket_traits<Packet>::size;
+  eigen_assert(n + offset <= packet_size && "number of elements plus offset will read past end of packet");
+  // The staging buffer is itself aligned, so the aligned pload can be used on it below.
+  EIGEN_ALIGN_MAX Scalar staging[packet_size] = {Scalar(0)};
+  const Index stop = numext::mini(n + offset, packet_size);
+  for (Index src = 0, dst = offset; dst < stop; ++src, ++dst) {
+    staging[dst] = from[src];
+  }
+  return pload<Packet>(staging);
+}
+/** \internal \returns a packet version of \a *from, (un-aligned masked load)
+ * There is no generic implementation. We only have implementations for specialized
+ * cases. Generic case should not be called.
+ * This overload is declared only; it takes part in overload resolution only when
+ * unpacket_traits<Packet>::masked_load_available is true.
+ */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline std::enable_if_t<unpacket_traits<Packet>::masked_load_available, Packet> ploadu(
+    const typename unpacket_traits<Packet>::type* from, typename unpacket_traits<Packet>::mask_t umask);
+/** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a).
+ * Primary template: returns \a a itself. */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet pset1(const typename unpacket_traits<Packet>::type& a) {
+  return a;
+}
+/** \internal \returns a packet with constant coefficients set from bits.
+ * Declaration only: no generic definition is provided here. */
+template <typename Packet, typename BitsType>
+EIGEN_DEVICE_FUNC inline Packet pset1frombits(BitsType a);
+/** \internal \returns a packet with constant coefficients \a a[0], e.g.: (a[0],a[0],a[0],a[0]).
+ * Implemented as pset1<Packet>(*a). */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet pload1(const typename unpacket_traits<Packet>::type* a) {
+  return pset1<Packet>(*a);
+}
+/** \internal \returns a packet with elements of \a *from duplicated.
+ * For instance, for a packet of 8 elements, 4 scalars will be read from \a *from and
+ * duplicated to form: {from[0],from[0],from[1],from[1],from[2],from[2],from[3],from[3]}
+ * Currently, this function is only used for scalar * complex products.
+ * The primary template below simply returns \a *from.
+ */
+template <typename Packet>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet ploaddup(const typename unpacket_traits<Packet>::type* from) {
+  return *from;
+}
+/** \internal \returns a packet with elements of \a *from quadrupled.
+ * For instance, for a packet of 8 elements, 2 scalars will be read from \a *from and
+ * replicated to form: {from[0],from[0],from[0],from[0],from[1],from[1],from[1],from[1]}
+ * Currently, this function is only used in matrix products.
+ * For packet-size smaller or equal to 4, this function is equivalent to pload1,
+ * which is what the primary template below forwards to.
+ */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet ploadquad(const typename unpacket_traits<Packet>::type* from) {
+  return pload1<Packet>(from);
+}
+/** \internal equivalent to
+ * \code
+ * a0 = pload1(a+0);
+ * a1 = pload1(a+1);
+ * a2 = pload1(a+2);
+ * a3 = pload1(a+3);
+ * \endcode
+ * Reads the four consecutive scalars a[0..3] and writes one broadcast packet per scalar
+ * into the output references \a a0 .. \a a3, in that order.
+ * \sa pset1, pload1, ploaddup, pbroadcast2
+ */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline void pbroadcast4(const typename unpacket_traits<Packet>::type* a, Packet& a0, Packet& a1,
+                                          Packet& a2, Packet& a3) {
+  a0 = pload1<Packet>(a + 0);
+  a1 = pload1<Packet>(a + 1);
+  a2 = pload1<Packet>(a + 2);
+  a3 = pload1<Packet>(a + 3);
+}
+/** \internal equivalent to
+ * \code
+ * a0 = pload1(a+0);
+ * a1 = pload1(a+1);
+ * \endcode
+ * Reads the two consecutive scalars a[0..1] and writes one broadcast packet per scalar
+ * into the output references \a a0 and \a a1, in that order.
+ * \sa pset1, pload1, ploaddup, pbroadcast4
+ */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline void pbroadcast2(const typename unpacket_traits<Packet>::type* a, Packet& a0, Packet& a1) {
+  a0 = pload1<Packet>(a + 0);
+  a1 = pload1<Packet>(a + 1);
+}
+/** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1).
+ * The primary template returns \a a itself. */
+template <typename Packet>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet plset(const typename unpacket_traits<Packet>::type& a) {
+  return a;
+}
+/** \internal \returns a mask packet of the form (x, 0, x, 0, ...), where x is the value with
+     all bits set to 1. Even-indexed elements are all ones, odd-indexed elements are zero.
+     The argument is only used to select the packet type. */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet peven_mask(const Packet& /*a*/) {
+  using Scalar = typename unpacket_traits<Packet>::type;
+  const size_t lane_count = unpacket_traits<Packet>::size;
+  EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar lanes[lane_count];
+  for (size_t lane = 0; lane < lane_count; ++lane) {
+    // Fill the raw bytes of the element: 0xff for even indices, 0 for odd ones.
+    const bool is_even = (lane % 2) == 0;
+    memset(lanes + lane, is_even ? 0xff : 0, sizeof(Scalar));
+  }
+  return ploadu<Packet>(lanes);
+}
+/** \internal copy the packet \a from to \a *to, \a to must be properly aligned.
+ * Primary template: plain assignment through the pointer. */
+template <typename Scalar, typename Packet>
+EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from) {
+  (*to) = from;
+}
+/** \internal copy n elements of the packet \a from to \a *to, \a to must be properly aligned.
+ * \a offset is the index of the first packet element to copy, and
+ * offset + n <= unpacket_traits::size must hold.
+ * Packet element offset + i is written to to[i]. */
+template <typename Scalar, typename Packet>
+EIGEN_DEVICE_FUNC inline void pstore_partial(Scalar* to, const Packet& from, const Index n, const Index offset = 0) {
+  const Index packet_size = unpacket_traits<Packet>::size;
+  eigen_assert(n + offset <= packet_size && "number of elements plus offset will write past end of packet");
+  // Spill the whole packet to an aligned scratch array, then copy out the requested window.
+  EIGEN_ALIGN_MAX Scalar staging[packet_size];
+  pstore<Scalar>(staging, from);
+  const Index count = numext::mini(n, packet_size - offset);
+  for (Index k = 0; k < count; ++k) {
+    to[k] = staging[k + offset];
+  }
+}
+/** \internal copy the packet \a from to \a *to, (un-aligned store).
+ * Primary template: plain assignment through the pointer. */
+template <typename Scalar, typename Packet>
+EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from) {
+  (*to) = from;
+}
+/** \internal copy n elements of the packet \a from to \a *to, (un-aligned store).
+ * \a offset is the index of the first packet element to copy, and
+ * offset + n <= unpacket_traits::size must hold.
+ * Packet element offset + i is written to to[i]. */
+template <typename Scalar, typename Packet>
+EIGEN_DEVICE_FUNC inline void pstoreu_partial(Scalar* to, const Packet& from, const Index n, const Index offset = 0) {
+  const Index packet_size = unpacket_traits<Packet>::size;
+  eigen_assert(n + offset <= packet_size && "number of elements plus offset will write past end of packet");
+  // The scratch array is aligned, so the aligned pstore is used to spill the packet.
+  EIGEN_ALIGN_MAX Scalar staging[packet_size];
+  pstore<Scalar>(staging, from);
+  const Index count = numext::mini(n, packet_size - offset);
+  for (Index k = 0; k < count; ++k) {
+    to[k] = staging[k + offset];
+  }
+}
+/** \internal copy the packet \a from to \a *to, (un-aligned store with a mask)
+ * There is no generic implementation. We only have implementations for specialized
+ * cases. Generic case should not be called.
+ * This overload is declared only; it takes part in overload resolution only when
+ * unpacket_traits<Packet>::masked_store_available is true.
+ */
+template <typename Scalar, typename Packet>
+EIGEN_DEVICE_FUNC inline std::enable_if_t<unpacket_traits<Packet>::masked_store_available, void> pstoreu(
+    Scalar* to, const Packet& from, typename unpacket_traits<Packet>::mask_t umask);
+/** \internal Gather operation. The primary template ignores the stride argument and
+ * performs a single un-aligned load from \a from via ploadu. */
+template <typename Scalar, typename Packet>
+EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, Index /*stride*/) {
+  return ploadu<Packet>(from);
+}
+/** \internal \returns a packet whose first min(n, packet size) elements are read from
+ * from[0], from[stride], from[2*stride], ...; the remaining elements are zero. */
+template <typename Scalar, typename Packet>
+EIGEN_DEVICE_FUNC inline Packet pgather_partial(const Scalar* from, Index stride, const Index n) {
+  const Index packet_size = unpacket_traits<Packet>::size;
+  EIGEN_ALIGN_MAX Scalar staging[packet_size] = {Scalar(0)};
+  const Index count = numext::mini(n, packet_size);
+  for (Index k = 0; k < count; ++k) {
+    staging[k] = from[k * stride];
+  }
+  return pload<Packet>(staging);
+}
+/** \internal Scatter operation. The primary template ignores the stride argument and
+ * stores \a from to \a to via pstore. */
+template <typename Scalar, typename Packet>
+EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, Index /*stride*/) {
+  pstore(to, from);
+}
+/** \internal Writes the first min(n, packet size) elements of \a from to
+ * to[0], to[stride], to[2*stride], ... */
+template <typename Scalar, typename Packet>
+EIGEN_DEVICE_FUNC inline void pscatter_partial(Scalar* to, const Packet& from, Index stride, const Index n) {
+  const Index packet_size = unpacket_traits<Packet>::size;
+  // Spill the packet to an aligned scratch array, then write the elements out one by one.
+  EIGEN_ALIGN_MAX Scalar staging[packet_size];
+  pstore<Scalar>(staging, from);
+  const Index count = numext::mini(n, packet_size);
+  for (Index k = 0; k < count; ++k) {
+    to[k * stride] = staging[k];
+  }
+}
+/** \internal tries to do cache prefetching of \a addr.
+ * The implementation is selected by the preprocessor:
+ *  - HIP device compilation: nothing is emitted;
+ *  - CUDA device code (EIGEN_CUDA_ARCH): a `prefetch.L1` inline-asm instruction, with the
+ *    operand constraint chosen according to the pointer width;
+ *  - GCC / Clang / ICC (and not MSVC): __builtin_prefetch;
+ *  - anything else: nothing is emitted.
+ */
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr) {
+#if defined(EIGEN_HIP_DEVICE_COMPILE)
+  // do nothing
+#elif defined(EIGEN_CUDA_ARCH)
+#if defined(__LP64__) || EIGEN_OS_WIN64
+  // 64-bit pointer operand constraint for inlined asm
+  asm(" prefetch.L1 [ %1 ];" : "=l"(addr) : "l"(addr));
+#else
+  // 32-bit pointer operand constraint for inlined asm
+  asm(" prefetch.L1 [ %1 ];" : "=r"(addr) : "r"(addr));
+#endif
+#elif (!EIGEN_COMP_MSVC) && (EIGEN_COMP_GNUC || EIGEN_COMP_CLANG || EIGEN_COMP_ICC)
+  __builtin_prefetch(addr);
+#endif
+}
+/** \internal \returns the reversed elements of \a a.
+ * The primary template returns \a a unchanged. */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a) {
+  return a;
+}
+/** \internal \returns \a a with real and imaginary part flipped (for complex type only).
+ * Builds Packet(imag(a), real(a)), so Packet must be constructible from those two values. */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a) {
+  return Packet(numext::imag(a), numext::real(a));
+}
+/**************************
+ * Special math functions
+ ***************************/
+/** \internal \returns isnan(a) as a mask: true where \a a does not compare equal to itself */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet pisnan(const Packet& a) {
+  const Packet all_true = ptrue(a);
+  const Packet equals_itself = pcmp_eq(a, a);
+  // true AND NOT (a == a)
+  return pandnot(all_true, equals_itself);
+}
+/** \internal \returns isinf(a) as a mask: compares |a| against NumTraits<Scalar>::infinity() */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet pisinf(const Packet& a) {
+  using Scalar = typename unpacket_traits<Packet>::type;
+  constexpr Scalar kInfinity = NumTraits<Scalar>::infinity();
+  const Packet magnitude = pabs(a);
+  const Packet infinity_packet = pset1<Packet>(kInfinity);
+  return pcmp_eq(magnitude, infinity_packet);
+}
+/** \internal \returns the sine of \a a (coeff-wise).
+ * Primary template: invokes EIGEN_USING_STD(sin), then calls sin(a) unqualified. */
+template <typename Packet>
+EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psin(const Packet& a) {
+  EIGEN_USING_STD(sin);
+  return sin(a);
+}
+/** \internal \returns the cosine of \a a (coeff-wise).
+ * Primary template: invokes EIGEN_USING_STD(cos), then calls cos(a) unqualified. */
+template <typename Packet>
+EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcos(const Packet& a) {
+  EIGEN_USING_STD(cos);
+  return cos(a);
+}
+/** \internal \returns the tan of \a a (coeff-wise).
+ * Primary template: invokes EIGEN_USING_STD(tan), then calls tan(a) unqualified. */
+template <typename Packet>
+EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptan(const Packet& a) {
+  EIGEN_USING_STD(tan);
+  return tan(a);
+}
+/** \internal \returns the arc sine of \a a (coeff-wise).
+ * Primary template: invokes EIGEN_USING_STD(asin), then calls asin(a) unqualified. */
+template <typename Packet>
+EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pasin(const Packet& a) {
+  EIGEN_USING_STD(asin);
+  return asin(a);
+}
+/** \internal \returns the arc cosine of \a a (coeff-wise).
+ * Primary template: invokes EIGEN_USING_STD(acos), then calls acos(a) unqualified. */
+template <typename Packet>
+EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pacos(const Packet& a) {
+  EIGEN_USING_STD(acos);
+  return acos(a);
+}
+/** \internal \returns the hyperbolic sine of \a a (coeff-wise).
+ * Primary template: invokes EIGEN_USING_STD(sinh), then calls sinh(a) unqualified. */
+template <typename Packet>
+EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psinh(const Packet& a) {
+  EIGEN_USING_STD(sinh);
+  return sinh(a);
+}
+/** \internal \returns the hyperbolic cosine of \a a (coeff-wise).
+ * Primary template: invokes EIGEN_USING_STD(cosh), then calls cosh(a) unqualified. */
+template <typename Packet>
+EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcosh(const Packet& a) {
+  EIGEN_USING_STD(cosh);
+  return cosh(a);
+}
+/** \internal \returns the arc tangent of \a a (coeff-wise).
+ * Primary template: invokes EIGEN_USING_STD(atan), then calls atan(a) unqualified. */
+template <typename Packet>
+EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patan(const Packet& a) {
+  EIGEN_USING_STD(atan);
+  return atan(a);
+}
+/** \internal \returns the hyperbolic tan of \a a (coeff-wise).
+ * Primary template: invokes EIGEN_USING_STD(tanh), then calls tanh(a) unqualified. */
+template <typename Packet>
+EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptanh(const Packet& a) {
+  EIGEN_USING_STD(tanh);
+  return tanh(a);
+}
+/** \internal \returns the inverse hyperbolic tangent of \a a (coeff-wise).
+ * Primary template: invokes EIGEN_USING_STD(atanh), then calls atanh(a) unqualified. */
+template <typename Packet>
+EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patanh(const Packet& a) {
+  EIGEN_USING_STD(atanh);
+  return atanh(a);
+}
+/** \internal \returns the exp of \a a (coeff-wise).
+ * Primary template: forwards to numext::exp. */
+template <typename Packet>
+EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp(const Packet& a) {
+  return numext::exp(a);
+}
+/** \internal \returns the exp2 of \a a (coeff-wise).
+ * Primary template: forwards to numext::exp2. */
+template <typename Packet>
+EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp2(const Packet& a) {
+  return numext::exp2(a);
+}
+/** \internal \returns the expm1 of \a a (coeff-wise).
+ * Primary template: forwards to numext::expm1. */
+template <typename Packet>
+EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexpm1(const Packet& a) {
+  return numext::expm1(a);
+}
+/** \internal \returns the log of \a a (coeff-wise).
+ * Primary template: invokes EIGEN_USING_STD(log), then calls log(a) unqualified. */
+template <typename Packet>
+EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog(const Packet& a) {
+  EIGEN_USING_STD(log);
+  return log(a);
+}
+/** \internal \returns the log1p of \a a (coeff-wise).
+ * Primary template: forwards to numext::log1p. */
+template <typename Packet>
+EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog1p(const Packet& a) {
+  return numext::log1p(a);
+}
+/** \internal \returns the log10 of \a a (coeff-wise).
+ * Primary template: invokes EIGEN_USING_STD(log10), then calls log10(a) unqualified. */
+template <typename Packet>
+EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog10(const Packet& a) {
+  EIGEN_USING_STD(log10);
+  return log10(a);
+}
+/** \internal \returns the log2 of \a a (coeff-wise), computed as EIGEN_LOG2E * plog(a) */
+template <typename Packet>
+EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog2(const Packet& a) {
+  typedef typename internal::unpacket_traits<Packet>::type Scalar;
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  // The constant is converted through the real type first, then to the scalar type.
+  const Packet log2_of_e = pset1<Packet>(Scalar(RealScalar(EIGEN_LOG2E)));
+  return pmul(log2_of_e, plog(a));
+}
+/** \internal \returns the square-root of \a a (coeff-wise).
+ * Primary template: forwards to numext::sqrt. */
+template <typename Packet>
+EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psqrt(const Packet& a) {
+  return numext::sqrt(a);
+}
+/** \internal \returns the cube-root of \a a (coeff-wise).
+ * Primary template: forwards to numext::cbrt. */
+template <typename Packet>
+EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcbrt(const Packet& a) {
+  return numext::cbrt(a);
+}
+// Implementation helper behind pfloor / pceil / print / pround / ptrunc.
+// The primary template forwards each operation to its numext counterpart. The IsScalar and
+// IsInteger parameters default from is_scalar<Packet> and NumTraits of the packet's scalar type;
+// presumably they exist so that specializations elsewhere can override particular combinations.
+template <typename Packet, bool IsScalar = is_scalar<Packet>::value,
+          bool IsInteger = NumTraits<typename unpacket_traits<Packet>::type>::IsInteger>
+struct nearest_integer_packetop_impl {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_floor(const Packet& x) { return numext::floor(x); }
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_ceil(const Packet& x) { return numext::ceil(x); }
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_rint(const Packet& x) { return numext::rint(x); }
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_round(const Packet& x) { return numext::round(x); }
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_trunc(const Packet& x) { return numext::trunc(x); }
+};
+/** \internal \returns the rounded value of \a a (coeff-wise).
+ * Dispatches to nearest_integer_packetop_impl<Packet>::run_round. */
+template <typename Packet>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pround(const Packet& a) {
+  return nearest_integer_packetop_impl<Packet>::run_round(a);
+}
+/** \internal \returns the floor of \a a (coeff-wise).
+ * Dispatches to nearest_integer_packetop_impl<Packet>::run_floor. */
+template <typename Packet>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pfloor(const Packet& a) {
+  return nearest_integer_packetop_impl<Packet>::run_floor(a);
+}
+/** \internal \returns the rounded value of \a a (coeff-wise) with current
+ * rounding mode.
+ * Note: the name is p + rint (packet rint), not an output routine.
+ * Dispatches to nearest_integer_packetop_impl<Packet>::run_rint. */
+template <typename Packet>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet print(const Packet& a) {
+  return nearest_integer_packetop_impl<Packet>::run_rint(a);
+}
+/** \internal \returns the ceil of \a a (coeff-wise).
+ * Dispatches to nearest_integer_packetop_impl<Packet>::run_ceil. */
+template <typename Packet>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pceil(const Packet& a) {
+  return nearest_integer_packetop_impl<Packet>::run_ceil(a);
+}
+/** \internal \returns the truncation of \a a (coeff-wise).
+ * Dispatches to nearest_integer_packetop_impl<Packet>::run_trunc. */
+template <typename Packet>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet ptrunc(const Packet& a) {
+  return nearest_integer_packetop_impl<Packet>::run_trunc(a);
+}
+// Implementation helper behind psign. The primary template forwards to numext::sign;
+// the defaulted EnableIf parameter leaves room for SFINAE-selected specializations.
+template <typename Packet, typename EnableIf = void>
+struct psign_impl {
+  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a) { return numext::sign(a); }
+};
+/** \internal \returns the sign of \a a (coeff-wise).
+ * Dispatches to psign_impl<Packet>::run. */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet psign(const Packet& a) {
+  return psign_impl<Packet>::run(a);
+}
+/** \internal psign specialization for bool: the value is returned unchanged. */
+template <>
+EIGEN_DEVICE_FUNC inline bool psign(const bool& a) {
+  return a;
+}
+/** \internal \returns the first element of a packet.
+ * The primary template returns \a a itself, converted to the packet's scalar type. */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type pfirst(const Packet& a) {
+  return a;
+}
+/** \internal \returns the sum of the elements of upper and lower half of \a a if \a a is larger than 4.
+ * For a packet {a0, a1, a2, a3, a4, a5, a6, a7}, it returns a half packet {a0+a4, a1+a5, a2+a6, a3+a7}
+ * For packet-size smaller or equal to 4, this boils down to a noop.
+ * The return type is unpacket_traits<Packet>::half when the packet size is a multiple of 8 and
+ * Packet otherwise. The primary template below just returns \a a.
+ */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline std::conditional_t<(unpacket_traits<Packet>::size % 8) == 0,
+                                            typename unpacket_traits<Packet>::half, Packet>
+predux_half_dowto4(const Packet& a) {
+  return a;
+}
+// Slow generic implementation of Packet reduction.
+// The packet is spilled to a scratch array which is then folded in halves: on each pass element i
+// is combined with element i + width through op, and width is halved until a single value remains.
+template <typename Packet, typename Op>
+EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_helper(const Packet& a, Op op) {
+  using Scalar = typename unpacket_traits<Packet>::type;
+  const size_t lane_count = unpacket_traits<Packet>::size;
+  EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar partial[lane_count];
+  pstoreu<Scalar>(partial, a);
+  size_t width = lane_count / 2;
+  while (width > 0) {
+    for (size_t lane = 0; lane < width; ++lane) {
+      partial[lane] = op(partial[lane], partial[lane + width]);
+    }
+    width /= 2;
+  }
+  return partial[0];
+}
+/** \internal \returns the sum of the elements of \a a.
+ * The primary template returns \a a itself, converted to the packet's scalar type. */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux(const Packet& a) {
+  return a;
+}
+/** \internal \returns the product of the elements of \a a, folded with the scalar pmul through predux_helper */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a) {
+  using Scalar = typename unpacket_traits<Packet>::type;
+  const auto multiply = EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmul<Scalar>));
+  return predux_helper(a, multiply);
+}
+/** \internal \returns the min of the elements of \a a, folded with pmin<PropagateFast, Scalar>
+ * through predux_helper */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(const Packet& a) {
+  using Scalar = typename unpacket_traits<Packet>::type;
+  const auto smaller_of = EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin<PropagateFast, Scalar>));
+  return predux_helper(a, smaller_of);
+}
+/** \internal \returns the min of the elements of \a a.
+ * NaNPropagation selects the NaN semantics of the scalar pmin used for the fold. */
+template <int NaNPropagation, typename Packet>
+EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(const Packet& a) {
+  using Scalar = typename unpacket_traits<Packet>::type;
+  const auto smaller_of = EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin<NaNPropagation, Scalar>));
+  return predux_helper(a, smaller_of);
+}
+/** \internal \returns the max of the elements of \a a, folded with pmax<PropagateFast, Scalar>
+ * through predux_helper */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(const Packet& a) {
+  using Scalar = typename unpacket_traits<Packet>::type;
+  const auto larger_of = EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax<PropagateFast, Scalar>));
+  return predux_helper(a, larger_of);
+}
+/** \internal \returns the max of the elements of \a a.
+ * NaNPropagation selects the NaN semantics of the scalar pmax used for the fold. */
+template <int NaNPropagation, typename Packet>
+EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(const Packet& a) {
+  using Scalar = typename unpacket_traits<Packet>::type;
+  const auto larger_of = EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax<NaNPropagation, Scalar>));
+  return predux_helper(a, larger_of);
+}
+#undef EIGEN_BINARY_OP_NAN_PROPAGATION
+/** \internal \returns true if all coeffs of \a a means "true"
+ * It is supposed to be called on values returned by pcmp_*.
+ */
+// not needed yet
+// template<typename Packet> EIGEN_DEVICE_FUNC inline bool predux_all(const Packet& a)
+// { return bool(a); }
+/** \internal \returns true if any coeff of \a a means "true"
+ * It is supposed to be called on values returned by pcmp_*.
+ */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline bool predux_any(const Packet& a) {
+  // Crude but generic: "true" is assumed to be non-zero and identical in every coefficient.
+  // The expected encodings of "true" are:
+  //  - Scalar(1),
+  //  - all bits set (a NaN for floats),
+  //  - or only the first bit set (1 for ints, the smallest denormal for floats).
+  // In each of these cases summing the coefficients and comparing against zero is sufficient,
+  // and for scalars the sum is a no-op.
+  using Scalar = typename unpacket_traits<Packet>::type;
+  const Scalar total = predux(a);
+  return numext::not_equal_strict(total, Scalar(0));
+}
+/***************************************************************************
+ * The following functions might not have to be overwritten for vectorized types
+ ***************************************************************************/
+// Generic implementation of the four multiply-add variants, composed from pmul / padd / psub / pnegate:
+//   pmadd  =  a * b + c
+//   pmsub  =  a * b - c
+//   pnmadd =  c - a * b
+//   pnmsub = -(a * b + c)
+template <typename Packet, typename EnableIf = void>
+struct pmadd_impl {
+  static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pmadd(const Packet& a, const Packet& b, const Packet& c) {
+    return padd(pmul(a, b), c);
+  }
+  static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pmsub(const Packet& a, const Packet& b, const Packet& c) {
+    return psub(pmul(a, b), c);
+  }
+  static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pnmadd(const Packet& a, const Packet& b, const Packet& c) {
+    return psub(c, pmul(a, b));
+  }
+  static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pnmsub(const Packet& a, const Packet& b, const Packet& c) {
+    // The unqualified name pmadd is looked up in class scope first, so this is the static member above.
+    return pnegate(pmadd(a, b, c));
+  }
+};
+// Specialization for signed scalar types (is_scalar<Scalar> and NumTraits<Scalar>::IsSigned):
+// every variant is expressed through a single numext::fma call, with negation applied to an
+// operand or to the result. The explicit Scalar(...) conversions presumably bring the negated
+// value back to Scalar after integral promotion - TODO confirm.
+template <typename Scalar>
+struct pmadd_impl<Scalar, std::enable_if_t<is_scalar<Scalar>::value && NumTraits<Scalar>::IsSigned>> {
+  static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pmadd(const Scalar& a, const Scalar& b, const Scalar& c) {
+    return numext::fma(a, b, c);
+  }
+  static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pmsub(const Scalar& a, const Scalar& b, const Scalar& c) {
+    return numext::fma(a, b, Scalar(-c));
+  }
+  static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pnmadd(const Scalar& a, const Scalar& b, const Scalar& c) {
+    return numext::fma(Scalar(-a), b, c);
+  }
+  static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pnmsub(const Scalar& a, const Scalar& b, const Scalar& c) {
+    return -Scalar(numext::fma(a, b, c));
+  }
+};
+// FMA instructions.
+/** \internal \returns a * b + c (coeff-wise).
+ * Dispatches to pmadd_impl<Packet>::pmadd. */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet pmadd(const Packet& a, const Packet& b, const Packet& c) {
+  return pmadd_impl<Packet>::pmadd(a, b, c);
+}
+/** \internal \returns a * b - c (coeff-wise).
+ * Dispatches to pmadd_impl<Packet>::pmsub. */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet pmsub(const Packet& a, const Packet& b, const Packet& c) {
+  return pmadd_impl<Packet>::pmsub(a, b, c);
+}
+/** \internal \returns -(a * b) + c (coeff-wise).
+ * Dispatches to pmadd_impl<Packet>::pnmadd. */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet pnmadd(const Packet& a, const Packet& b, const Packet& c) {
+  return pmadd_impl<Packet>::pnmadd(a, b, c);
+}
+/** \internal \returns -(a * b + c) (coeff-wise).
+ * Dispatches to pmadd_impl<Packet>::pnmsub. */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet pnmsub(const Packet& a, const Packet& b, const Packet& c) {
+  return pmadd_impl<Packet>::pnmsub(a, b, c);
+}
+/** \internal copy a packet with constant coefficient \a a (e.g., [a,a,a,a]) to \a *to.
+ * The store goes through pstore, so \a to must satisfy the alignment pstore requires.
+ */
+// NOTE: the packet type has to be an explicit template parameter here, because one scalar type
+// can be associated with several different packet types.
+template <typename Packet>
+inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename unpacket_traits<Packet>::type& a) {
+  const Packet broadcast = pset1<Packet>(a);
+  pstore(to, broadcast);
+}
+/** \internal \returns a packet version of \a *from.
+ * The pointer \a from must be aligned on a \a Alignment bytes boundary.
+ * Uses the aligned pload when \a Alignment is at least the packet's required alignment,
+ * and the un-aligned ploadu otherwise. */
+template <typename Packet, int Alignment>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits<Packet>::type* from) {
+  const bool aligned_enough = Alignment >= unpacket_traits<Packet>::alignment;
+  if (!aligned_enough) {
+    return ploadu<Packet>(from);
+  }
+  return pload<Packet>(from);
+}
+/** \internal \returns n elements of a packet version of \a *from.
+ * The pointer \a from must be aligned on a \a Alignment bytes boundary.
+ * Uses pload_partial when \a Alignment is at least the packet's required alignment,
+ * and ploadu_partial otherwise. */
+template <typename Packet, int Alignment>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_partial(const typename unpacket_traits<Packet>::type* from,
+                                                            const Index n, const Index offset = 0) {
+  const bool aligned_enough = Alignment >= unpacket_traits<Packet>::alignment;
+  if (!aligned_enough) {
+    return ploadu_partial<Packet>(from, n, offset);
+  }
+  return pload_partial<Packet>(from, n, offset);
+}
+/** \internal copy the packet \a from to \a *to.
+ * The pointer \a to must be aligned on a \a Alignment bytes boundary.
+ * The aligned pstore is used when \a Alignment is at least the alignment required by \c Packet,
+ * otherwise the unaligned pstoreu is used. */
+template <typename Scalar, typename Packet, int Alignment>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from) {
+  if (Alignment >= unpacket_traits<Packet>::alignment)
+    pstore(to, from);
+  else
+    pstoreu(to, from);
+}
+/** \internal copy n elements of the packet \a from to \a *to.
+ * The pointer \a to must be aligned on a \a Alignment bytes boundary.
+ * The aligned pstore_partial is used when \a Alignment is at least the alignment required by \c Packet,
+ * otherwise the unaligned pstoreu_partial is used. */
+template <typename Scalar, typename Packet, int Alignment>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret_partial(Scalar* to, const Packet& from, const Index n,
+                                                           const Index offset = 0) {
+  if (Alignment >= unpacket_traits<Packet>::alignment)
+    pstore_partial(to, from, n, offset);
+  else
+    pstoreu_partial(to, from, n, offset);
+}
+/** \internal \returns a packet version of \a *from.
+ * Unlike ploadt, ploadt_ro takes advantage of the read-only memory path on the
+ * hardware if available to speedup the loading of data that won't be modified
+ * by the current computation.
+ *
+ * This generic definition has no special read-only path: it simply forwards to
+ * ploadt with \a LoadMode used as the alignment argument.
+ */
+template <typename Packet, int LoadMode>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from) {
+  return ploadt<Packet, LoadMode>(from);
+}
+/***************************************************************************
+ * Fast complex products (GCC generates a function call which is very slow)
+ ***************************************************************************/
+// Eigen+CUDA does not support complexes.
+#if !defined(EIGEN_GPUCC)
+// Single-precision complex product written out explicitly: (ar + i*ai) * (br + i*bi).
+template <>
+inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b) {
+  const float re = a.real() * b.real() - a.imag() * b.imag();
+  const float im = a.imag() * b.real() + a.real() * b.imag();
+  return std::complex<float>(re, im);
+}
+// Double-precision complex product written out explicitly: (ar + i*ai) * (br + i*bi).
+template <>
+inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b) {
+  const double re = a.real() * b.real() - a.imag() * b.imag();
+  const double im = a.imag() * b.real() + a.real() * b.imag();
+  return std::complex<double>(re, im);
+}
+#endif
+/***************************************************************************
+ * PacketBlock, that is a collection of N packets where the number of words
+ * in the packet is a multiple of N.
+ ***************************************************************************/
+template <typename Packet, int N = unpacket_traits<Packet>::size>
+struct PacketBlock {
+  Packet packet[N];
+};
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet, 1>& /*kernel*/) {
+  // Nothing to do in the scalar case, i.e. a 1x1 matrix.
+}
+/***************************************************************************
+ * Selector, i.e. vector of N boolean values used to select (i.e. blend)
+ * words from 2 packets.
+ ***************************************************************************/
+template <size_t N>
+struct Selector {
+  bool select[N];
+};
+// Generic blend: only the first selector entry is consulted, and the chosen packet is returned whole.
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket,
+                                       const Packet& thenPacket, const Packet& elsePacket) {
+  if (ifPacket.select[0]) {
+    return thenPacket;
+  }
+  return elsePacket;
+}
+/** \internal \returns 1 / a (coeff-wise), computed by dividing a packet of ones by \a a */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet preciprocal(const Packet& a) {
+  typedef typename unpacket_traits<Packet>::type Scalar;
+  const Packet ones = pset1<Packet>(Scalar(1));
+  return pdiv(ones, a);
+}
+/** \internal \returns the reciprocal square-root of \a a (coeff-wise), computed as the reciprocal of psqrt(a) */
+template <typename Packet>
+EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet prsqrt(const Packet& a) {
+  const Packet root = psqrt(a);
+  return preciprocal<Packet>(root);
+}
+// Implementation selector for psignbit. The specialization is chosen from two compile-time flags:
+// whether Packet is itself a scalar type, and whether its underlying scalar type is an integer.
+template <typename Packet, bool IsScalar = is_scalar<Packet>::value,
+          bool IsInteger = NumTraits<typename unpacket_traits<Packet>::type>::IsInteger>
+struct psignbit_impl;
+// Scalar "packets" (integer or not): defer to numext::signbit.
+template <typename Packet, bool IsInteger>
+struct psignbit_impl<Packet, true, IsInteger> {
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static constexpr Packet run(const Packet& a) { return numext::signbit(a); }
+};
+// Non-scalar packets of a non-integer scalar type.
+template <typename Packet>
+struct psignbit_impl<Packet, false, false> {
+  // generic implementation if not specialized in PacketMath.h
+  // slower than arithmetic shift
+  typedef typename unpacket_traits<Packet>::type Scalar;
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static Packet run(const Packet& a) {
+    const Packet cst_pos_one = pset1<Packet>(Scalar(1));
+    const Packet cst_neg_one = pset1<Packet>(Scalar(-1));
+    // Keep only the bits of a that are also set in -1, then force the bits of +1 on.
+    // The result compares equal to -1 exactly where a carries the bits that distinguish -1 from +1.
+    // NOTE(review): presumably this relies on -1 and +1 differing only in the sign bit (IEEE-style layout) -- confirm.
+    return pcmp_eq(por(pand(a, cst_neg_one), cst_pos_one), cst_neg_one);
+  }
+};
+// Non-scalar packets of an integer scalar type: the sign bit is set exactly where a < 0.
+template <typename Packet>
+struct psignbit_impl<Packet, false, true> {
+  // generic implementation for integer packets
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static constexpr Packet run(const Packet& a) { return pcmp_lt(a, pzero(a)); }
+};
+/** \internal \returns the sign bit of \a a as a bitmask*/
+template <typename Packet>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE constexpr Packet psignbit(const Packet& a) {
+  return psignbit_impl<Packet>::run(a);
+}
+/** \internal \returns the 2-argument arc tangent of \a y and \a x (coeff-wise)
+ * Overload for scalar types: forwards to numext::atan2. */
+template <typename Packet, std::enable_if_t<is_scalar<Packet>::value, int> = 0>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet patan2(const Packet& y, const Packet& x) {
+  return numext::atan2(y, x);
+}
+/** \internal \returns the 2-argument arc tangent of \a y and \a x (coeff-wise)
+ * Overload for non-scalar packets: computes patan(y / x) with the quotient patched for the
+ * |x| == |y| and x == y == 0 cases, then adds a shift built from the signs of \a x and \a y. */
+template <typename Packet, std::enable_if_t<!is_scalar<Packet>::value, int> = 0>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet patan2(const Packet& y, const Packet& x) {
+  typedef typename internal::unpacket_traits<Packet>::type Scalar;
+  // See https://en.cppreference.com/w/cpp/numeric/math/atan2
+  // for how corner cases are supposed to be handled according to the
+  // IEEE floating-point standard (IEC 60559).
+  // Packet of negative zeros, used as a bit mask to extract sign bits.
+  const Packet kSignMask = pset1<Packet>(-Scalar(0));
+  const Packet kZero = pzero(x);
+  const Packet kOne = pset1<Packet>(Scalar(1));
+  const Packet kPi = pset1<Packet>(Scalar(EIGEN_PI));
+  // Bitmask that is set in the lanes where x has its sign bit set.
+  const Packet x_has_signbit = psignbit(x);
+  const Packet y_signmask = pand(y, kSignMask);
+  const Packet x_signmask = pand(x, kSignMask);
+  // Sign bit of the quotient y / x.
+  const Packet result_signmask = pxor(y_signmask, x_signmask);
+  // pi carrying the sign of y where x has its sign bit set, a zero carrying the sign of y elsewhere.
+  const Packet shift = por(pand(x_has_signbit, kPi), y_signmask);
+  const Packet x_and_y_are_same = pcmp_eq(pabs(x), pabs(y));
+  // The bitwise OR of x and y compares equal to zero only when both are (signed) zeros.
+  const Packet x_and_y_are_zero = pcmp_eq(por(x, y), kZero);
+  Packet arg = pdiv(y, x);
+  // Equal magnitudes: use a signed one instead of the computed quotient.
+  // NOTE(review): presumably this covers the inf/inf case where the division yields NaN -- confirm.
+  arg = pselect(x_and_y_are_same, por(kOne, result_signmask), arg);
+  // Both zero: use a signed zero instead of the computed quotient (applied last, so it wins over the case above).
+  arg = pselect(x_and_y_are_zero, result_signmask, arg);
+  Packet result = patan(arg);
+  result = padd(result, shift);
+  return result;
+}
+/** \internal \returns the argument of \a a as a complex number
+ * Overload for scalar types: converts the result of numext::arg back to \c Packet. */
+template <typename Packet, std::enable_if_t<is_scalar<Packet>::value, int> = 0>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pcarg(const Packet& a) {
+  return Packet(numext::arg(a));
+}
+/** \internal \returns the argument of \a a as a complex number
+ * Overload for non-scalar packets; statically restricted to packets of complex scalars.
+ * The trailing comments below sketch the contents of the underlying real packet at each step. */
+template <typename Packet, std::enable_if_t<!is_scalar<Packet>::value, int> = 0>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pcarg(const Packet& a) {
+  EIGEN_STATIC_ASSERT(NumTraits<typename unpacket_traits<Packet>::type>::IsComplex,
+                      THIS METHOD IS FOR COMPLEX TYPES ONLY)
+  using RealPacket = typename unpacket_traits<Packet>::as_real;
+  // a                                              // r     i    r     i    ...
+  RealPacket aflip = pcplxflip(a).v;                // i     r    i     r    ...
+  RealPacket result = patan2(aflip, a.v);           // atan2 junk atan2 junk ...
+  // Mask with peven_mask so the unwanted lanes become zero before converting back to Packet.
+  return (Packet)pand(result, peven_mask(result));  // atan2 0    atan2 0    ...
+}
+/** \internal \returns a packet populated with values in the range [begin, begin + count). Elements
+ * outside this range are not defined. \a *from does not need to be aligned, and can be null if \a count is zero.
+ * This generic version goes through a zero-filled temporary array and an unaligned packet load. */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet ploaduSegment(const typename unpacket_traits<Packet>::type* from, Index begin,
+                                              Index count) {
+  typedef typename unpacket_traits<Packet>::type Scalar;
+  constexpr Index PacketSize = unpacket_traits<Packet>::size;
+  eigen_assert((begin >= 0 && count >= 0 && begin + count <= PacketSize) && "invalid range");
+  // Temporary array holding one full packet worth of scalars, cleared before the segment is copied in.
+  Scalar staging[PacketSize];
+  memset(static_cast<void*>(staging), 0x00, sizeof(staging));
+  const Scalar* const segmentFirst = from + begin;
+  smart_copy(segmentFirst, segmentFirst + count, staging + begin);
+  return ploadu<Packet>(staging);
+}
+/** \internal \returns a packet populated with values in the range [begin, begin + count). Elements
+ * outside this range are not defined. \a *from must be aligned, and cannot be null.
+ * This generic version does not exploit the alignment: it forwards to ploaduSegment. */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet ploadSegment(const typename unpacket_traits<Packet>::type* from, Index begin,
+                                             Index count) {
+  return ploaduSegment<Packet>(from, begin, count);
+}
+/** \internal copy the packet \a from in the range [begin, begin + count) to \a *to.
+Elements outside of the range [begin, begin + count) are not defined. \a *to does not need to be aligned, and can be
+null if \a count is zero.
+This generic version stores the whole packet to a temporary array and copies only the requested segment. */
+template <typename Scalar, typename Packet>
+EIGEN_DEVICE_FUNC inline void pstoreuSegment(Scalar* to, const Packet& from, Index begin, Index count) {
+  constexpr Index PacketSize = unpacket_traits<Packet>::size;
+  eigen_assert((begin >= 0 && count >= 0 && begin + count <= PacketSize) && "invalid range");
+  // Temporary array receiving every coefficient of the packet.
+  Scalar staging[PacketSize];
+  pstoreu<Scalar, Packet>(staging, from);
+  Scalar* const segmentFirst = staging + begin;
+  smart_copy(segmentFirst, segmentFirst + count, to + begin);
+}
+/** \internal copy the packet \a from in the range [begin, begin + count) to \a *to.
+Elements outside of the range [begin, begin + count) are not defined. \a *to must be aligned, and cannot be
+null.
+This generic version does not exploit the alignment: it forwards to pstoreuSegment. */
+template <typename Scalar, typename Packet>
+EIGEN_DEVICE_FUNC inline void pstoreSegment(Scalar* to, const Packet& from, Index begin, Index count) {
+  pstoreuSegment(to, from, begin, count);
+}
+/** \internal \returns a packet populated with values in the range [begin, begin + count). Elements
+ * outside this range are not defined.
+ * ploadSegment is used when \a Alignment is at least the alignment required by \c Packet,
+ * otherwise ploaduSegment is used. */
+template <typename Packet, int Alignment>
+EIGEN_DEVICE_FUNC inline Packet ploadtSegment(const typename unpacket_traits<Packet>::type* from, Index begin,
+                                              Index count) {
+  constexpr int RequiredAlignment = unpacket_traits<Packet>::alignment;
+  constexpr bool UseAlignedLoad = Alignment >= RequiredAlignment;
+  if (UseAlignedLoad) {
+    return ploadSegment<Packet>(from, begin, count);
+  }
+  return ploaduSegment<Packet>(from, begin, count);
+}
+/** \internal copy the packet \a from in the range [begin, begin + count) to \a *to.
+Elements outside of the range [begin, begin + count) are not defined.
+pstoreSegment is used when \a Alignment is at least the alignment required by \c Packet,
+otherwise pstoreuSegment is used. */
+template <typename Scalar, typename Packet, int Alignment>
+EIGEN_DEVICE_FUNC inline void pstoretSegment(Scalar* to, const Packet& from, Index begin, Index count) {
+  constexpr int RequiredAlignment = unpacket_traits<Packet>::alignment;
+  constexpr bool UseAlignedStore = Alignment >= RequiredAlignment;
+  if (UseAlignedStore) {
+    pstoreSegment<Scalar, Packet>(to, from, begin, count);
+    return;
+  }
+  pstoreuSegment<Scalar, Packet>(to, from, begin, count);
+}
+#ifndef EIGEN_NO_IO
+// Holds a scalar copy of a packet so that it can be written to a std::ostream as "{c0,c1,...}".
+template <typename Packet>
+class StreamablePacket {
+ public:
+  using Scalar = typename unpacket_traits<Packet>::type;
+  // Unpacks the packet into the internal scalar array with an unaligned store.
+  StreamablePacket(const Packet& packet) { pstoreu(v_, packet); }
+  // Writes the coefficients in order, comma-separated and wrapped in braces.
+  friend std::ostream& operator<<(std::ostream& os, const StreamablePacket& packet) {
+    os << "{";
+    for (int i = 0; i < unpacket_traits<Packet>::size; ++i) {
+      if (i != 0) {
+        os << ",";
+      }
+      os << packet.v_[i];
+    }
+    os << "}";
+    return os;
+  }
+ private:
+  Scalar v_[unpacket_traits<Packet>::size];
+};
+/**
+ * \internal \returns an intermediary that can be used to ostream packets, e.g. for debugging.
+ *
+ * The returned StreamablePacket is constructed from \a packet, so it does not keep a reference to it.
+ */
+template <typename Packet>
+StreamablePacket<Packet> postream(const Packet& packet) {
+  return StreamablePacket<Packet>(packet);
+}
+#endif  // EIGEN_NO_IO
+}  // end namespace internal
+}  // end namespace Eigen
+#endif  // EIGEN_GENERIC_PACKET_MATH_H
--- a/eigen-master/Eigen/src/Core/GlobalFunctions.h
+++ b/eigen-master/Eigen/src/Core/GlobalFunctions.h
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2010-2016 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#ifndef EIGEN_GLOBAL_FUNCTIONS_H
+#define EIGEN_GLOBAL_FUNCTIONS_H
+// EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(NAME, FUNCTOR, DOC_OP, DOC_DETAILS)
+// Declares a free function template NAME(x) taking an Eigen::ArrayBase<Derived> and returning a
+// CwiseUnaryOp<internal::FUNCTOR<Scalar>, const Derived> expression built on x.derived().
+// DOC_OP and DOC_DETAILS are only used by the Doxygen variant, which emits a documented declaration;
+// the regular variant ignores them and emits the definition.
+// NOTE(review): the parentheses around NAME in the regular variant presumably protect against
+// function-like macros of the same name -- confirm.
+#ifdef EIGEN_PARSED_BY_DOXYGEN
+#define EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(NAME, FUNCTOR, DOC_OP, DOC_DETAILS)                                    \
+  /** \returns an expression of the coefficient-wise DOC_OP of \a x                                             \
+                                                                                                              \ \
+    DOC_DETAILS                                                                                                 \
+                                                                                                              \ \
+    \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_##NAME">Math functions</a>, class CwiseUnaryOp   \
+    */                                                                                                          \
+  template <typename Derived>                                                                                   \
+  inline const Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived> NAME(     \
+      const Eigen::ArrayBase<Derived>& x);
+#else
+#define EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(NAME, FUNCTOR, DOC_OP, DOC_DETAILS)                                    \
+  template <typename Derived>                                                                                   \
+  inline const Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived>(NAME)(    \
+      const Eigen::ArrayBase<Derived>& x) {                                                                     \
+    return Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived>(x.derived()); \
+  }
+#endif  // EIGEN_PARSED_BY_DOXYGEN
+// EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(NAME, FUNCTOR)
+// Emits partial specializations of NAME_retval and NAME_impl for ArrayBase<Derived>:
+//  - NAME_retval<ArrayBase<Derived>>::type is a const CwiseUnaryOp<internal::FUNCTOR<Scalar>, const Derived>;
+//  - NAME_impl<ArrayBase<Derived>>::run(x) returns that expression built on x.derived().
+// The primary NAME_retval / NAME_impl templates must be visible where the macro is expanded.
+#define EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(NAME, FUNCTOR)                                                  \
+                                                                                                               \
+  template <typename Derived>                                                                                  \
+  struct NAME##_retval<ArrayBase<Derived> > {                                                                  \
+    typedef const Eigen::CwiseUnaryOp<Eigen::internal::FUNCTOR<typename Derived::Scalar>, const Derived> type; \
+  };                                                                                                           \
+  template <typename Derived>                                                                                  \
+  struct NAME##_impl<ArrayBase<Derived> > {                                                                    \
+    static inline typename NAME##_retval<ArrayBase<Derived> >::type run(const Eigen::ArrayBase<Derived>& x) {  \
+      return typename NAME##_retval<ArrayBase<Derived> >::type(x.derived());                                   \
+    }                                                                                                          \
+  };
+// IWYU pragma: private
+#include "./InternalHeaderCheck.h"
+namespace Eigen {
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(real, scalar_real_op, real part,\sa ArrayBase::real)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(imag, scalar_imag_op, imaginary part,\sa ArrayBase::imag)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(conj, scalar_conjugate_op, complex conjugate,\sa ArrayBase::conjugate)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(inverse, scalar_inverse_op, inverse,\sa ArrayBase::inverse)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sin, scalar_sin_op, sine,\sa ArrayBase::sin)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cos, scalar_cos_op, cosine,\sa ArrayBase::cos)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tan, scalar_tan_op, tangent,\sa ArrayBase::tan)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(atan, scalar_atan_op, arc - tangent,\sa ArrayBase::atan)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(asin, scalar_asin_op, arc - sine,\sa ArrayBase::asin)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(acos, scalar_acos_op, arc - cosine,\sa ArrayBase::acos)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sinh, scalar_sinh_op, hyperbolic sine,\sa ArrayBase::sinh)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cosh, scalar_cosh_op, hyperbolic cosine,\sa ArrayBase::cosh)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh, scalar_tanh_op, hyperbolic tangent,\sa ArrayBase::tanh)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(asinh, scalar_asinh_op, inverse hyperbolic sine,\sa ArrayBase::asinh)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(acosh, scalar_acosh_op, inverse hyperbolic cosine,\sa ArrayBase::acosh)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(atanh, scalar_atanh_op, inverse hyperbolic tangent,\sa ArrayBase::atanh)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(logistic, scalar_logistic_op, logistic function,\sa ArrayBase::logistic)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(lgamma, scalar_lgamma_op,
+                                 natural logarithm of the gamma function,\sa ArrayBase::lgamma)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(digamma, scalar_digamma_op, derivative of lgamma,\sa ArrayBase::digamma)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf, scalar_erf_op, error function,\sa ArrayBase::erf)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc, scalar_erfc_op, complement error function,\sa ArrayBase::erfc)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(ndtri, scalar_ndtri_op, inverse normal distribution function,\sa ArrayBase::ndtri)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp, scalar_exp_op, exponential,\sa ArrayBase::exp)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp2, scalar_exp2_op, base 2 exponential,\sa ArrayBase::exp2)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(expm1, scalar_expm1_op, exponential of a value minus 1,\sa ArrayBase::expm1)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log, scalar_log_op, natural logarithm,\sa Eigen::log10 DOXCOMMA ArrayBase::log)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log1p, scalar_log1p_op, natural logarithm of 1 plus the value,\sa ArrayBase::log1p)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log10, scalar_log10_op, base 10 logarithm,\sa Eigen::log DOXCOMMA ArrayBase::log10)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log2, scalar_log2_op, base 2 logarithm,\sa Eigen::log DOXCOMMA ArrayBase::log2)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs, scalar_abs_op, absolute value,\sa ArrayBase::abs DOXCOMMA MatrixBase::cwiseAbs)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs2, scalar_abs2_op,
+                                 squared absolute value,\sa ArrayBase::abs2 DOXCOMMA MatrixBase::cwiseAbs2)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(arg, scalar_arg_op, complex argument,\sa ArrayBase::arg DOXCOMMA MatrixBase::cwiseArg)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(carg, scalar_carg_op,
+                                 complex argument, \sa ArrayBase::carg DOXCOMMA MatrixBase::cwiseCArg)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sqrt, scalar_sqrt_op, square root,\sa ArrayBase::sqrt DOXCOMMA MatrixBase::cwiseSqrt)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cbrt, scalar_cbrt_op, cube root,\sa ArrayBase::cbrt DOXCOMMA MatrixBase::cwiseCbrt)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(rsqrt, scalar_rsqrt_op, reciprocal square root,\sa ArrayBase::rsqrt)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(square, scalar_square_op,
+                                 square(power 2),\sa Eigen::abs2 DOXCOMMA Eigen::pow DOXCOMMA ArrayBase::square)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cube, scalar_cube_op, cube(power 3),\sa Eigen::pow DOXCOMMA ArrayBase::cube)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(rint, scalar_rint_op,
+                                 nearest integer,\sa Eigen::floor DOXCOMMA Eigen::ceil DOXCOMMA ArrayBase::rint)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(round, scalar_round_op,
+                                 nearest integer,\sa Eigen::floor DOXCOMMA Eigen::ceil DOXCOMMA ArrayBase::round)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(
+    floor, scalar_floor_op, nearest integer not greater than the given value,\sa Eigen::ceil DOXCOMMA ArrayBase::floor)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(
+    ceil, scalar_ceil_op, nearest integer not less than the given value,\sa Eigen::floor DOXCOMMA ArrayBase::ceil)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(trunc, scalar_trunc_op,
+                                 nearest integer not greater in magnitude than the given value,\sa Eigen::trunc DOXCOMMA
+                                     ArrayBase::trunc)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(
+    isnan, scalar_isnan_op, not -a - number test,\sa Eigen::isinf DOXCOMMA Eigen::isfinite DOXCOMMA ArrayBase::isnan)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(
+    isinf, scalar_isinf_op, infinite value test,\sa Eigen::isnan DOXCOMMA Eigen::isfinite DOXCOMMA ArrayBase::isinf)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isfinite, scalar_isfinite_op,
+                                 finite value test,\sa Eigen::isinf DOXCOMMA Eigen::isnan DOXCOMMA ArrayBase::isfinite)
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sign, scalar_sign_op, sign(or 0),\sa ArrayBase::sign)
+// Return type of the global pow(array, scalar exponent) overload, guarded by enable_if:
+// it only exists when NumTraits<Derived>::Real is not an arithmetic type while
+// NumTraits<ScalarExponent>::Real is. When it exists it names the CwiseUnaryOp expression that applies
+// internal::scalar_unary_pow_op<Derived::Scalar, ScalarExponent> to a const Derived.
+template <typename Derived, typename ScalarExponent>
+using GlobalUnaryPowReturnType = std::enable_if_t<
+    !internal::is_arithmetic<typename NumTraits<Derived>::Real>::value &&
+        internal::is_arithmetic<typename NumTraits<ScalarExponent>::Real>::value,
+    CwiseUnaryOp<internal::scalar_unary_pow_op<typename Derived::Scalar, ScalarExponent>, const Derived> >;
+/** \returns an expression of the coefficient-wise power of \a x to the given constant \a exponent.
+ *
+ * \tparam ScalarExponent is the scalar type of \a exponent. It must be compatible with the scalar type of the given
+ * expression (\c Derived::Scalar).
+ *
+ * The returned expression wraps \c x.derived() together with a unary power functor holding \a exponent.
+ *
+ * \sa ArrayBase::pow()
+ *
+ * \relates ArrayBase
+ */
+#ifdef EIGEN_PARSED_BY_DOXYGEN
+template <typename Derived, typename ScalarExponent>
+EIGEN_DEVICE_FUNC inline const GlobalUnaryPowReturnType<Derived, ScalarExponent> pow(const Eigen::ArrayBase<Derived>& x,
+                                                                                     const ScalarExponent& exponent);
+#else
+template <typename Derived, typename ScalarExponent>
+EIGEN_DEVICE_FUNC inline const GlobalUnaryPowReturnType<Derived, ScalarExponent> pow(const Eigen::ArrayBase<Derived>& x,
+                                                                                     const ScalarExponent& exponent) {
+  typedef internal::scalar_unary_pow_op<typename Derived::Scalar, ScalarExponent> PowFunctor;
+  typedef GlobalUnaryPowReturnType<Derived, ScalarExponent> ResultType;
+  return ResultType(x.derived(), PowFunctor(exponent));
+}
+#endif
+/** \returns an expression of the coefficient-wise power of \a x to the given array of \a exponents.
+ *
+ * This function computes the coefficient-wise power: the returned binary expression pairs
+ * \c x.derived() with \c exponents.derived().
+ *
+ * Example: \include Cwise_array_power_array.cpp
+ * Output: \verbinclude Cwise_array_power_array.out
+ *
+ * \sa ArrayBase::pow()
+ *
+ * \relates ArrayBase
+ */
+template <typename Derived, typename ExponentDerived>
+inline const Eigen::CwiseBinaryOp<
+    Eigen::internal::scalar_pow_op<typename Derived::Scalar, typename ExponentDerived::Scalar>, const Derived,
+    const ExponentDerived>
+pow(const Eigen::ArrayBase<Derived>& x, const Eigen::ArrayBase<ExponentDerived>& exponents) {
+  typedef Eigen::internal::scalar_pow_op<typename Derived::Scalar, typename ExponentDerived::Scalar> PowFunctor;
+  typedef Eigen::CwiseBinaryOp<PowFunctor, const Derived, const ExponentDerived> ResultType;
+  return ResultType(x.derived(), exponents.derived());
+}
+/** \returns an expression of the coefficient-wise power of the scalar \a x to the given array of \a exponents.
+ *
+ * This function computes the coefficient-wise power between a scalar and an array of exponents.
+ * The scalar is wrapped in a constant expression with the same number of rows and columns as \a exponents.
+ *
+ * \tparam Scalar is the scalar type of \a x. It must be compatible with the scalar type of the given array expression
+ * (\c Derived::Scalar).
+ *
+ * Example: \include Cwise_scalar_power_array.cpp
+ * Output: \verbinclude Cwise_scalar_power_array.out
+ *
+ * \sa ArrayBase::pow()
+ *
+ * \relates ArrayBase
+ */
+#ifdef EIGEN_PARSED_BY_DOXYGEN
+template <typename Scalar, typename Derived>
+inline const CwiseBinaryOp<internal::scalar_pow_op<Scalar, Derived::Scalar>, Constant<Scalar>, Derived> pow(
+    const Scalar& x, const Eigen::ArrayBase<Derived>& exponents);
+#else
+template <typename Scalar, typename Derived>
+EIGEN_DEVICE_FUNC inline const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(
+    typename internal::promote_scalar_arg<typename Derived::Scalar EIGEN_COMMA Scalar EIGEN_COMMA
+                                              EIGEN_SCALAR_BINARY_SUPPORTED(pow, Scalar,
+                                                                            typename Derived::Scalar)>::type,
+    Derived, pow) pow(const Scalar& x, const Eigen::ArrayBase<Derived>& exponents) {
+  // Scalar type that x is promoted to before being broadcast into a constant expression.
+  typedef
+      typename internal::promote_scalar_arg<typename Derived::Scalar, Scalar,
+                                            EIGEN_SCALAR_BINARY_SUPPORTED(pow, Scalar, typename Derived::Scalar)>::type
+          PromotedScalar;
+  return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(PromotedScalar, Derived, pow)(
+      typename internal::plain_constant_type<Derived, PromotedScalar>::type(
+          exponents.derived().rows(), exponents.derived().cols(), internal::scalar_constant_op<PromotedScalar>(x)),
+      exponents.derived());
+}
+#endif
+/** \returns an expression of the coefficient-wise atan2(\a x, \a y). \a x and \a y must be of the same type.
+ *
+ * This function computes the coefficient-wise atan2(). The overload only participates when the scalar
+ * types of \a x and \a y are identical; \a x is the left operand and \a y the right operand of the
+ * returned binary expression.
+ *
+ * \sa ArrayBase::atan2()
+ *
+ * \relates ArrayBase
+ */
+template <typename LhsDerived, typename RhsDerived>
+inline const std::enable_if_t<
+    std::is_same<typename LhsDerived::Scalar, typename RhsDerived::Scalar>::value,
+    Eigen::CwiseBinaryOp<Eigen::internal::scalar_atan2_op<typename LhsDerived::Scalar, typename RhsDerived::Scalar>,
+                         const LhsDerived, const RhsDerived> >
+atan2(const Eigen::ArrayBase<LhsDerived>& x, const Eigen::ArrayBase<RhsDerived>& y) {
+  return Eigen::CwiseBinaryOp<
+      Eigen::internal::scalar_atan2_op<typename LhsDerived::Scalar, typename RhsDerived::Scalar>, const LhsDerived,
+      const RhsDerived>(x.derived(), y.derived());
+}
+namespace internal {
+// Specialize the real/imag/abs2 *_retval and *_impl helpers for ArrayBase arguments so that they
+// yield coefficient-wise expressions (see EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY).
+EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(real, scalar_real_op)
+EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(imag, scalar_imag_op)
+EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(abs2, scalar_abs2_op)
+}  // namespace internal
+}  // namespace Eigen
+// TODO: cleanly disable those functions that are not supported on Array (numext::real_ref, internal::random,
+// internal::isApprox...)
+#endif  // EIGEN_GLOBAL_FUNCTIONS_H
--- a/eigen-master/Eigen/src/Core/IO.h
+++ b/eigen-master/Eigen/src/Core/IO.h
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#ifndef EIGEN_IO_H
+#define EIGEN_IO_H
+// IWYU pragma: private
+#include "./InternalHeaderCheck.h"
+namespace Eigen {
+// Flag for IOFormat::flags: disables the alignment of columns when printing.
+enum { DontAlignCols = 1 };
+// Special values for IOFormat::precision: StreamPrecision keeps the stream's own precision setting,
+// FullPrecision requests enough digits for the full precision of the scalar type.
+enum { StreamPrecision = -1, FullPrecision = -2 };
+namespace internal {
+template <typename Derived>
+std::ostream& print_matrix(std::ostream& s, const Derived& _m, const IOFormat& fmt);
+}
+/** \class IOFormat
+ * \ingroup Core_Module
+ *
+ * \brief Stores a set of parameters controlling the way matrices are printed
+ *
+ * List of available parameters:
+ *  - \b precision number of digits for floating point values, or one of the special constants \c StreamPrecision and \c
+ * FullPrecision. The default is the special value \c StreamPrecision which means to use the stream's own precision
+ * setting, as set for instance using \c cout.precision(3). The other special value \c FullPrecision means that the
+ * number of digits will be computed to match the full precision of each floating-point type.
+ *  - \b flags an OR-ed combination of flags, the default value is 0, the only currently available flag is \c
+ * DontAlignCols which allows to disable the alignment of columns, resulting in faster code.
+ *  - \b coeffSeparator string printed between two coefficients of the same row
+ *  - \b rowSeparator string printed between two rows
+ *  - \b rowPrefix string printed at the beginning of each row
+ *  - \b rowSuffix string printed at the end of each row
+ *  - \b matPrefix string printed at the beginning of the matrix
+ *  - \b matSuffix string printed at the end of the matrix
+ *  - \b fill character printed to fill the empty space in aligned columns
+ *
+ * Example: \include IOFormat.cpp
+ * Output: \verbinclude IOFormat.out
+ *
+ * \sa DenseBase::format(), class WithFormat
+ */
+struct IOFormat {
+  /** Builds a format from the given settings, see class IOFormat for the meaning of the parameters.
+   * Unless DontAlignCols is set, rowSpacer is filled with one space per character of \a _matPrefix
+   * located after its last newline (or per character of the whole prefix if it has no newline). */
+  IOFormat(int _precision = StreamPrecision, int _flags = 0, const std::string& _coeffSeparator = " ",
+           const std::string& _rowSeparator = "\n", const std::string& _rowPrefix = "",
+           const std::string& _rowSuffix = "", const std::string& _matPrefix = "", const std::string& _matSuffix = "",
+           const char _fill = ' ')
+      : matPrefix(_matPrefix),
+        matSuffix(_matSuffix),
+        rowPrefix(_rowPrefix),
+        rowSuffix(_rowSuffix),
+        rowSeparator(_rowSeparator),
+        rowSpacer(""),
+        coeffSeparator(_coeffSeparator),
+        fill(_fill),
+        precision(_precision),
+        flags(_flags) {
+    // TODO check if rowPrefix, rowSuffix or rowSeparator contains a newline
+    // rowSpacer stays empty when columns are not to be aligned
+    if ((flags & DontAlignCols) != 0) return;
+    // Length of the part of matPrefix that follows its last newline.
+    const std::string::size_type lastNewline = matPrefix.rfind('\n');
+    const std::string::size_type tailLength =
+        (lastNewline == std::string::npos) ? matPrefix.length() : matPrefix.length() - lastNewline - 1;
+    rowSpacer.append(tailLength, ' ');
+  }
+  // Strings printed at the beginning and at the end of the matrix.
+  std::string matPrefix;
+  std::string matSuffix;
+  // Strings printed at the beginning and at the end of each row, and between two rows.
+  std::string rowPrefix;
+  std::string rowSuffix;
+  std::string rowSeparator;
+  // Run of spaces computed by the constructor from matPrefix.
+  std::string rowSpacer;
+  // String printed between two coefficients of the same row.
+  std::string coeffSeparator;
+  // Character printed to fill the empty space in aligned columns.
+  char fill;
+  // Number of digits, or one of StreamPrecision / FullPrecision.
+  int precision;
+  // OR-ed combination of flags (currently only DontAlignCols).
+  int flags;
+};
+/** \class WithFormat
+ * \ingroup Core_Module
+ *
+ * \brief Pseudo expression providing matrix output with given format
+ *
+ * \tparam ExpressionType the type of the object on which IO stream operations are performed
+ *
+ * This class represents an expression with stream operators controlled by a given IOFormat.
+ * It is the return type of DenseBase::format()
+ * and most of the time this is the only way it is used.
+ *
+ * See class IOFormat for some examples.
+ *
+ * \sa DenseBase::format(), class IOFormat
+ */
+template <typename ExpressionType>
+class WithFormat {
+ public:
+  // Stores the expression (as ExpressionType::Nested) together with a copy of the format.
+  WithFormat(const ExpressionType& matrix, const IOFormat& format) : m_matrix(matrix), m_format(format) {}
+  // Evaluates the stored expression and prints the result with internal::print_matrix using the stored format.
+  friend std::ostream& operator<<(std::ostream& s, const WithFormat& wf) {
+    return internal::print_matrix(s, wf.m_matrix.eval(), wf.m_format);
+  }
+ protected:
+  // Expression to print, held in its nested form.
+  typename ExpressionType::Nested m_matrix;
+  // Format applied when streaming.
+  IOFormat m_format;
+};
+namespace internal {
+// NOTE: This helper is kept for backward compatibility with previous code specializing
+//       this internal::significant_decimals_impl structure. In the future we should directly
+//       call max_digits10().
+template <typename Scalar>
+struct significant_decimals_impl {
+  /** \returns NumTraits<Scalar>::max_digits10(); print_matrix uses it when FullPrecision is requested. */
+  static inline int run() {
+    typedef NumTraits<Scalar> ScalarTraits;
+    return ScalarTraits::max_digits10();
+  }
+};
+/** \internal
+ * print the matrix \a _m to the output stream \a s using the output format \a fmt */
+template <typename Derived>
+std::ostream& print_matrix(std::ostream& s, const Derived& _m, const IOFormat& fmt) {
+  using internal::is_same;
+  // An empty object is rendered as the matrix prefix immediately followed by the matrix suffix.
+  if (_m.size() == 0) {
+    s << fmt.matPrefix << fmt.matSuffix;
+    return s;
+  }
+  typename Derived::Nested m = _m;
+  typedef typename Derived::Scalar Scalar;
+  // 8-bit scalars (and complex numbers built on them) are converted to int / std::complex<int> before
+  // being streamed; every other scalar is streamed through a const reference.
+  typedef std::conditional_t<is_same<Scalar, char>::value || is_same<Scalar, unsigned char>::value ||
+                                 is_same<Scalar, numext::int8_t>::value || is_same<Scalar, numext::uint8_t>::value,
+                             int,
+                             std::conditional_t<is_same<Scalar, std::complex<char> >::value ||
+                                                    is_same<Scalar, std::complex<unsigned char> >::value ||
+                                                    is_same<Scalar, std::complex<numext::int8_t> >::value ||
+                                                    is_same<Scalar, std::complex<numext::uint8_t> >::value,
+                                                std::complex<int>, const Scalar&> >
+      PrintType;
+  // Resolve the precision to install on the stream; 0 means "leave the stream's precision alone".
+  std::streamsize explicit_precision = 0;
+  if (fmt.precision != StreamPrecision) {
+    if (fmt.precision != FullPrecision) {
+      explicit_precision = fmt.precision;
+    } else if (!NumTraits<Scalar>::IsInteger) {
+      explicit_precision = significant_decimals_impl<Scalar>::run();
+    }
+  }
+  const bool override_precision = explicit_precision != 0;
+  std::streamsize old_precision = 0;
+  if (override_precision) old_precision = s.precision(explicit_precision);
+  const Index num_rows = m.rows();
+  const Index num_cols = m.cols();
+  // When columns are aligned, every coefficient is first formatted into a scratch stream carrying the
+  // same formatting state as s, and the longest resulting string defines the common field width.
+  Index width = 0;
+  if (!(fmt.flags & DontAlignCols)) {
+    for (Index col = 0; col < num_cols; ++col) {
+      for (Index row = 0; row < num_rows; ++row) {
+        std::stringstream probe;
+        probe.copyfmt(s);
+        probe << static_cast<PrintType>(m.coeff(row, col));
+        const Index length = Index(probe.str().length());
+        if (length > width) width = length;
+      }
+    }
+  }
+  const std::streamsize old_width = s.width();
+  const char old_fill_character = s.fill();
+  s << fmt.matPrefix;
+  for (Index row = 0; row < num_rows; ++row) {
+    if (row != 0) s << fmt.rowSpacer;
+    s << fmt.rowPrefix;
+    for (Index col = 0; col < num_cols; ++col) {
+      if (col != 0) s << fmt.coeffSeparator;
+      // Fill and width are re-applied before each coefficient.
+      if (width) {
+        s.fill(fmt.fill);
+        s.width(width);
+      }
+      s << static_cast<PrintType>(m.coeff(row, col));
+    }
+    s << fmt.rowSuffix;
+    if (row + 1 < num_rows) s << fmt.rowSeparator;
+  }
+  s << fmt.matSuffix;
+  // Put back the stream state that was modified above.
+  if (override_precision) s.precision(old_precision);
+  if (width) {
+    s.fill(old_fill_character);
+    s.width(old_width);
+  }
+  return s;
+}
+}  // end namespace internal
+/** \relates DenseBase
+ *
+ * Outputs the matrix, to the given stream.
+ *
+ * If you wish to print the matrix with a format different than the default, use DenseBase::format().
+ *
+ * It is also possible to change the default format by defining EIGEN_DEFAULT_IO_FORMAT before including Eigen headers.
+ * If not defined, this will automatically be defined to Eigen::IOFormat(), that is the Eigen::IOFormat with default
+ * parameters.
+ *
+ * \sa DenseBase::format()
+ */
+template <typename Derived>
+std::ostream& operator<<(std::ostream& s, const DenseBase<Derived>& m) {
+  // The expression is evaluated with eval() and printed with the default format macro.
+  return internal::print_matrix(s, m.eval(), EIGEN_DEFAULT_IO_FORMAT);
+}
+/** Outputs the diagonal expression \a m to the stream \a s using EIGEN_DEFAULT_IO_FORMAT.
+ * Unlike the DenseBase overload, m.derived() is handed to print_matrix directly, without calling eval(). */
+template <typename Derived>
+std::ostream& operator<<(std::ostream& s, const DiagonalBase<Derived>& m) {
+  return internal::print_matrix(s, m.derived(), EIGEN_DEFAULT_IO_FORMAT);
+}
+}  // end namespace Eigen
+#endif  // EIGEN_IO_H
--- a/eigen-master/Eigen/src/Core/IndexedView.h
+++ b/eigen-master/Eigen/src/Core/IndexedView.h
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#ifndef EIGEN_INDEXED_VIEW_H
+#define EIGEN_INDEXED_VIEW_H
+// IWYU pragma: private
+#include "./InternalHeaderCheck.h"
+namespace Eigen {
+namespace internal {
+// Compile-time traits of IndexedView: sizes, storage order, increments, strides and flags, all derived
+// from the nested expression's traits and from IndexedViewHelper applied to the row/column index types.
+template <typename XprType, typename RowIndices, typename ColIndices>
+struct traits<IndexedView<XprType, RowIndices, ColIndices>> : traits<XprType> {
+  enum {
+    // Sizes are dictated by the index sequences; the maximum sizes simply mirror them.
+    RowsAtCompileTime = int(IndexedViewHelper<RowIndices>::SizeAtCompileTime),
+    ColsAtCompileTime = int(IndexedViewHelper<ColIndices>::SizeAtCompileTime),
+    MaxRowsAtCompileTime = RowsAtCompileTime,
+    MaxColsAtCompileTime = ColsAtCompileTime,
+    // A compile-time row vector is row-major, a compile-time column vector is column-major;
+    // anything else keeps the storage order of the nested expression.
+    XprTypeIsRowMajor = (int(traits<XprType>::Flags) & RowMajorBit) != 0,
+    IsRowMajor = (MaxRowsAtCompileTime == 1 && MaxColsAtCompileTime != 1)   ? 1
+                 : (MaxColsAtCompileTime == 1 && MaxRowsAtCompileTime != 1) ? 0
+                                                                            : XprTypeIsRowMajor,
+    // Compile-time increments of the index sequences, mapped onto inner/outer according to the view's order.
+    RowIncr = int(IndexedViewHelper<RowIndices>::IncrAtCompileTime),
+    ColIncr = int(IndexedViewHelper<ColIndices>::IncrAtCompileTime),
+    InnerIncr = IsRowMajor ? ColIncr : RowIncr,
+    OuterIncr = IsRowMajor ? RowIncr : ColIncr,
+    // If the view's storage order differs from the nested expression's, its inner/outer strides are swapped.
+    HasSameStorageOrderAsXprType = (IsRowMajor == XprTypeIsRowMajor),
+    XprInnerStride = HasSameStorageOrderAsXprType ? int(inner_stride_at_compile_time<XprType>::ret)
+                                                  : int(outer_stride_at_compile_time<XprType>::ret),
+    XprOuterstride = HasSameStorageOrderAsXprType ? int(outer_stride_at_compile_time<XprType>::ret)
+                                                  : int(inner_stride_at_compile_time<XprType>::ret),
+    InnerSize = XprTypeIsRowMajor ? ColsAtCompileTime : RowsAtCompileTime,
+    // Both increments equal to 1 means the view selects a contiguous range in each dimension.
+    IsBlockAlike = InnerIncr == 1 && OuterIncr == 1,
+    // True when the storage order is unchanged and the inner-dimension index type is exactly AllRange<InnerSize>.
+    IsInnerPannel = HasSameStorageOrderAsXprType &&
+                    is_same<AllRange<InnerSize>, std::conditional_t<XprTypeIsRowMajor, ColIndices, RowIndices>>::value,
+    // A stride is known at compile time only if the increment is non-negative, neither DynamicIndex nor
+    // Undefined, and the corresponding nested stride is not Dynamic.
+    InnerStrideAtCompileTime =
+        InnerIncr < 0 || InnerIncr == DynamicIndex || XprInnerStride == Dynamic || InnerIncr == Undefined
+            ? Dynamic
+            : XprInnerStride * InnerIncr,
+    OuterStrideAtCompileTime =
+        OuterIncr < 0 || OuterIncr == DynamicIndex || XprOuterstride == Dynamic || OuterIncr == Undefined
+            ? Dynamic
+            : XprOuterstride * OuterIncr,
+    // Mutually exclusive classification of the result kind: scalar, block, or general indexed view.
+    ReturnAsScalar = is_single_range<RowIndices>::value && is_single_range<ColIndices>::value,
+    ReturnAsBlock = (!ReturnAsScalar) && IsBlockAlike,
+    ReturnAsIndexedView = (!ReturnAsScalar) && (!ReturnAsBlock),
+    // FIXME we deal with compile-time strides if and only if we have DirectAccessBit flag,
+    // but this is too strict regarding negative strides...
+    DirectAccessMask = (int(InnerIncr) != Undefined && int(OuterIncr) != Undefined && InnerIncr >= 0 && OuterIncr >= 0)
+                           ? DirectAccessBit
+                           : 0,
+    FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0,
+    FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0,
+    FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1) ? LinearAccessBit : 0,
+    // Only the hereditary bits (plus DirectAccessBit when allowed by the mask) are inherited from the
+    // nested expression; the remaining bits are recomputed above.
+    Flags = (traits<XprType>::Flags & (HereditaryBits | DirectAccessMask)) | FlagsLvalueBit | FlagsRowMajorBit |
+            FlagsLinearAccessBit
+  };
+  // Block type with the same compile-time sizes as the view.
+  typedef Block<XprType, RowsAtCompileTime, ColsAtCompileTime, IsInnerPannel> BlockType;
+};
+// Forward declaration of the implementation base of IndexedView; the DirectAccess flag selects between
+// the generic definition and the DirectAccess == true partial specialization.
+template <typename XprType, typename RowIndices, typename ColIndices, typename StorageKind, bool DirectAccess>
+class IndexedViewImpl;
+}  // namespace internal
+/** \class IndexedView
+ * \ingroup Core_Module
+ *
+ * \brief Expression of a non-sequential sub-matrix defined by arbitrary sequences of row and column indices
+ *
+ * \tparam XprType the type of the expression in which we are taking the intersections of sub-rows and sub-columns
+ * \tparam RowIndices the type of the object defining the sequence of row indices
+ * \tparam ColIndices the type of the object defining the sequence of column indices
+ *
+ * This class represents an expression of a sub-matrix (or sub-vector) defined as the intersection
+ * of sub-sets of rows and columns, that are themselves defined by generic sequences of row indices \f$
+ * \{r_0,r_1,..r_{m-1}\} \f$ and column indices \f$ \{c_0,c_1,..c_{n-1} \}\f$. Let \f$ A \f$  be the nested matrix, then
+ * the resulting matrix \f$ B \f$ has \c m rows and \c n columns, and its entries are given by: \f$ B(i,j) = A(r_i,c_j)
+ * \f$.
+ *
+ * The \c RowIndices and \c ColIndices types must be compatible with the following API:
+ * \code
+ * <integral type> operator[](Index) const;
+ * Index size() const;
+ * \endcode
+ *
+ * Typical supported types thus include:
+ *  - std::vector<int>
+ *  - std::valarray<int>
+ *  - std::array<int>
+ *  - Eigen::ArrayXi
+ *  - decltype(ArrayXi::LinSpaced(...))
+ *  - Any view/expressions of the previous types
+ *  - Eigen::ArithmeticSequence
+ *  - Eigen::internal::AllRange     (helper for Eigen::placeholders::all)
+ *  - Eigen::internal::SingleRange  (helper for single index)
+ *  - etc.
+ *
+ * In typical usages of %Eigen, this class should never be used directly. It is the return type of
+ * DenseBase::operator()(const RowIndices&, const ColIndices&).
+ *
+ * \sa class Block
+ */
+template <typename XprType, typename RowIndices, typename ColIndices>
+class IndexedView
+    : public internal::IndexedViewImpl<XprType, RowIndices, ColIndices, typename internal::traits<XprType>::StorageKind,
+                                       (internal::traits<IndexedView<XprType, RowIndices, ColIndices>>::Flags &
+                                        DirectAccessBit) != 0> {
+ public:
+  // Same implementation base as in the inheritance list: the last template argument is true exactly when
+  // the view's traits carry DirectAccessBit.
+  typedef typename internal::IndexedViewImpl<
+      XprType, RowIndices, ColIndices, typename internal::traits<XprType>::StorageKind,
+      (internal::traits<IndexedView<XprType, RowIndices, ColIndices>>::Flags & DirectAccessBit) != 0>
+      Base;
+  EIGEN_GENERIC_PUBLIC_INTERFACE(IndexedView)
+  EIGEN_INHERIT_ASSIGNMENT_OPERATORS(IndexedView)
+  /** Builds a view of \a xpr restricted to \a rowIndices and \a colIndices; all arguments are forwarded to Base. */
+  template <typename T0, typename T1>
+  IndexedView(XprType& xpr, const T0& rowIndices, const T1& colIndices) : Base(xpr, rowIndices, colIndices) {}
+};
+namespace internal {
+// Generic API dispatcher
+template <typename XprType, typename RowIndices, typename ColIndices, typename StorageKind, bool DirectAccess>
+class IndexedViewImpl : public internal::generic_xpr_base<IndexedView<XprType, RowIndices, ColIndices>>::type {
+ public:
+  using Base = typename internal::generic_xpr_base<IndexedView<XprType, RowIndices, ColIndices>>::type;
+  using MatrixTypeNested = typename internal::ref_selector<XprType>::non_const_type;
+  using NestedExpression = internal::remove_all_t<XprType>;
+  using Scalar = typename XprType::Scalar;
+  EIGEN_INHERIT_ASSIGNMENT_OPERATORS(IndexedViewImpl)
+  /** Stores the nested expression together with copies of the row and column index objects. */
+  template <typename T0, typename T1>
+  IndexedViewImpl(XprType& xpr, const T0& rowIndices, const T1& colIndices)
+      : m_xpr(xpr), m_rowIndices(rowIndices), m_colIndices(colIndices) {}
+
+  /** \returns number of rows, i.e. the size of the row index sequence */
+  Index rows() const { return IndexedViewHelper<RowIndices>::size(m_rowIndices); }
+
+  /** \returns number of columns, i.e. the size of the column index sequence */
+  Index cols() const { return IndexedViewHelper<ColIndices>::size(m_colIndices); }
+
+  /** \returns the nested expression (read-only access) */
+  const NestedExpression& nestedExpression() const { return m_xpr; }
+
+  /** \returns the nested expression */
+  std::remove_reference_t<XprType>& nestedExpression() { return m_xpr; }
+
+  /** \returns a const reference to the object storing/generating the row indices */
+  const RowIndices& rowIndices() const { return m_rowIndices; }
+
+  /** \returns a const reference to the object storing/generating the column indices */
+  const ColIndices& colIndices() const { return m_colIndices; }
+
+  /** \returns a reference to the nested coefficient addressed by the (rowId, colId) entry of the view */
+  constexpr Scalar& coeffRef(Index rowId, Index colId) {
+    return this->nestedExpression().coeffRef(m_rowIndices[rowId], m_colIndices[colId]);
+  }
+
+  /** \returns a const reference to the nested coefficient addressed by the (rowId, colId) entry of the view */
+  constexpr const Scalar& coeffRef(Index rowId, Index colId) const {
+    return this->nestedExpression().coeffRef(m_rowIndices[rowId], m_colIndices[colId]);
+  }
+
+ protected:
+  MatrixTypeNested m_xpr;
+  RowIndices m_rowIndices;
+  ColIndices m_colIndices;
+};
+// Specialization used when the view has direct access: it adds increments, strides and data() on top of
+// the generic implementation.
+template <typename XprType, typename RowIndices, typename ColIndices, typename StorageKind>
+class IndexedViewImpl<XprType, RowIndices, ColIndices, StorageKind, true>
+    : public IndexedViewImpl<XprType, RowIndices, ColIndices, StorageKind, false> {
+ public:
+  using Base = internal::IndexedViewImpl<XprType, RowIndices, ColIndices,
+                                         typename internal::traits<XprType>::StorageKind, false>;
+  using Derived = IndexedView<XprType, RowIndices, ColIndices>;
+  EIGEN_INHERIT_ASSIGNMENT_OPERATORS(IndexedViewImpl)
+  template <typename T0, typename T1>
+  IndexedViewImpl(XprType& xpr, const T0& rowIndices, const T1& colIndices) : Base(xpr, rowIndices, colIndices) {}
+
+  /** \returns the row increment: the compile-time value when it is known, otherwise the one reported by
+   * IndexedViewHelper for the stored row indices */
+  Index rowIncrement() const {
+    if (traits<Derived>::RowIncr == DynamicIndex || traits<Derived>::RowIncr == Undefined) {
+      return IndexedViewHelper<RowIndices>::incr(this->rowIndices());
+    }
+    return traits<Derived>::RowIncr;
+  }
+
+  /** \returns the column increment: the compile-time value when it is known, otherwise the one reported by
+   * IndexedViewHelper for the stored column indices */
+  Index colIncrement() const {
+    if (traits<Derived>::ColIncr == DynamicIndex || traits<Derived>::ColIncr == Undefined) {
+      return IndexedViewHelper<ColIndices>::incr(this->colIndices());
+    }
+    return traits<Derived>::ColIncr;
+  }
+
+  /** \returns the increment along the inner dimension of the view */
+  Index innerIncrement() const {
+    if (traits<Derived>::IsRowMajor) return colIncrement();
+    return rowIncrement();
+  }
+
+  /** \returns the increment along the outer dimension of the view */
+  Index outerIncrement() const {
+    if (traits<Derived>::IsRowMajor) return rowIncrement();
+    return colIncrement();
+  }
+
+  /** \returns the nested data pointer advanced to the coefficient addressed by the first row index and
+   * the first column index */
+  std::decay_t<typename XprType::Scalar>* data() {
+    auto& nested = this->nestedExpression();
+    const Index first_row_offset = this->rowIndices()[0] * nested.rowStride();
+    const Index first_col_offset = this->colIndices()[0] * nested.colStride();
+    return nested.data() + first_row_offset + first_col_offset;
+  }
+
+  /** Const counterpart of data(). */
+  const std::decay_t<typename XprType::Scalar>* data() const {
+    const auto& nested = this->nestedExpression();
+    const Index first_row_offset = this->rowIndices()[0] * nested.rowStride();
+    const Index first_col_offset = this->colIndices()[0] * nested.colStride();
+    return nested.data() + first_row_offset + first_col_offset;
+  }
+
+  /** \returns the inner stride: the compile-time value unless it is Dynamic, otherwise the inner increment
+   * times the inner stride of the nested expression */
+  EIGEN_DEVICE_FUNC constexpr Index innerStride() const noexcept {
+    if (traits<Derived>::InnerStrideAtCompileTime == Dynamic) {
+      return innerIncrement() * this->nestedExpression().innerStride();
+    }
+    return traits<Derived>::InnerStrideAtCompileTime;
+  }
+
+  /** \returns the outer stride: the compile-time value unless it is Dynamic, otherwise the outer increment
+   * times the outer stride of the nested expression */
+  EIGEN_DEVICE_FUNC constexpr Index outerStride() const noexcept {
+    if (traits<Derived>::OuterStrideAtCompileTime == Dynamic) {
+      return outerIncrement() * this->nestedExpression().outerStride();
+    }
+    return traits<Derived>::OuterStrideAtCompileTime;
+  }
+};
+// Index-based evaluator for IndexedView: every coefficient access is translated through the view's row
+// and column index sequences and forwarded to the evaluator of the nested expression.
+template <typename ArgType, typename RowIndices, typename ColIndices>
+struct unary_evaluator<IndexedView<ArgType, RowIndices, ColIndices>, IndexBased>
+    : evaluator_base<IndexedView<ArgType, RowIndices, ColIndices>> {
+  typedef IndexedView<ArgType, RowIndices, ColIndices> XprType;
+  enum {
+    CoeffReadCost = evaluator<ArgType>::CoeffReadCost /* TODO + cost of row/col index */,
+    // Linear access is only advertised for compile-time vectors.
+    FlagsLinearAccessBit =
+        (traits<XprType>::RowsAtCompileTime == 1 || traits<XprType>::ColsAtCompileTime == 1) ? LinearAccessBit : 0,
+    FlagsRowMajorBit = traits<XprType>::FlagsRowMajorBit,
+    // Hereditary bits of the nested evaluator minus RowMajorBit, which is replaced by the view's own value.
+    Flags = (evaluator<ArgType>::Flags & (HereditaryBits & ~RowMajorBit /*| LinearAccessBit | DirectAccessBit*/)) |
+            FlagsLinearAccessBit | FlagsRowMajorBit,
+    Alignment = 0
+  };
+  EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& xpr) : m_argImpl(xpr.nestedExpression()), m_xpr(xpr) {
+    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
+  }
+  typedef typename XprType::Scalar Scalar;
+  typedef typename XprType::CoeffReturnType CoeffReturnType;
+  /** Reads the nested coefficient selected by entry (row, col) of the view; the mapped indices are
+   * asserted to lie within the nested expression. */
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const {
+    eigen_assert(m_xpr.rowIndices()[row] >= 0 && m_xpr.rowIndices()[row] < m_xpr.nestedExpression().rows() &&
+                 m_xpr.colIndices()[col] >= 0 && m_xpr.colIndices()[col] < m_xpr.nestedExpression().cols());
+    return m_argImpl.coeff(m_xpr.rowIndices()[row], m_xpr.colIndices()[col]);
+  }
+  /** Writable access to the nested coefficient selected by entry (row, col) of the view. */
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) {
+    eigen_assert(m_xpr.rowIndices()[row] >= 0 && m_xpr.rowIndices()[row] < m_xpr.nestedExpression().rows() &&
+                 m_xpr.colIndices()[col] >= 0 && m_xpr.colIndices()[col] < m_xpr.nestedExpression().cols());
+    return m_argImpl.coeffRef(m_xpr.rowIndices()[row], m_xpr.colIndices()[col]);
+  }
+  /** Linear writable access: \a index addresses a column when RowsAtCompileTime == 1, a row otherwise. */
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) {
+    EIGEN_STATIC_ASSERT_LVALUE(XprType)
+    Index row = XprType::RowsAtCompileTime == 1 ? 0 : index;
+    Index col = XprType::RowsAtCompileTime == 1 ? index : 0;
+    eigen_assert(m_xpr.rowIndices()[row] >= 0 && m_xpr.rowIndices()[row] < m_xpr.nestedExpression().rows() &&
+                 m_xpr.colIndices()[col] >= 0 && m_xpr.colIndices()[col] < m_xpr.nestedExpression().cols());
+    return m_argImpl.coeffRef(m_xpr.rowIndices()[row], m_xpr.colIndices()[col]);
+  }
+  /** Linear read-only reference access, using the same index mapping as the non-const overload. */
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeffRef(Index index) const {
+    Index row = XprType::RowsAtCompileTime == 1 ? 0 : index;
+    Index col = XprType::RowsAtCompileTime == 1 ? index : 0;
+    eigen_assert(m_xpr.rowIndices()[row] >= 0 && m_xpr.rowIndices()[row] < m_xpr.nestedExpression().rows() &&
+                 m_xpr.colIndices()[col] >= 0 && m_xpr.colIndices()[col] < m_xpr.nestedExpression().cols());
+    return m_argImpl.coeffRef(m_xpr.rowIndices()[row], m_xpr.colIndices()[col]);
+  }
+  /** Linear read access, using the same index mapping as coeffRef(Index).
+   * NOTE(review): the top-level const on this return type differs from coeff(Index, Index) above and
+   * looks redundant when CoeffReturnType is a value type -- confirm before changing. */
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index index) const {
+    Index row = XprType::RowsAtCompileTime == 1 ? 0 : index;
+    Index col = XprType::RowsAtCompileTime == 1 ? index : 0;
+    eigen_assert(m_xpr.rowIndices()[row] >= 0 && m_xpr.rowIndices()[row] < m_xpr.nestedExpression().rows() &&
+                 m_xpr.colIndices()[col] >= 0 && m_xpr.colIndices()[col] < m_xpr.nestedExpression().cols());
+    return m_argImpl.coeff(m_xpr.rowIndices()[row], m_xpr.colIndices()[col]);
+  }
+ protected:
+  // Evaluator of the nested expression.
+  evaluator<ArgType> m_argImpl;
+  // The view itself, held by reference (not copied); it supplies the index sequences.
+  const XprType& m_xpr;
+};
+}  // end namespace internal
+}  // end namespace Eigen
+#endif  // EIGEN_INDEXED_VIEW_H
--- a/eigen-master/Eigen/src/Core/InnerProduct.h
+++ b/eigen-master/Eigen/src/Core/InnerProduct.h
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2024 Charlie Schlosser <cs.schlosser@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#ifndef EIGEN_INNER_PRODUCT_EVAL_H
+#define EIGEN_INNER_PRODUCT_EVAL_H
+// IWYU pragma: private
+#include "./InternalHeaderCheck.h"
+namespace Eigen {
+namespace internal {
+// Recursively searches for the largest simd type that does not exceed Size, or the smallest if no such type exists.
+// Stop is true when the current packet's size is at most Size, or when its half type is the packet itself
+// (i.e. it cannot be halved any further).
+template <typename Scalar, int Size, typename Packet = typename packet_traits<Scalar>::type,
+          bool Stop =
+              (unpacket_traits<Packet>::size <= Size) || is_same<Packet, typename unpacket_traits<Packet>::half>::value>
+struct find_inner_product_packet_helper;
+// Recursive case: the current packet is too large, so retry with its half packet type.
+template <typename Scalar, int Size, typename Packet>
+struct find_inner_product_packet_helper<Scalar, Size, Packet, false> {
+  using type = typename find_inner_product_packet_helper<Scalar, Size, typename unpacket_traits<Packet>::half>::type;
+};
+// Terminal case: the current packet is the answer.
+template <typename Scalar, int Size, typename Packet>
+struct find_inner_product_packet_helper<Scalar, Size, Packet, true> {
+  using type = Packet;
+};
+// Selects the packet type used by the inner product for a compile-time size, via the recursive helper.
+template <typename Scalar, int Size>
+struct find_inner_product_packet : find_inner_product_packet_helper<Scalar, Size> {};
+// For a Dynamic size no search is performed: the default packet type of Scalar is used.
+template <typename Scalar>
+struct find_inner_product_packet<Scalar, Dynamic> {
+  using type = typename packet_traits<Scalar>::type;
+};
+// Validation helper for inner products. The EIGEN_STATIC_ASSERT_* macros are expanded at class scope
+// (per their names they check that both operands are vectors of the same size), and run() adds a
+// size check through eigen_assert unless EIGEN_NO_DEBUG is defined.
+template <typename Lhs, typename Rhs>
+struct inner_product_assert {
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Lhs)
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Rhs)
+  EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Lhs, Rhs)
+#ifndef EIGEN_NO_DEBUG
+  static EIGEN_DEVICE_FUNC void run(const Lhs& lhs, const Rhs& rhs) {
+    eigen_assert((lhs.size() == rhs.size()) && "Inner product: lhs and rhs vectors must have same size");
+  }
+#else
+  // With EIGEN_NO_DEBUG defined the check is a no-op.
+  static EIGEN_DEVICE_FUNC void run(const Lhs&, const Rhs&) {}
+#endif
+};
+// Pairs the evaluators of both operands with the functor Func and exposes coefficient-wise and
+// packet-wise evaluation of Func, both with and without an accumulator argument.
+template <typename Func, typename Lhs, typename Rhs>
+struct inner_product_evaluator {
+  static constexpr int LhsFlags = evaluator<Lhs>::Flags;
+  static constexpr int RhsFlags = evaluator<Rhs>::Flags;
+  static constexpr int SizeAtCompileTime = size_prefer_fixed(Lhs::SizeAtCompileTime, Rhs::SizeAtCompileTime);
+  static constexpr int MaxSizeAtCompileTime =
+      min_size_prefer_fixed(Lhs::MaxSizeAtCompileTime, Rhs::MaxSizeAtCompileTime);
+  static constexpr int LhsAlignment = evaluator<Lhs>::Alignment;
+  static constexpr int RhsAlignment = evaluator<Rhs>::Alignment;
+  using Scalar = typename Func::result_type;
+  using Packet = typename find_inner_product_packet<Scalar, SizeAtCompileTime>::type;
+  // Vectorization requires packet access on both operands and in the functor, and a packet that is
+  // not larger than the maximum size (unless that maximum is Dynamic).
+  static constexpr bool Vectorize =
+      bool(LhsFlags & RhsFlags & PacketAccessBit) && Func::PacketAccess &&
+      ((MaxSizeAtCompileTime == Dynamic) || (unpacket_traits<Packet>::size <= MaxSizeAtCompileTime));
+  /** Builds the operand evaluators, records lhs.size() as the size, and runs the size checks. */
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit inner_product_evaluator(const Lhs& lhs, const Rhs& rhs,
+                                                                         Func func = Func())
+      : m_func(func), m_lhs(lhs), m_rhs(rhs), m_size(lhs.size()) {
+    inner_product_assert<Lhs, Rhs>::run(lhs, rhs);
+  }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const { return m_size.value(); }
+  /** Applies the functor to the coefficients of both operands at \a index. */
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index index) const {
+    return m_func.coeff(m_lhs.coeff(index), m_rhs.coeff(index));
+  }
+  /** Same as coeff(Index), additionally passing the running value \a value to the functor. */
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(const Scalar& value, Index index) const {
+    return m_func.coeff(value, m_lhs.coeff(index), m_rhs.coeff(index));
+  }
+  /** Applies the functor to the packets of both operands starting at \a index. */
+  template <typename PacketType, int LhsMode = LhsAlignment, int RhsMode = RhsAlignment>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const {
+    return m_func.packet(m_lhs.template packet<LhsMode, PacketType>(index),
+                         m_rhs.template packet<RhsMode, PacketType>(index));
+  }
+  /** Same as packet(Index), additionally passing the running packet \a value to the functor. */
+  template <typename PacketType, int LhsMode = LhsAlignment, int RhsMode = RhsAlignment>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(const PacketType& value, Index index) const {
+    return m_func.packet(value, m_lhs.template packet<LhsMode, PacketType>(index),
+                         m_rhs.template packet<RhsMode, PacketType>(index));
+  }
+  const Func m_func;
+  const evaluator<Lhs> m_lhs;
+  const evaluator<Rhs> m_rhs;
+  // Taken from lhs.size() at construction.
+  const variable_if_dynamic<Index, SizeAtCompileTime> m_size;
+};
+// Reduction driver; the boolean selects between the scalar and the packet implementation.
+template <typename Evaluator, bool Vectorize = Evaluator::Vectorize>
+struct inner_product_impl;
+// scalar loop: folds the coefficients one at a time, in increasing index order
+template <typename Evaluator>
+struct inner_product_impl<Evaluator, false> {
+  using Scalar = typename Evaluator::Scalar;
+  /** \returns Scalar(0) for an empty evaluator, otherwise the accumulation of all coefficients. */
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval) {
+    const Index count = eval.size();
+    if (count == 0) return Scalar(0);
+    // The first term seeds the accumulator; every later term is folded into it.
+    Scalar accum = eval.coeff(0);
+    Index pos = 1;
+    while (pos < count) {
+      accum = eval.coeff(accum, pos);
+      ++pos;
+    }
+    return accum;
+  }
+};
+// vector loop: accumulates into up to four independent packet accumulators, then reduces them and
+// finishes the leftover coefficients with scalar operations
+template <typename Evaluator>
+struct inner_product_impl<Evaluator, true> {
+  using UnsignedIndex = std::make_unsigned_t<Index>;
+  using Scalar = typename Evaluator::Scalar;
+  using Packet = typename Evaluator::Packet;
+  static constexpr int PacketSize = unpacket_traits<Packet>::size;
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval) {
+    const UnsignedIndex size = static_cast<UnsignedIndex>(eval.size());
+    // Not even one full packet: defer to the scalar implementation.
+    if (size < PacketSize) return inner_product_impl<Evaluator, false>::run(eval);
+    // packetEnd / quadEnd: size rounded down to a multiple of one packet / of four packets.
+    const UnsignedIndex packetEnd = numext::round_down(size, PacketSize);
+    const UnsignedIndex quadEnd = numext::round_down(size, 4 * PacketSize);
+    const UnsignedIndex numPackets = size / PacketSize;
+    // Number of whole packets (0 to 3) left after the last complete group of four.
+    const UnsignedIndex numRemPackets = (packetEnd - quadEnd) / PacketSize;
+    // presult1..3 are only initialized, and later only combined, when enough packets exist.
+    Packet presult0, presult1, presult2, presult3;
+    presult0 = eval.template packet<Packet>(0 * PacketSize);
+    if (numPackets >= 2) presult1 = eval.template packet<Packet>(1 * PacketSize);
+    if (numPackets >= 3) presult2 = eval.template packet<Packet>(2 * PacketSize);
+    if (numPackets >= 4) {
+      presult3 = eval.template packet<Packet>(3 * PacketSize);
+      // Main loop: four packets per iteration, one per accumulator.
+      for (UnsignedIndex k = 4 * PacketSize; k < quadEnd; k += 4 * PacketSize) {
+        presult0 = eval.packet(presult0, k + 0 * PacketSize);
+        presult1 = eval.packet(presult1, k + 1 * PacketSize);
+        presult2 = eval.packet(presult2, k + 2 * PacketSize);
+        presult3 = eval.packet(presult3, k + 3 * PacketSize);
+      }
+      // Fold in the remaining whole packets past quadEnd.
+      if (numRemPackets >= 1) presult0 = eval.packet(presult0, quadEnd + 0 * PacketSize);
+      if (numRemPackets >= 2) presult1 = eval.packet(presult1, quadEnd + 1 * PacketSize);
+      if (numRemPackets == 3) presult2 = eval.packet(presult2, quadEnd + 2 * PacketSize);
+      presult2 = padd(presult2, presult3);
+    }
+    // Combine the accumulators that were actually used into presult0.
+    if (numPackets >= 3) presult1 = padd(presult1, presult2);
+    if (numPackets >= 2) presult0 = padd(presult0, presult1);
+    Scalar result = predux(presult0);
+    // Scalar tail: coefficients that do not fill a whole packet.
+    for (UnsignedIndex k = packetEnd; k < size; k++) {
+      result = eval.coeff(result, k);
+    }
+    return result;
+  }
+};
+// Applies complex conjugation to a scalar or a packet when Conj is true, and is the identity otherwise.
+template <typename Scalar, bool Conj>
+struct conditional_conj;
+// Conj == true: conjugate through numext::conj (scalars) and pconj (packets).
+template <typename Scalar>
+struct conditional_conj<Scalar, true> {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(const Scalar& a) { return numext::conj(a); }
+  template <typename Packet>
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packet(const Packet& a) {
+    return pconj(a);
+  }
+};
+// Conj == false: return the argument unchanged.
+template <typename Scalar>
+struct conditional_conj<Scalar, false> {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(const Scalar& a) { return a; }
+  template <typename Packet>
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packet(const Packet& a) {
+    return a;
+  }
+};
+// Inner-product functor for operands with different scalar types: computes conj?(a) * b, optionally
+// added to an accumulator. This generic version offers no packet path (PacketAccess is false).
+template <typename LhsScalar, typename RhsScalar, bool Conj>
+struct scalar_inner_product_op {
+  using result_type = typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType;
+  using conj_helper = conditional_conj<LhsScalar, Conj>;
+  /** \returns the product of \a a (conjugated when Conj is true) and \a b. */
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type coeff(const LhsScalar& a, const RhsScalar& b) const {
+    return (conj_helper::coeff(a) * b);
+  }
+  /** \returns the same product added to \a accum. */
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type coeff(const result_type& accum, const LhsScalar& a,
+                                                          const RhsScalar& b) const {
+    return (conj_helper::coeff(a) * b) + accum;
+  }
+  static constexpr bool PacketAccess = false;
+};
+// Specialization for operands sharing one scalar type: uses pmul / pmadd for both the scalar and the
+// packet paths, and enables packet access when the scalar's packet traits report HasMul and HasAdd.
+template <typename Scalar, bool Conj>
+struct scalar_inner_product_op<Scalar, Scalar, Conj> {
+  using result_type = Scalar;
+  using conj_helper = conditional_conj<Scalar, Conj>;
+  /** \returns pmul of \a a (conjugated when Conj is true) and \a b. */
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(const Scalar& a, const Scalar& b) const {
+    return pmul(conj_helper::coeff(a), b);
+  }
+  /** \returns pmadd of \a a (conjugated when Conj is true), \a b and \a accum. */
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(const Scalar& accum, const Scalar& a, const Scalar& b) const {
+    return pmadd(conj_helper::coeff(a), b, accum);
+  }
+  /** Packet counterpart of coeff(a, b). */
+  template <typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packet(const Packet& a, const Packet& b) const {
+    return pmul(conj_helper::packet(a), b);
+  }
+  /** Packet counterpart of coeff(accum, a, b). */
+  template <typename Packet>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packet(const Packet& accum, const Packet& a, const Packet& b) const {
+    return pmadd(conj_helper::packet(a), b, accum);
+  }
+  static constexpr bool PacketAccess = packet_traits<Scalar>::HasMul && packet_traits<Scalar>::HasAdd;
+};
+// Default inner product: wires the operand types to scalar_inner_product_op, builds the matching
+// evaluator and runs the reduction selected by inner_product_impl.
+template <typename Lhs, typename Rhs, bool Conj>
+struct default_inner_product_impl {
+  using LhsScalar = typename traits<Lhs>::Scalar;
+  using RhsScalar = typename traits<Rhs>::Scalar;
+  using Op = scalar_inner_product_op<LhsScalar, RhsScalar, Conj>;
+  using Evaluator = inner_product_evaluator<Op, Lhs, Rhs>;
+  using result_type = typename Evaluator::Scalar;
+  /** \returns the inner product of \a a and \a b; the left operand is conjugated when Conj is true. */
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type run(const MatrixBase<Lhs>& a, const MatrixBase<Rhs>& b) {
+    const Lhs& lhs = a.derived();
+    const Rhs& rhs = b.derived();
+    const Evaluator evaluator(lhs, rhs, Op());
+    return inner_product_impl<Evaluator>::run(evaluator);
+  }
+};
+// dot product implementation: the default inner product with Conj == true.
+template <typename Lhs, typename Rhs>
+struct dot_impl : default_inner_product_impl<Lhs, Rhs, true> {};
+}  // namespace internal
+}  // namespace Eigen
+#endif  // EIGEN_INNER_PRODUCT_EVAL_H
--- a/eigen-master/Eigen/src/Core/InternalHeaderCheck.h
+++ b/eigen-master/Eigen/src/Core/InternalHeaderCheck.h
+// Guard: stops compilation with an error unless EIGEN_CORE_MODULE_H is already defined when this
+// header is included.
+#ifndef EIGEN_CORE_MODULE_H
+#error "Please include Eigen/Core instead of including headers inside the src directory directly."
+#endif
--- a/eigen-master/Eigen/src/Core/Inverse.h
+++ b/eigen-master/Eigen/src/Core/Inverse.h
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014-2019 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#ifndef EIGEN_INVERSE_H
+#define EIGEN_INVERSE_H
+// IWYU pragma: private
+#include "./InternalHeaderCheck.h"
+namespace Eigen {
+// Forward declaration of the base class of Inverse, parameterized by the storage kind.
+template <typename XprType, typename StorageKind>
+class InverseImpl;
+namespace internal {
+// Traits of Inverse<XprType>: inherited from the plain object type of XprType, except that Flags keeps
+// only the RowMajorBit of that plain object's flags.
+template <typename XprType>
+struct traits<Inverse<XprType> > : traits<typename XprType::PlainObject> {
+  typedef typename XprType::PlainObject PlainObject;
+  typedef traits<PlainObject> BaseTraits;
+  enum { Flags = BaseTraits::Flags & RowMajorBit };
+};
+}  // end namespace internal
+/** \class Inverse
+ *
+ * \brief Expression of the inverse of another expression
+ *
+ * \tparam XprType the type of the expression of which we are taking the inverse
+ *
+ * This class represents an abstract expression of A.inverse()
+ * and most of the time this is the only way it is used.
+ *
+ */
+template <typename XprType>
+class Inverse : public InverseImpl<XprType, typename internal::traits<XprType>::StorageKind> {
+ public:
+  using StorageIndex = typename XprType::StorageIndex;
+  using Scalar = typename XprType::Scalar;
+  using XprTypeNested = typename internal::ref_selector<XprType>::type;
+  using XprTypeNestedCleaned = internal::remove_all_t<XprTypeNested>;
+  using Nested = typename internal::ref_selector<Inverse>::type;
+  using NestedExpression = internal::remove_all_t<XprType>;
+
+  /** Wraps \a xpr; nothing is computed at construction time. */
+  explicit EIGEN_DEVICE_FUNC Inverse(const XprType& xpr) : m_xpr(xpr) {}
+
+  /** \returns the number of rows of the inverse, i.e. the number of columns of the nested expression */
+  EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_xpr.cols(); }
+
+  /** \returns the number of columns of the inverse, i.e. the number of rows of the nested expression */
+  EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_xpr.rows(); }
+
+  /** \returns the expression being inverted */
+  EIGEN_DEVICE_FUNC const XprTypeNestedCleaned& nestedExpression() const { return m_xpr; }
+
+ protected:
+  XprTypeNested m_xpr;
+};
+// Generic API dispatcher
+// Base class of Inverse, deriving from the generic expression base selected by generic_xpr_base.
+template <typename XprType, typename StorageKind>
+class InverseImpl : public internal::generic_xpr_base<Inverse<XprType> >::type {
+ public:
+  typedef typename internal::generic_xpr_base<Inverse<XprType> >::type Base;
+  typedef typename XprType::Scalar Scalar;
+ private:
+  // Declared private and not defined in this class.
+  // NOTE(review): presumably this prevents per-coefficient access to an unevaluated inverse -- confirm.
+  Scalar coeff(Index row, Index col) const;
+  Scalar coeff(Index i) const;
+};
+namespace internal {
+/** \internal
+ * \brief Default evaluator for Inverse expression.
+ *
+ * This default evaluator for Inverse expressions simply evaluates the inverse into a temporary
+ * by a call to internal::call_assignment_no_alias.
+ * Therefore, inverse implementers only have to specialize Assignment<Dst,Inverse<...>, ...> for
+ * their own nested expression.
+ *
+ * \sa class Inverse
+ */
+template <typename ArgType>
+struct unary_evaluator<Inverse<ArgType> > : public evaluator<typename Inverse<ArgType>::PlainObject> {
+  typedef Inverse<ArgType> InverseType;
+  typedef typename InverseType::PlainObject PlainObject;
+  typedef evaluator<PlainObject> Base;
+  // Flags of the plain-object evaluator, with EvalBeforeNestingBit added.
+  enum { Flags = Base::Flags | EvalBeforeNestingBit };
+  /** Sizes the temporary from \a inv_xpr, then assigns the inverse expression to it. */
+  EIGEN_DEVICE_FUNC unary_evaluator(const InverseType& inv_xpr) : m_result(inv_xpr.rows(), inv_xpr.cols()) {
+    // The base evaluator is constructed in place from m_result here, inside the body,
+    // i.e. once m_result itself has been initialized; only then is the inverse assigned.
+    internal::construct_at<Base>(this, m_result);
+    internal::call_assignment_no_alias(m_result, inv_xpr);
+  }
+ protected:
+  // Temporary that receives the evaluated inverse.
+  PlainObject m_result;
+};
+}  // end namespace internal
+}  // end namespace Eigen
+#endif  // EIGEN_INVERSE_H
--- a/eigen-master/Eigen/src/Core/Map.h
+++ b/eigen-master/Eigen/src/Core/Map.h
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2007-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#ifndef EIGEN_MAP_H
+#define EIGEN_MAP_H
+// IWYU pragma: private
+#include "./InternalHeaderCheck.h"
+namespace Eigen {
+namespace internal {
+// Traits of Map: inherits the traits of the mapped plain object type and (re)defines the
+// compile-time strides, the alignment and the flags.
+template <typename PlainObjectType, int MapOptions, typename StrideType>
+struct traits<Map<PlainObjectType, MapOptions, StrideType> > : public traits<PlainObjectType> {
+  typedef traits<PlainObjectType> TraitsBase;
+  enum {
+    // Compile-time size along the inner dimension: the column count for a row-major plain
+    // type, the row count otherwise.
+    PlainObjectTypeInnerSize = ((traits<PlainObjectType>::Flags & RowMajorBit) == RowMajorBit)
+                                   ? PlainObjectType::ColsAtCompileTime
+                                   : PlainObjectType::RowsAtCompileTime,
+    // When StrideType's compile-time inner stride is 0, the plain object's inner stride is used.
+    InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
+                                   ? int(PlainObjectType::InnerStrideAtCompileTime)
+                                   : int(StrideType::InnerStrideAtCompileTime),
+    // When StrideType's compile-time outer stride is 0, it is derived as
+    // inner stride * inner size, or Dynamic if either factor is Dynamic.
+    OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
+                                   ? (InnerStrideAtCompileTime == Dynamic || PlainObjectTypeInnerSize == Dynamic
+                                          ? Dynamic
+                                          : int(InnerStrideAtCompileTime) * int(PlainObjectTypeInnerSize))
+                                   : int(StrideType::OuterStrideAtCompileTime),
+    // Alignment part of MapOptions.
+    Alignment = int(MapOptions) & int(AlignedMask),
+    // Flags of the plain type without NestByRefBit ...
+    Flags0 = TraitsBase::Flags & (~NestByRefBit),
+    // ... and additionally without LvalueBit when the plain type is not an lvalue.
+    Flags = is_lvalue<PlainObjectType>::value ? int(Flags0) : (int(Flags0) & ~LvalueBit)
+  };
+ private:
+  enum { Options };  // Expressions don't have Options
+};
+}  // namespace internal
+/** \class Map
+ * \ingroup Core_Module
+ *
+ * \brief A matrix or vector expression mapping an existing array of data.
+ *
+ * \tparam PlainObjectType the equivalent matrix type of the mapped data
+ * \tparam MapOptions specifies the pointer alignment in bytes. It can be: \c #Aligned128, \c #Aligned64, \c #Aligned32,
+ * \c #Aligned16, \c #Aligned8 or \c #Unaligned. The default is \c #Unaligned.
+ * \tparam StrideType optionally specifies strides. By default, Map assumes the memory layout of an ordinary,
+ * contiguous array. This can be overridden by specifying strides. The type passed here must be a specialization of
+ * the Stride template, see examples below.
+ *
+ * This class represents a matrix or vector expression mapping an existing array of data.
+ * It can be used to let Eigen interface without any overhead with non-Eigen data structures,
+ * such as plain C arrays or structures from other libraries. By default, it assumes that the
+ * data is laid out contiguously in memory. You can however override this by explicitly specifying
+ * inner and outer strides.
+ *
+ * Here's an example of simply mapping a contiguous array as a \ref TopicStorageOrders "column-major" matrix:
+ * \include Map_simple.cpp
+ * Output: \verbinclude Map_simple.out
+ *
+ * If you need to map non-contiguous arrays, you can do so by specifying strides:
+ *
+ * Here's an example of mapping an array as a vector, specifying an inner stride, that is, the pointer
+ * increment between two consecutive coefficients. Here, we're specifying the inner stride as a compile-time
+ * fixed value.
+ * \include Map_inner_stride.cpp
+ * Output: \verbinclude Map_inner_stride.out
+ *
+ * Here's an example of mapping an array while specifying an outer stride. Here, since we're mapping
+ * as a column-major matrix, 'outer stride' means the pointer increment between two consecutive columns.
+ * Here, we're specifying the outer stride as a runtime parameter. Note that here \c OuterStride<> is
+ * a short version of \c OuterStride<Dynamic> because the default template parameter of OuterStride
+ * is \c Dynamic.
+ * \include Map_outer_stride.cpp
+ * Output: \verbinclude Map_outer_stride.out
+ *
+ * For more details and for an example of specifying both an inner and an outer stride, see class Stride.
+ *
+ * \b Tip: to change the array of data mapped by a Map object, you can use the C++
+ * placement new syntax:
+ *
+ * Example: \include Map_placement_new.cpp
+ * Output: \verbinclude Map_placement_new.out
+ *
+ * This class is the return type of PlainObjectBase::Map() but can also be used directly.
+ *
+ * \sa PlainObjectBase::Map(), \ref TopicStorageOrders
+ */
+template <typename PlainObjectType, int MapOptions, typename StrideType>
+class Map : public MapBase<Map<PlainObjectType, MapOptions, StrideType> > {
+ public:
+  typedef MapBase<Map> Base;
+  EIGEN_DENSE_PUBLIC_INTERFACE(Map)
+  typedef typename Base::PointerType PointerType;
+  typedef PointerType PointerArgType;
+  /** \internal Identity conversion from the constructor argument type to PointerType. */
+  EIGEN_DEVICE_FUNC inline PointerType cast_to_pointer_type(PointerArgType ptr) { return ptr; }
+  /** \returns the inner stride: the value stored in the stride object when StrideType specifies
+   * a non-zero compile-time inner stride, 1 otherwise. */
+  EIGEN_DEVICE_FUNC constexpr Index innerStride() const {
+    return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1;
+  }
+  /** \returns the outer stride. In order of precedence: the value stored in the stride object when
+   * StrideType specifies a non-zero compile-time outer stride; the compile-time outer stride from the
+   * traits when it is not Dynamic; otherwise innerStride() times size() for vectors, cols() for
+   * row-major matrices, or rows() for the remaining case. */
+  EIGEN_DEVICE_FUNC constexpr Index outerStride() const {
+    return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer()
+           : internal::traits<Map>::OuterStrideAtCompileTime != Dynamic
+               ? Index(internal::traits<Map>::OuterStrideAtCompileTime)
+           : IsVectorAtCompileTime    ? (this->size() * innerStride())
+           : int(Flags) & RowMajorBit ? (this->cols() * innerStride())
+                                      : (this->rows() * innerStride());
+  }
+  /** Constructor in the fixed-size case.
+   *
+   * \param dataPtr pointer to the array to map
+   * \param stride optional Stride object, passing the strides.
+   */
+  EIGEN_DEVICE_FUNC explicit inline Map(PointerArgType dataPtr, const StrideType& stride = StrideType())
+      : Base(cast_to_pointer_type(dataPtr)), m_stride(stride) {}
+  /** Constructor in the dynamic-size vector case.
+   *
+   * \param dataPtr pointer to the array to map
+   * \param size the size of the vector expression
+   * \param stride optional Stride object, passing the strides.
+   */
+  EIGEN_DEVICE_FUNC inline Map(PointerArgType dataPtr, Index size, const StrideType& stride = StrideType())
+      : Base(cast_to_pointer_type(dataPtr), size), m_stride(stride) {}
+  /** Constructor in the dynamic-size matrix case.
+   *
+   * \param dataPtr pointer to the array to map
+   * \param rows the number of rows of the matrix expression
+   * \param cols the number of columns of the matrix expression
+   * \param stride optional Stride object, passing the strides.
+   */
+  EIGEN_DEVICE_FUNC inline Map(PointerArgType dataPtr, Index rows, Index cols, const StrideType& stride = StrideType())
+      : Base(cast_to_pointer_type(dataPtr), rows, cols), m_stride(stride) {}
+  EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Map)
+ protected:
+  // Copy of the stride object passed at construction.
+  StrideType m_stride;
+};
+}  // end namespace Eigen
+#endif  // EIGEN_MAP_H
--- a/eigen-master/Eigen/src/Core/MapBase.h
+++ b/eigen-master/Eigen/src/Core/MapBase.h
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2007-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#ifndef EIGEN_MAPBASE_H
+#define EIGEN_MAPBASE_H
+// Compile-time check for single-index accessors: passes when the evaluator of Derived has
+// LinearAccessBit set, or when Derived is a vector at compile time.
+#define EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)                                                               \
+  EIGEN_STATIC_ASSERT((int(internal::evaluator<Derived>::Flags) & LinearAccessBit) || Derived::IsVectorAtCompileTime, \
+                      YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT)
+// IWYU pragma: private
+#include "./InternalHeaderCheck.h"
+namespace Eigen {
+/** \ingroup Core_Module
+ *
+ * \brief Base class for dense Map and Block expression with direct access
+ *
+ * This base class provides the const low-level accessors (e.g. coeff, coeffRef) of dense
+ * Map and Block objects with direct access.
+ * Typical users do not have to directly deal with this class.
+ *
+ * This class can be extended through the macro plugin \c EIGEN_MAPBASE_PLUGIN.
+ * See \link TopicCustomizing_Plugins customizing Eigen \endlink for details.
+ *
+ * The \c Derived class has to provide the following two methods describing the memory layout:
+ *  \code Index innerStride() const; \endcode
+ *  \code Index outerStride() const; \endcode
+ *
+ * \sa class Map, class Block
+ */
+template <typename Derived>
+class MapBase<Derived, ReadOnlyAccessors> : public internal::dense_xpr_base<Derived>::type {
+ public:
+  typedef typename internal::dense_xpr_base<Derived>::type Base;
+  enum {
+    RowsAtCompileTime = internal::traits<Derived>::RowsAtCompileTime,
+    ColsAtCompileTime = internal::traits<Derived>::ColsAtCompileTime,
+    InnerStrideAtCompileTime = internal::traits<Derived>::InnerStrideAtCompileTime,
+    SizeAtCompileTime = Base::SizeAtCompileTime
+  };
+  typedef typename internal::traits<Derived>::StorageKind StorageKind;
+  typedef typename internal::traits<Derived>::Scalar Scalar;
+  typedef typename internal::packet_traits<Scalar>::type PacketScalar;
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  // Scalar* when Derived is an lvalue according to internal::is_lvalue, const Scalar* otherwise.
+  typedef std::conditional_t<bool(internal::is_lvalue<Derived>::value), Scalar*, const Scalar*> PointerType;
+  using Base::derived;
+  //    using Base::RowsAtCompileTime;
+  //    using Base::ColsAtCompileTime;
+  //    using Base::SizeAtCompileTime;
+  using Base::Flags;
+  using Base::IsRowMajor;
+  using Base::IsVectorAtCompileTime;
+  using Base::MaxColsAtCompileTime;
+  using Base::MaxRowsAtCompileTime;
+  using Base::MaxSizeAtCompileTime;
+  using Base::coeff;
+  using Base::coeffRef;
+  using Base::cols;
+  using Base::eval;
+  using Base::lazyAssign;
+  using Base::rows;
+  using Base::size;
+  using Base::colStride;
+  using Base::innerStride;
+  using Base::outerStride;
+  using Base::rowStride;
+  // bug 217 - compile error on ICC 11.1
+  using Base::operator=;
+  typedef typename Base::CoeffReturnType CoeffReturnType;
+  /** \copydoc DenseBase::rows() */
+  EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_rows.value(); }
+  /** \copydoc DenseBase::cols() */
+  EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_cols.value(); }
+  /** Returns a pointer to the first coefficient of the matrix or vector.
+   *
+   * \note When addressing this data, make sure to honor the strides returned by innerStride() and outerStride().
+   *
+   * \sa innerStride(), outerStride()
+   */
+  EIGEN_DEVICE_FUNC constexpr const Scalar* data() const { return m_data; }
+  /** \copydoc PlainObjectBase::coeff(Index,Index) const */
+  EIGEN_DEVICE_FUNC inline const Scalar& coeff(Index rowId, Index colId) const {
+    // Element offset: colId * colStride() + rowId * rowStride().
+    return m_data[colId * colStride() + rowId * rowStride()];
+  }
+  /** \copydoc PlainObjectBase::coeff(Index) const */
+  EIGEN_DEVICE_FUNC inline const Scalar& coeff(Index index) const {
+    EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
+    // Linear access: consecutive indices are innerStride() elements apart.
+    return m_data[index * innerStride()];
+  }
+  /** \copydoc PlainObjectBase::coeffRef(Index,Index) const */
+  EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index rowId, Index colId) const {
+    return this->m_data[colId * colStride() + rowId * rowStride()];
+  }
+  /** \copydoc PlainObjectBase::coeffRef(Index) const */
+  EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index index) const {
+    EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
+    return this->m_data[index * innerStride()];
+  }
+  /** \internal Loads a packet through internal::ploadt, starting at the address of coefficient (rowId, colId). */
+  template <int LoadMode>
+  inline PacketScalar packet(Index rowId, Index colId) const {
+    return internal::ploadt<PacketScalar, LoadMode>(m_data + (colId * colStride() + rowId * rowStride()));
+  }
+  /** \internal Loads a packet through internal::ploadt, starting at the address of coefficient \a index. */
+  template <int LoadMode>
+  inline PacketScalar packet(Index index) const {
+    EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
+    return internal::ploadt<PacketScalar, LoadMode>(m_data + index * innerStride());
+  }
+  /** \internal Constructor for fixed size matrices or vectors */
+  EIGEN_DEVICE_FUNC explicit inline MapBase(PointerType dataPtr)
+      : m_data(dataPtr), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime) {
+    EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
+    checkSanity<Derived>();
+  }
+  /** \internal Constructor for dynamically sized vectors */
+  EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index vecSize)
+      : m_data(dataPtr),
+        // Each dimension that is Dynamic at compile time takes vecSize; the others keep their fixed value.
+        m_rows(RowsAtCompileTime == Dynamic ? vecSize : Index(RowsAtCompileTime)),
+        m_cols(ColsAtCompileTime == Dynamic ? vecSize : Index(ColsAtCompileTime)) {
+    EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+    eigen_assert(vecSize >= 0);
+    // With a non-null pointer, a fixed compile-time size must match vecSize.
+    eigen_assert(dataPtr == 0 || SizeAtCompileTime == Dynamic || SizeAtCompileTime == vecSize);
+    checkSanity<Derived>();
+  }
+  /** \internal Constructor for dynamically sized matrices */
+  EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index rows, Index cols)
+      : m_data(dataPtr), m_rows(rows), m_cols(cols) {
+    // With a non-null pointer, both dimensions must be non-negative and agree with any fixed compile-time size.
+    eigen_assert((dataPtr == 0) || (rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows) &&
+                                    cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols)));
+    checkSanity<Derived>();
+  }
+#ifdef EIGEN_MAPBASE_PLUGIN
+#include EIGEN_MAPBASE_PLUGIN
+#endif
+ protected:
+  EIGEN_DEFAULT_COPY_CONSTRUCTOR(MapBase)
+  EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(MapBase)
+  /** \internal Pointer sanity checks, overload selected when traits<T>::Alignment > 0. */
+  template <typename T>
+  EIGEN_DEVICE_FUNC void checkSanity(std::enable_if_t<(internal::traits<T>::Alignment > 0), void*> = 0) const {
+// Temporary macro to allow scalars to not be properly aligned.  This is while we sort out failures
+// in TensorFlow Lite that are currently relying on this UB.
+#ifndef EIGEN_ALLOW_UNALIGNED_SCALARS
+    // Pointer must be aligned to the Scalar type, otherwise we get UB.
+    eigen_assert((std::uintptr_t(m_data) % alignof(Scalar) == 0) && "data is not scalar-aligned");
+#endif
+#if EIGEN_MAX_ALIGN_BYTES > 0
+    // innerStride() is not set yet when this function is called, so we optimistically assume the lowest plausible
+    // value:
+    const Index minInnerStride = InnerStrideAtCompileTime == Dynamic ? 1 : Index(InnerStrideAtCompileTime);
+    EIGEN_ONLY_USED_FOR_DEBUG(minInnerStride);
+    // Either the pointer honors the requested alignment, or the mapped extent in bytes is smaller than that alignment.
+    eigen_assert((((std::uintptr_t(m_data) % internal::traits<Derived>::Alignment) == 0) ||
+                  (cols() * rows() * minInnerStride * sizeof(Scalar)) < internal::traits<Derived>::Alignment) &&
+                 "data is not aligned");
+#endif
+  }
+  /** \internal Pointer sanity checks, overload selected when traits<T>::Alignment == 0: only scalar alignment is
+   * checked. */
+  template <typename T>
+  EIGEN_DEVICE_FUNC void checkSanity(std::enable_if_t<internal::traits<T>::Alignment == 0, void*> = 0) const {
+#ifndef EIGEN_ALLOW_UNALIGNED_SCALARS
+    // Pointer must be aligned to the Scalar type, otherwise we get UB.
+    eigen_assert((std::uintptr_t(m_data) % alignof(Scalar) == 0) && "data is not scalar-aligned");
+#endif
+  }
+  // Pointer to the first mapped coefficient.
+  PointerType m_data;
+  // Dimensions, wrapped in variable_if_dynamic and parameterized by the compile-time sizes.
+  const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_rows;
+  const internal::variable_if_dynamic<Index, ColsAtCompileTime> m_cols;
+};
+/** \ingroup Core_Module
+ *
+ * \brief Base class for non-const dense Map and Block expression with direct access
+ *
+ * This base class provides the non-const low-level accessors (e.g. coeff and coeffRef) of
+ * dense Map and Block objects with direct access.
+ * It inherits MapBase<Derived, ReadOnlyAccessors> which defines the const variant for reading specific entries.
+ *
+ * \sa class Map, class Block
+ */
+template <typename Derived>
+class MapBase<Derived, WriteAccessors> : public MapBase<Derived, ReadOnlyAccessors> {
+  typedef MapBase<Derived, ReadOnlyAccessors> ReadOnlyMapBase;
+ public:
+  typedef MapBase<Derived, ReadOnlyAccessors> Base;
+  typedef typename Base::Scalar Scalar;
+  typedef typename Base::PacketScalar PacketScalar;
+  typedef typename Base::StorageIndex StorageIndex;
+  typedef typename Base::PointerType PointerType;
+  using Base::coeff;
+  using Base::coeffRef;
+  using Base::cols;
+  using Base::derived;
+  using Base::rows;
+  using Base::size;
+  using Base::colStride;
+  using Base::innerStride;
+  using Base::outerStride;
+  using Base::rowStride;
+  // Scalar when Derived is an lvalue according to internal::is_lvalue, const Scalar otherwise.
+  typedef std::conditional_t<internal::is_lvalue<Derived>::value, Scalar, const Scalar> ScalarWithConstIfNotLvalue;
+  /** \returns a const pointer to the first mapped coefficient. */
+  EIGEN_DEVICE_FUNC constexpr const Scalar* data() const { return this->m_data; }
+  /** \returns a pointer to the first mapped coefficient; it points to const data when Derived is not an lvalue. */
+  EIGEN_DEVICE_FUNC constexpr ScalarWithConstIfNotLvalue* data() {
+    return this->m_data;
+  }  // no const-cast here so non-const-correct code will give a compile error
+  /** \returns a reference to the coefficient at offset col * colStride() + row * rowStride(). */
+  EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue& coeffRef(Index row, Index col) {
+    return this->m_data[col * colStride() + row * rowStride()];
+  }
+  /** \returns a reference to the coefficient at offset index * innerStride(). */
+  EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue& coeffRef(Index index) {
+    EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
+    return this->m_data[index * innerStride()];
+  }
+  /** \internal Stores packet \a val through internal::pstoret, starting at the address of coefficient (row, col). */
+  template <int StoreMode>
+  inline void writePacket(Index row, Index col, const PacketScalar& val) {
+    internal::pstoret<Scalar, PacketScalar, StoreMode>(this->m_data + (col * colStride() + row * rowStride()), val);
+  }
+  /** \internal Stores packet \a val through internal::pstoret, starting at the address of coefficient \a index. */
+  template <int StoreMode>
+  inline void writePacket(Index index, const PacketScalar& val) {
+    EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
+    internal::pstoret<Scalar, PacketScalar, StoreMode>(this->m_data + index * innerStride(), val);
+  }
+  // The three constructors forward their arguments unchanged to the read-only base class.
+  EIGEN_DEVICE_FUNC explicit inline MapBase(PointerType dataPtr) : Base(dataPtr) {}
+  EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index vecSize) : Base(dataPtr, vecSize) {}
+  EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index rows, Index cols) : Base(dataPtr, rows, cols) {}
+  /** Assignment from another MapBase: delegates to the operator= of the base of the read-only MapBase
+   * and returns the derived object. */
+  EIGEN_DEVICE_FUNC Derived& operator=(const MapBase& other) {
+    ReadOnlyMapBase::Base::operator=(other);
+    return derived();
+  }
+  // In theory we could simply refer to Base:Base::operator=, but MSVC does not like Base::Base,
+  // see bugs 821 and 920.
+  using ReadOnlyMapBase::Base::operator=;
+ protected:
+  EIGEN_DEFAULT_COPY_CONSTRUCTOR(MapBase)
+  EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(MapBase)
+};
+#undef EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS
+}  // end namespace Eigen
+#endif  // EIGEN_MAPBASE_H
--- a/eigen-master/Eigen/src/Core/MathFunctions.h
+++ b/eigen-master/Eigen/src/Core/MathFunctions.h
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2006-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#ifndef EIGEN_MATHFUNCTIONS_H
+#define EIGEN_MATHFUNCTIONS_H
+// TODO this should better be moved to NumTraits
+// Source: WolframAlpha
+// Mathematical constants, written as long double literals (L suffix).
+// pi
+#define EIGEN_PI 3.141592653589793238462643383279502884197169399375105820974944592307816406L
+// log2(e)
+#define EIGEN_LOG2E 1.442695040888963407359924681001892137426645954152985934135449406931109219L
+// ln(2)
+#define EIGEN_LN2 0.693147180559945309417232121458176568075500134360255254120680009493393621L
+// IWYU pragma: private
+#include "./InternalHeaderCheck.h"
+namespace Eigen {
+namespace internal {
+/** \internal \class global_math_functions_filtering_base
+ *
+ * What it does:
+ * Defines a member type 'type' as follows:
+ * - if type T has a member typedef Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl, then
+ *   global_math_functions_filtering_base<T>::type is an alias for it.
+ * - otherwise, global_math_functions_filtering_base<T>::type is an alias for T.
+ *
+ * How it's used:
+ * It allows the global math functions (like sin...) to be defined in certain cases, such as Array expressions.
+ * When you do sin(array1+array2), the object array1+array2 has a complicated expression type; all you want to know
+ * is that it inherits ArrayBase. So we implement a partial specialization of sin_impl for ArrayBase<Derived>,
+ * and we must make sure to use sin_impl<ArrayBase<Derived> > and not sin_impl<Derived>, otherwise our partial
+ * specialization won't be used. How does sin know that? That's exactly what global_math_functions_filtering_base
+ * tells it.
+ *
+ * How it's implemented:
+ * SFINAE in the style of enable_if. Highly susceptible to breaking compilers. With GCC, it sure does work, but if you
+ * replace the typename dummy by an integer template parameter, it doesn't work anymore!
+ */
+template <typename T, typename dummy = void>
+struct global_math_functions_filtering_base {
+  // Fallback case: the type is passed through unchanged.
+  using type = T;
+};
+// Maps any type T to void; used to form the SFINAE condition of
+// global_math_functions_filtering_base.
+template <typename T>
+struct always_void {
+  using type = void;
+};
+// Selected when T declares a member type Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl:
+// 'type' is then that member type instead of T.
+template <typename T>
+struct global_math_functions_filtering_base<
+    T, typename always_void<typename T::Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl>::type> {
+  using type = typename T::Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl;
+};
+// Names the implementation struct <func>_impl, instantiated on the type obtained by passing
+// `scalar` through global_math_functions_filtering_base.
+#define EIGEN_MATHFUNC_IMPL(func, scalar) \
+  Eigen::internal::func##_impl<typename Eigen::internal::global_math_functions_filtering_base<scalar>::type>
+// Names the member `type` of <func>_retval, instantiated on the same filtered type.
+#define EIGEN_MATHFUNC_RETVAL(func, scalar) \
+  typename Eigen::internal::func##_retval<  \
+      typename Eigen::internal::global_math_functions_filtering_base<scalar>::type>::type
+/****************************************************************************
+ * Implementation of real                                                 *
+ ****************************************************************************/
+// Default real-part implementation. The primary template returns its argument as is
+// (converted to RealScalar); the IsComplex == true case is specialized separately.
+template <typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
+struct real_default_impl {
+  using RealScalar = typename NumTraits<Scalar>::Real;
+  EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& value) {
+    return value;
+  }
+};
+// Complex case: makes std::real visible and calls real() unqualified.
+template <typename Scalar>
+struct real_default_impl<Scalar, true> {
+  using RealScalar = typename NumTraits<Scalar>::Real;
+  EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& value) {
+    using std::real;
+    return real(value);
+  }
+};
+// Implementation used by numext::real(); inherits real_default_impl unless specialized.
+template <typename Scalar>
+struct real_impl : real_default_impl<Scalar> {};
+#if defined(EIGEN_GPU_COMPILE_PHASE)
+// When EIGEN_GPU_COMPILE_PHASE is defined, std::complex is handled through its real() member.
+template <typename T>
+struct real_impl<std::complex<T>> {
+  using RealScalar = T;
+  EIGEN_DEVICE_FUNC static inline T run(const std::complex<T>& value) {
+    return value.real();
+  }
+};
+#endif
+// Return type of real(): the Real type associated with Scalar by NumTraits.
+template <typename Scalar>
+struct real_retval {
+  using type = typename NumTraits<Scalar>::Real;
+};
+/****************************************************************************
+ * Implementation of imag                                                 *
+ ****************************************************************************/
+// Default imaginary-part implementation. The primary template ignores its argument and
+// returns RealScalar(0); the IsComplex == true case is specialized separately.
+template <typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
+struct imag_default_impl {
+  using RealScalar = typename NumTraits<Scalar>::Real;
+  EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar&) {
+    return RealScalar(0);
+  }
+};
+// Complex case: makes std::imag visible and calls imag() unqualified.
+template <typename Scalar>
+struct imag_default_impl<Scalar, true> {
+  using RealScalar = typename NumTraits<Scalar>::Real;
+  EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& value) {
+    using std::imag;
+    return imag(value);
+  }
+};
+// Implementation used by numext::imag(); inherits imag_default_impl unless specialized.
+template <typename Scalar>
+struct imag_impl : imag_default_impl<Scalar> {};
+#if defined(EIGEN_GPU_COMPILE_PHASE)
+// When EIGEN_GPU_COMPILE_PHASE is defined, std::complex is handled through its imag() member.
+template <typename T>
+struct imag_impl<std::complex<T>> {
+  using RealScalar = T;
+  EIGEN_DEVICE_FUNC static inline T run(const std::complex<T>& value) {
+    return value.imag();
+  }
+};
+#endif
+// Return type of imag(): the Real type associated with Scalar by NumTraits.
+template <typename Scalar>
+struct imag_retval {
+  using type = typename NumTraits<Scalar>::Real;
+};
+/****************************************************************************
+ * Implementation of real_ref                                             *
+ ****************************************************************************/
+// Reference to the real part: the scalar's address is reinterpreted as a pointer to
+// RealScalar and the first element is returned by reference.
+template <typename Scalar>
+struct real_ref_impl {
+  using RealScalar = typename NumTraits<Scalar>::Real;
+  EIGEN_DEVICE_FUNC static inline RealScalar& run(Scalar& x) {
+    RealScalar* const parts = reinterpret_cast<RealScalar*>(&x);
+    return parts[0];
+  }
+  EIGEN_DEVICE_FUNC static inline const RealScalar& run(const Scalar& x) {
+    const RealScalar* const parts = reinterpret_cast<const RealScalar*>(&x);
+    return parts[0];
+  }
+};
+// Return type of real_ref(): reference to the Real type associated with Scalar by NumTraits.
+template <typename Scalar>
+struct real_ref_retval {
+  using type = typename NumTraits<Scalar>::Real&;
+};
+/****************************************************************************
+ * Implementation of imag_ref                                             *
+ ****************************************************************************/
+// Reference to the imaginary part: the scalar's address is reinterpreted as a pointer to
+// RealScalar and the second element is returned by reference.
+template <typename Scalar, bool IsComplex>
+struct imag_ref_default_impl {
+  using RealScalar = typename NumTraits<Scalar>::Real;
+  EIGEN_DEVICE_FUNC static inline RealScalar& run(Scalar& x) {
+    RealScalar* const parts = reinterpret_cast<RealScalar*>(&x);
+    return parts[1];
+  }
+  EIGEN_DEVICE_FUNC static inline const RealScalar& run(const Scalar& x) {
+    const RealScalar* const parts = reinterpret_cast<const RealScalar*>(&x);
+    return parts[1];
+  }
+};
+// IsComplex == false: there is nothing to refer to, so a zero is returned by value.
+template <typename Scalar>
+struct imag_ref_default_impl<Scalar, false> {
+  EIGEN_DEVICE_FUNC constexpr static Scalar run(Scalar&) {
+    return Scalar(0);
+  }
+  EIGEN_DEVICE_FUNC constexpr static const Scalar run(const Scalar&) {
+    return Scalar(0);
+  }
+};
+// Picks the imag_ref_default_impl variant according to NumTraits<Scalar>::IsComplex.
+template <typename Scalar>
+struct imag_ref_impl : imag_ref_default_impl<Scalar, NumTraits<Scalar>::IsComplex> {};
+// Return type of imag_ref(): reference to the Real type associated with Scalar by NumTraits.
+template <typename Scalar>
+struct imag_ref_retval {
+  using type = typename NumTraits<Scalar>::Real&;
+};
+// implementation in MathFunctionsImpl.h
+template <typename Mask, bool is_built_in_float = std::is_floating_point<Mask>::value>
+struct scalar_select_mask;
+}  // namespace internal
+namespace numext {
+// Real part of x, computed by the real_impl selected through EIGEN_MATHFUNC_IMPL.
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(real, Scalar) real(const Scalar& x) {
+  using Impl = EIGEN_MATHFUNC_IMPL(real, Scalar);
+  return Impl::run(x);
+}
+// Const overload: forwards to real_ref_impl<Scalar>::run on the const argument.
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline internal::add_const_on_value_type_t<EIGEN_MATHFUNC_RETVAL(real_ref, Scalar)> real_ref(
+    const Scalar& x) {
+  using Impl = internal::real_ref_impl<Scalar>;
+  return Impl::run(x);
+}
+// Mutable overload: forwards to the real_ref_impl selected through EIGEN_MATHFUNC_IMPL.
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) real_ref(Scalar& x) {
+  using Impl = EIGEN_MATHFUNC_IMPL(real_ref, Scalar);
+  return Impl::run(x);
+}
+// Imaginary part of x, computed by the imag_impl selected through EIGEN_MATHFUNC_IMPL.
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(imag, Scalar) imag(const Scalar& x) {
+  using Impl = EIGEN_MATHFUNC_IMPL(imag, Scalar);
+  return Impl::run(x);
+}
+// Returns b when internal::scalar_select_mask<Mask>::run(mask) evaluates to true, a otherwise.
+// NOTE(review): scalar_select_mask is implemented elsewhere; check there which mask values map to true.
+template <typename Scalar, typename Mask>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar select(const Mask& mask, const Scalar& a, const Scalar& b) {
+  if (internal::scalar_select_mask<Mask>::run(mask)) {
+    return b;
+  }
+  return a;
+}
+}  // namespace numext
+namespace internal {
+/****************************************************************************
+ * Implementation of conj                                                 *
+ ****************************************************************************/
+// Default conjugate implementation. The primary template returns its argument unchanged;
+// the IsComplex == true case is specialized separately.
+template <typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
+struct conj_default_impl {
+  EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& value) {
+    return value;
+  }
+};
+// Complex case: makes std::conj visible and calls conj() unqualified.
+template <typename Scalar>
+struct conj_default_impl<Scalar, true> {
+  EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& value) {
+    using std::conj;
+    return conj(value);
+  }
+};
+// Conjugate implementation; inherits conj_default_impl with the same IsComplex value.
+template <typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
+struct conj_impl : conj_default_impl<Scalar, IsComplex> {};
+// Return type of conj(): the scalar type itself.
+template <typename Scalar>
+struct conj_retval {
+  using type = Scalar;
+};
+/****************************************************************************
+ * Implementation of abs2                                                 *
+ ****************************************************************************/
+// Squared absolute value. The primary template multiplies the value by itself; the
+// IsComplex == true case is specialized separately.
+template <typename Scalar, bool IsComplex>
+struct abs2_impl_default {
+  using RealScalar = typename NumTraits<Scalar>::Real;
+  EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& value) {
+    return value * value;
+  }
+};
+// Complex case: real^2 + imag^2.
+template <typename Scalar>
+struct abs2_impl_default<Scalar, true> {
+  using RealScalar = typename NumTraits<Scalar>::Real;
+  EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& value) {
+    const auto re = numext::real(value);
+    const auto im = numext::imag(value);
+    return re * re + im * im;
+  }
+};
+// Dispatches to the abs2_impl_default variant matching NumTraits<Scalar>::IsComplex.
+template <typename Scalar>
+struct abs2_impl {
+  using RealScalar = typename NumTraits<Scalar>::Real;
+  EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& value) {
+    using Dispatch = abs2_impl_default<Scalar, NumTraits<Scalar>::IsComplex>;
+    return Dispatch::run(value);
+  }
+};
+// Return type of abs2(): the Real type associated with Scalar by NumTraits.
+template <typename Scalar>
+struct abs2_retval {
+  using type = typename NumTraits<Scalar>::Real;
+};
+/****************************************************************************
+ * Implementation of sqrt/rsqrt                                             *
+ ****************************************************************************/
+// Generic square root: brings sqrt into scope via EIGEN_USING_STD and calls it unqualified.
+template <typename Scalar>
+struct sqrt_impl {
+  EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE Scalar run(const Scalar& value) {
+    EIGEN_USING_STD(sqrt);
+    return sqrt(value);
+  }
+};
+// Complex sqrt defined in MathFunctionsImpl.h.
+template <typename ComplexT>
+EIGEN_DEVICE_FUNC ComplexT complex_sqrt(const ComplexT& a_x);
+// Custom implementation is faster than `std::sqrt`, works on
+// GPU, and correctly handles special cases (unlike MSVC).
+template <typename T>
+struct sqrt_impl<std::complex<T>> {
+  EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE std::complex<T> run(const std::complex<T>& value) {
+    return complex_sqrt(value);
+  }
+};
+// Return type of sqrt(): the scalar type itself.
+template <typename Scalar>
+struct sqrt_retval {
+  using type = Scalar;
+};
+// Default implementation relies on numext::sqrt, at bottom of file.
+template <typename T>
+struct rsqrt_impl;
+// Complex rsqrt defined in MathFunctionsImpl.h.
+template <typename ComplexT>
+EIGEN_DEVICE_FUNC ComplexT complex_rsqrt(const ComplexT& a_x);
+// std::complex case: forwards to complex_rsqrt.
+template <typename T>
+struct rsqrt_impl<std::complex<T>> {
+  EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE std::complex<T> run(const std::complex<T>& value) {
+    return complex_rsqrt(value);
+  }
+};
+// Return type of rsqrt(): the scalar type itself.
+template <typename Scalar>
+struct rsqrt_retval {
+  using type = Scalar;
+};
+/****************************************************************************
+ * Implementation of norm1                                                *
+ ****************************************************************************/
+template <typename Scalar, bool IsComplex>
+struct norm1_default_impl;
+// norm1 of a complex scalar: abs(real part) + abs(imaginary part).
+template <typename Scalar>
+struct norm1_default_impl<Scalar, true> {
+  using RealScalar = typename NumTraits<Scalar>::Real;
+  EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) {
+    EIGEN_USING_STD(abs);
+    return abs(numext::real(x)) + abs(numext::imag(x));
+  }
+};
+// norm1 of a non-complex scalar: simply abs(x).
+template <typename Scalar>
+struct norm1_default_impl<Scalar, false> {
+  EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x) {
+    EIGEN_USING_STD(abs);
+    return abs(x);
+  }
+};
+// Selects the complex or non-complex norm1 implementation from NumTraits<Scalar>::IsComplex.
+template <typename Scalar>
+struct norm1_impl : norm1_default_impl<Scalar, NumTraits<Scalar>::IsComplex> {};
+// Return-type trait for norm1: the real type NumTraits associates with Scalar.
+template <typename Scalar>
+struct norm1_retval {
+  using type = typename NumTraits<Scalar>::Real;
+};
+/****************************************************************************
+ * Implementation of hypot                                                *
+ ****************************************************************************/
+// Declaration only; the definition is not in this part of the file.
+template <typename Scalar>
+struct hypot_impl;
+// Return-type trait for hypot: the real type NumTraits associates with Scalar.
+template <typename Scalar>
+struct hypot_retval {
+  using type = typename NumTraits<Scalar>::Real;
+};
+/****************************************************************************
+ * Implementation of cast                                                 *
+ ****************************************************************************/
+// Default scalar cast: a plain static_cast from OldType to NewType.
+// EnableIf is a hook for SFINAE-selected partial specializations.
+template <typename OldType, typename NewType, typename EnableIf = void>
+struct cast_impl {
+  EIGEN_DEVICE_FUNC static inline NewType run(const OldType& x) { return static_cast<NewType>(x); }
+};
+// Cast to bool: true exactly when x compares unequal to OldType(0).
+template <typename OldType>
+struct cast_impl<OldType, bool> {
+  EIGEN_DEVICE_FUNC static inline bool run(const OldType& x) { return x != OldType(0); }
+};
+// A direct S -> Complex<T> cast would go through an implicit S -> T conversion,
+// which clang warns about. This specialization (real source, complex target)
+// converts to the target's real type explicitly before building the complex value.
+template <typename OldType, typename NewType>
+struct cast_impl<OldType, NewType,
+                 typename std::enable_if_t<!NumTraits<OldType>::IsComplex && NumTraits<NewType>::IsComplex>> {
+  EIGEN_DEVICE_FUNC static inline NewType run(const OldType& x) {
+    using NewReal = typename NumTraits<NewType>::Real;
+    return static_cast<NewType>(static_cast<NewReal>(x));
+  }
+};
+// here, for once, we're plainly returning NewType: we don't want cast to do weird things.
+// Converts x to NewType through cast_impl<OldType, NewType>::run.
+template <typename OldType, typename NewType>
+EIGEN_DEVICE_FUNC inline NewType cast(const OldType& x) {
+  return cast_impl<OldType, NewType>::run(x);
+}
+/****************************************************************************
+ * Implementation of arg                                                     *
+ ****************************************************************************/
+// Visual Studio 2017 has a bug where arg(float) returns 0 for negative inputs.
+// This seems to be fixed in VS 2019.
+#if (!EIGEN_COMP_MSVC || EIGEN_COMP_MSVC >= 1920)
+// std::arg is only defined for types of std::complex, or integer types or float/double/long double
+// HasStdImpl is true for exactly those types; it selects between the std::arg-based
+// specialization (true) and the sign-based fallback (false).
+template <typename Scalar, bool HasStdImpl = NumTraits<Scalar>::IsComplex || is_integral<Scalar>::value ||
+                                             is_same<Scalar, float>::value || is_same<Scalar, double>::value ||
+                                             is_same<Scalar, long double>::value>
+struct arg_default_impl;
+// Types with a std::arg overload: call it and convert the result to RealScalar.
+template <typename Scalar>
+struct arg_default_impl<Scalar, true> {
+  using RealScalar = typename NumTraits<Scalar>::Real;
+  EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) {
+    // There is no official ::arg on device in CUDA/HIP, so we always need to use std::arg.
+    using std::arg;
+    return static_cast<RealScalar>(arg(x));
+  }
+};
+// Must be non-complex floating-point type (e.g. half/bfloat16).
+// The argument of a real number is pi when it is negative and 0 otherwise.
+template <typename Scalar>
+struct arg_default_impl<Scalar, false> {
+  using RealScalar = typename NumTraits<Scalar>::Real;
+  EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) {
+    if (x < Scalar(0)) {
+      return RealScalar(EIGEN_PI);
+    }
+    return RealScalar(0);
+  }
+};
+#else
+// Variant used on the #else side of the MSVC version check above: non-complex
+// scalars get pi for negative inputs and 0 otherwise, without calling std::arg.
+template <typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
+struct arg_default_impl {
+  using RealScalar = typename NumTraits<Scalar>::Real;
+  EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) {
+    if (x < RealScalar(0)) {
+      return RealScalar(EIGEN_PI);
+    }
+    return RealScalar(0);
+  }
+};
+// Complex scalars on the #else side of the MSVC version check: unqualified arg(x)
+// after EIGEN_USING_STD(arg).
+template <typename Scalar>
+struct arg_default_impl<Scalar, true> {
+  using RealScalar = typename NumTraits<Scalar>::Real;
+  EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) {
+    EIGEN_USING_STD(arg);
+    return arg(x);
+  }
+};
+#endif
+// Customization point for arg; by default inherits arg_default_impl<Scalar>.
+template <typename Scalar>
+struct arg_impl : arg_default_impl<Scalar> {};
+// Return-type trait for arg: the real type NumTraits associates with Scalar.
+template <typename Scalar>
+struct arg_retval {
+  using type = typename NumTraits<Scalar>::Real;
+};
+/****************************************************************************
+ * Implementation of expm1                                                   *
+ ****************************************************************************/
+// This implementation is based on GSL Math's expm1.
+namespace std_fallback {
+// fallback expm1 implementation in case there is no expm1(Scalar) function in namespace of Scalar,
+// or that there is no suitable std::expm1 function available. Implementation
+// attributed to Kahan. See: http://www.plunk.org/~hatch/rightway.php.
+//
+// Computes exp(x) - 1 from u = exp(x):
+//   - if u == 1 exactly, returns x;
+//   - if u - 1 == -1 exactly, returns -1;
+//   - if u == log(u), returns u;
+//   - otherwise returns (u - 1) * x / log(u).
+// Integer Scalar types are rejected at compile time.
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline Scalar expm1(const Scalar& x) {
+  EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  EIGEN_USING_STD(exp);
+  Scalar u = exp(x);
+  // exp(x) rounded to exactly 1: x is returned unchanged.
+  if (numext::equal_strict(u, Scalar(1))) {
+    return x;
+  }
+  Scalar um1 = u - RealScalar(1);
+  // exp(x) - 1 rounded to exactly -1: return -1 directly.
+  if (numext::equal_strict(um1, Scalar(-1))) {
+    return RealScalar(-1);
+  }
+  EIGEN_USING_STD(log);
+  Scalar logu = log(u);
+  // u == log(u) is handled separately (presumably the overflow case u == +inf — confirm);
+  // otherwise rescale u - 1 by x / log(u).
+  return numext::equal_strict(u, logu) ? u : (u - RealScalar(1)) * x / logu;
+}
+}  // namespace std_fallback
+// Default expm1: unqualified expm1(x) after EIGEN_USING_STD(expm1).
+// Integer Scalar types are rejected at compile time.
+template <typename Scalar>
+struct expm1_impl {
+  EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x) {
+    EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
+    EIGEN_USING_STD(expm1);
+    return expm1(x);
+  }
+};
+// Return-type trait for expm1: the result has the same type as the argument.
+template <typename Scalar>
+struct expm1_retval {
+  using type = Scalar;
+};
+/****************************************************************************
+ * Implementation of log                                                     *
+ ****************************************************************************/
+// Complex log defined in MathFunctionsImpl.h.
+// Declared here so the std::complex specialization of log_impl can call it.
+template <typename ComplexT>
+EIGEN_DEVICE_FUNC ComplexT complex_log(const ComplexT& z);
+// Generic log: unqualified log(x) after EIGEN_USING_STD(log), with the result
+// explicitly converted back to Scalar.
+template <typename Scalar>
+struct log_impl {
+  EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x) {
+    EIGEN_USING_STD(log);
+    return static_cast<Scalar>(log(x));
+  }
+};
+// log for std::complex<Scalar>: forwards to complex_log.
+template <typename Scalar>
+struct log_impl<std::complex<Scalar>> {
+  EIGEN_DEVICE_FUNC static inline std::complex<Scalar> run(const std::complex<Scalar>& z) { return complex_log(z); }
+};
+/****************************************************************************
+ * Implementation of log1p                                                   *
+ ****************************************************************************/
+namespace std_fallback {
+// fallback log1p implementation in case there is no log1p(Scalar) function in namespace of Scalar,
+// or that there is no suitable std::log1p function available
+//
+// Computes log(1 + x) from x1p = 1 + x:
+//   - if x1p == 1 exactly, returns x;
+//   - if x1p == log(x1p), returns x;
+//   - otherwise returns x * (log(x1p) / (x1p - 1)).
+// The logarithm is taken through log_impl<Scalar>, so no using-declaration
+// for log is needed here. Integer Scalar types are rejected at compile time.
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline Scalar log1p(const Scalar& x) {
+  EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  Scalar x1p = RealScalar(1) + x;
+  Scalar log_1p = log_impl<Scalar>::run(x1p);
+  // x too small to change 1 + x.
+  const bool is_small = numext::equal_strict(x1p, Scalar(1));
+  // x1p equal to its own logarithm (named for the infinite-input case).
+  const bool is_inf = numext::equal_strict(x1p, log_1p);
+  return (is_small || is_inf) ? x : x * (log_1p / (x1p - RealScalar(1)));
+}
+}  // namespace std_fallback
+// Default log1p: unqualified log1p(x) after EIGEN_USING_STD(log1p).
+// Integer Scalar types are rejected at compile time (class-scope static assert).
+template <typename Scalar>
+struct log1p_impl {
+  EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
+  EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x) {
+    EIGEN_USING_STD(log1p);
+    return log1p(x);
+  }
+};
+// Specialization for complex types that are not supported by std::log1p.
+// Uses the generic std_fallback::log1p formula instead.
+template <typename RealScalar>
+struct log1p_impl<std::complex<RealScalar>> {
+  EIGEN_STATIC_ASSERT_NON_INTEGER(RealScalar)
+  EIGEN_DEVICE_FUNC static inline std::complex<RealScalar> run(const std::complex<RealScalar>& x) {
+    return std_fallback::log1p(x);
+  }
+};
+// Return-type trait for log1p: the result has the same type as the argument.
+template <typename Scalar>
+struct log1p_retval {
+  using type = Scalar;
+};
+/****************************************************************************
+ * Implementation of pow                                                  *
+ ****************************************************************************/
+// General power function. IsInteger is true only when both scalar types are
+// integers; this primary template handles every other case with an unqualified
+// pow(x, y) after EIGEN_USING_STD(pow). The result type comes from
+// ScalarBinaryOpTraits for scalar_pow_op.
+template <typename ScalarX, typename ScalarY,
+          bool IsInteger = NumTraits<ScalarX>::IsInteger && NumTraits<ScalarY>::IsInteger>
+struct pow_impl {
+  using result_type =
+      typename ScalarBinaryOpTraits<ScalarX, ScalarY, internal::scalar_pow_op<ScalarX, ScalarY>>::ReturnType;
+  static EIGEN_DEVICE_FUNC inline result_type run(const ScalarX& x, const ScalarY& y) {
+    EIGEN_USING_STD(pow);
+    return pow(x, y);
+  }
+};
+// Integer base and integer exponent: exponentiation by repeated squaring.
+// A signed exponent must be non-negative (checked with eigen_assert).
+template <typename ScalarX, typename ScalarY>
+struct pow_impl<ScalarX, ScalarY, true> {
+  using result_type = ScalarX;
+  static EIGEN_DEVICE_FUNC inline ScalarX run(ScalarX x, ScalarY y) {
+    ScalarX acc(1);
+    eigen_assert(!NumTraits<ScalarY>::IsSigned || y >= 0);
+    // Lowest exponent bit uses x itself.
+    if (y & 1) acc *= x;
+    // Each remaining bit uses the next square of x.
+    for (y >>= 1; y; y >>= 1) {
+      x *= x;
+      if (y & 1) acc *= x;
+    }
+    return acc;
+  }
+};
+// Step selectors for the compile-time binary search performed by meta_floor_log2.
+enum { meta_floor_log2_terminate, meta_floor_log2_move_up, meta_floor_log2_move_down, meta_floor_log2_bogus };
+// Chooses the next step of the search for floor(log2(n)) within [lower, upper]:
+//   - terminate when the interval has at most two candidates (upper <= lower + 1);
+//   - move_down when n < 2^middle;
+//   - bogus when n == 0;
+//   - move_up otherwise.
+// NOTE(review): the tests are evaluated in order, and n == 0 already satisfies
+// `n < (1 << middle)`, so the bogus case looks unreachable as written — confirm intent.
+template <unsigned int n, int lower, int upper>
+struct meta_floor_log2_selector {
+  enum {
+    middle = (lower + upper) / 2,
+    value = (upper <= lower + 1)  ? int(meta_floor_log2_terminate)
+            : (n < (1 << middle)) ? int(meta_floor_log2_move_down)
+            : (n == 0)            ? int(meta_floor_log2_bogus)
+                                  : int(meta_floor_log2_move_up)
+  };
+};
+// Compile-time floor(log2(n)) by binary search over bit positions
+// [0, number of bits in unsigned int - 1]. The primary template is empty;
+// the partial specializations on `selector` provide `value`.
+template <unsigned int n, int lower = 0, int upper = sizeof(unsigned int) * CHAR_BIT - 1,
+          int selector = meta_floor_log2_selector<n, lower, upper>::value>
+struct meta_floor_log2 {};
+// move_down step: continue the search in the lower half [lower, middle].
+template <unsigned int n, int lower, int upper>
+struct meta_floor_log2<n, lower, upper, meta_floor_log2_move_down> {
+  enum { value = meta_floor_log2<n, lower, meta_floor_log2_selector<n, lower, upper>::middle>::value };
+};
+// move_up step: continue the search in the upper half [middle, upper].
+template <unsigned int n, int lower, int upper>
+struct meta_floor_log2<n, lower, upper, meta_floor_log2_move_up> {
+  enum { value = meta_floor_log2<n, meta_floor_log2_selector<n, lower, upper>::middle, upper>::value };
+};
+// terminate step: the answer is lower + 1 if n >= 2^(lower + 1), else lower.
+template <unsigned int n, int lower, int upper>
+struct meta_floor_log2<n, lower, upper, meta_floor_log2_terminate> {
+  enum { value = (n >= ((unsigned int)(1) << (lower + 1))) ? lower + 1 : lower };
+};
+// bogus step: deliberately defines no `value`, so using it fails to compile.
+template <unsigned int n, int lower, int upper>
+struct meta_floor_log2<n, lower, upper, meta_floor_log2_bogus> {
+  // no value, error at compile time
+};
+// Portable bit-counting fallback for unsigned integer types. Both functions
+// return the full bit width of BitsType for a zero input.
+template <typename BitsType, typename EnableIf = void>
+struct count_bits_impl {
+  static_assert(std::is_integral<BitsType>::value && std::is_unsigned<BitsType>::value,
+                "BitsType must be an unsigned integer");
+  // Count leading zeros by halving the shift distance: whenever the upper part
+  // selected by the shift is non-zero, keep it and discount the skipped bits.
+  static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
+    int zeros = CHAR_BIT * sizeof(BitsType);
+    int step = zeros / 2;
+    for (; bits > 0 && step > 0; step /= 2) {
+      const BitsType upper = bits >> step;
+      if (upper > 0) {
+        zeros -= step;
+        bits = upper;
+      }
+    }
+    // step reaches 0 only for a non-zero input; account for its last set bit.
+    return (step == 0) ? zeros - 1 : zeros;
+  }
+  // Count trailing zeros: mirror image of clz, shifting left instead of right.
+  static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
+    int zeros = CHAR_BIT * sizeof(BitsType);
+    int step = zeros / 2;
+    for (; bits > 0 && step > 0; step /= 2) {
+      const BitsType lower = bits << step;
+      if (lower > 0) {
+        zeros -= step;
+        bits = lower;
+      }
+    }
+    return (step == 0) ? zeros - 1 : zeros;
+  }
+};
+// Count leading zeros.
+// Dispatches to the count_bits_impl specialization selected for BitsType.
+template <typename BitsType>
+EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
+  return count_bits_impl<BitsType>::clz(bits);
+}
+// Count trailing zeros.
+// Dispatches to the count_bits_impl specialization selected for BitsType.
+template <typename BitsType>
+EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
+  return count_bits_impl<BitsType>::ctz(bits);
+}
+#if EIGEN_COMP_GNUC || EIGEN_COMP_CLANG
+// GCC/Clang builtins for integral types no wider than unsigned int.
+// Both functions return kNumBits for a zero input; the builtins themselves are
+// never called with 0.
+template <typename BitsType>
+struct count_bits_impl<
+    BitsType, std::enable_if_t<std::is_integral<BitsType>::value && sizeof(BitsType) <= sizeof(unsigned int)>> {
+  static constexpr int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT);
+  static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
+    // Number of high bits unsigned int has beyond BitsType.
+    static constexpr int kLeadingBitsOffset = (sizeof(unsigned int) - sizeof(BitsType)) * CHAR_BIT;
+    // Keep only the low kNumBits bits: converting a negative narrow signed value
+    // to unsigned int sign-extends it, which would otherwise make the result negative.
+    static constexpr unsigned int kValueMask = ~0u >> kLeadingBitsOffset;
+    return bits == 0 ? kNumBits
+                     : __builtin_clz(static_cast<unsigned int>(bits) & kValueMask) - kLeadingBitsOffset;
+  }
+  static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
+    // Sign extension only affects high bits, so no masking is needed here.
+    return bits == 0 ? kNumBits : __builtin_ctz(static_cast<unsigned int>(bits));
+  }
+};
+// GCC/Clang builtins for integral types wider than unsigned int but no wider
+// than unsigned long. Both functions return kNumBits for a zero input.
+template <typename BitsType>
+struct count_bits_impl<BitsType,
+                       std::enable_if_t<std::is_integral<BitsType>::value && sizeof(unsigned int) < sizeof(BitsType) &&
+                                        sizeof(BitsType) <= sizeof(unsigned long)>> {
+  static constexpr int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT);
+  static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
+    // Number of high bits unsigned long has beyond BitsType.
+    static constexpr int kLeadingBitsOffset = (sizeof(unsigned long) - sizeof(BitsType)) * CHAR_BIT;
+    return bits == 0 ? kNumBits : __builtin_clzl(static_cast<unsigned long>(bits)) - kLeadingBitsOffset;
+  }
+  static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
+    return bits == 0 ? kNumBits : __builtin_ctzl(static_cast<unsigned long>(bits));
+  }
+};
+// GCC/Clang builtins for integral types wider than unsigned long but no wider
+// than unsigned long long. Both functions return kNumBits for a zero input.
+template <typename BitsType>
+struct count_bits_impl<BitsType,
+                       std::enable_if_t<std::is_integral<BitsType>::value && sizeof(unsigned long) < sizeof(BitsType) &&
+                                        sizeof(BitsType) <= sizeof(unsigned long long)>> {
+  static constexpr int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT);
+  static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
+    // Number of high bits unsigned long long has beyond BitsType.
+    static constexpr int kLeadingBitsOffset = (sizeof(unsigned long long) - sizeof(BitsType)) * CHAR_BIT;
+    return bits == 0 ? kNumBits : __builtin_clzll(static_cast<unsigned long long>(bits)) - kLeadingBitsOffset;
+  }
+  static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
+    return bits == 0 ? kNumBits : __builtin_ctzll(static_cast<unsigned long long>(bits));
+  }
+};
+#elif EIGEN_COMP_MSVC
+// MSVC intrinsics for integral types no wider than unsigned long.
+// Both functions return kNumBits for a zero input.
+template <typename BitsType>
+struct count_bits_impl<
+    BitsType, std::enable_if_t<std::is_integral<BitsType>::value && sizeof(BitsType) <= sizeof(unsigned long)>> {
+  static constexpr int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT);
+  static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
+    // Keep only the low kNumBits bits: converting a negative narrow signed value
+    // to unsigned long sign-extends it, which would otherwise make the result negative.
+    static constexpr unsigned long kValueMask = ~0ul >> ((sizeof(unsigned long) - sizeof(BitsType)) * CHAR_BIT);
+    // Initialized because the intrinsic leaves the index unspecified for a zero input.
+    unsigned long out = 0;
+    _BitScanReverse(&out, static_cast<unsigned long>(bits) & kValueMask);
+    // `out` is the index of the highest set bit; convert it to a leading-zero count.
+    return bits == 0 ? kNumBits : (kNumBits - 1) - static_cast<int>(out);
+  }
+  static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
+    unsigned long out = 0;
+    // Sign extension only affects high bits, so no masking is needed here.
+    _BitScanForward(&out, static_cast<unsigned long>(bits));
+    // `out` is the index of the lowest set bit, i.e. the trailing-zero count.
+    return bits == 0 ? kNumBits : static_cast<int>(out);
+  }
+};
+#ifdef _WIN64
+// MSVC 64-bit intrinsics for integral types wider than unsigned long but no
+// wider than __int64. Both functions return kNumBits for a zero input.
+template <typename BitsType>
+struct count_bits_impl<BitsType,
+                       std::enable_if_t<std::is_integral<BitsType>::value && sizeof(unsigned long) < sizeof(BitsType) &&
+                                        sizeof(BitsType) <= sizeof(__int64)>> {
+  static constexpr int kNumBits = static_cast<int>(sizeof(BitsType) * CHAR_BIT);
+  static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) {
+    unsigned long out;
+    _BitScanReverse64(&out, static_cast<unsigned __int64>(bits));
+    // `out` is only read when bits != 0; it is the index of the highest set bit.
+    return bits == 0 ? kNumBits : (kNumBits - 1) - static_cast<int>(out);
+  }
+  static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) {
+    unsigned long out;
+    _BitScanForward64(&out, static_cast<unsigned __int64>(bits));
+    // `out` is only read when bits != 0; it is the index of the lowest set bit.
+    return bits == 0 ? kNumBits : static_cast<int>(out);
+  }
+};
+#endif  // _WIN64
+#endif  // EIGEN_COMP_GNUC || EIGEN_COMP_CLANG
+// Integer base-2 logarithm built on clz. Both variants return 0 for x == 0.
+template <typename BitsType>
+struct log_2_impl {
+  static constexpr int kTotalBits = sizeof(BitsType) * CHAR_BIT;
+  // ceil(log2(x)): one less than the bit width when x is an exact power of two.
+  static EIGEN_DEVICE_FUNC inline int run_ceil(const BitsType& x) {
+    const int bit_width = kTotalBits - clz(x);
+    if (x == 0) {
+      return 0;
+    }
+    const bool is_power_of_two = (x & (x - 1)) == 0;
+    return is_power_of_two ? (bit_width - 1) : bit_width;
+  }
+  // floor(log2(x)): index of the highest set bit.
+  static EIGEN_DEVICE_FUNC inline int run_floor(const BitsType& x) {
+    const int bit_width = kTotalBits - clz(x);
+    if (x == 0) {
+      return 0;
+    }
+    return bit_width - 1;
+  }
+};
+// Returns ceil(log2(x)) via log_2_impl (0 when x == 0).
+// Marked EIGEN_DEVICE_FUNC inline like clz/ctz and log_2_impl::run_ceil so it
+// can be called wherever they can.
+template <typename BitsType>
+EIGEN_DEVICE_FUNC inline int log2_ceil(const BitsType& x) {
+  return log_2_impl<BitsType>::run_ceil(x);
+}
+// Returns floor(log2(x)) via log_2_impl (0 when x == 0).
+// Marked EIGEN_DEVICE_FUNC inline like clz/ctz and log_2_impl::run_floor so it
+// can be called wherever they can.
+template <typename BitsType>
+EIGEN_DEVICE_FUNC inline int log2_floor(const BitsType& x) {
+  return log_2_impl<BitsType>::run_floor(x);
+}
+// Implementation of is* functions
+// Types with neither infinity nor NaN (per std::numeric_limits) are always finite.
+template <typename T>
+EIGEN_DEVICE_FUNC std::enable_if_t<!(std::numeric_limits<T>::has_infinity || std::numeric_limits<T>::has_quiet_NaN ||
+                                     std::numeric_limits<T>::has_signaling_NaN),
+                                   bool>
+isfinite_impl(const T&) {
+  return true;
+}
+// Non-complex types that have an infinity or NaN: unqualified isfinite(x)
+// after EIGEN_USING_STD(isfinite).
+template <typename T>
+EIGEN_DEVICE_FUNC std::enable_if_t<(std::numeric_limits<T>::has_infinity || std::numeric_limits<T>::has_quiet_NaN ||
+                                    std::numeric_limits<T>::has_signaling_NaN) &&
+                                       (!NumTraits<T>::IsComplex),
+                                   bool>
+isfinite_impl(const T& x) {
+  EIGEN_USING_STD(isfinite);
+  // EIGEN_NOT_A_MACRO presumably keeps a function-like isfinite macro from expanding — confirm.
+  return isfinite EIGEN_NOT_A_MACRO(x);
+}
+// Types without an infinity (per std::numeric_limits) are never infinite.
+template <typename T>
+EIGEN_DEVICE_FUNC std::enable_if_t<!std::numeric_limits<T>::has_infinity, bool> isinf_impl(const T&) {
+  return false;
+}
+// Non-complex types that have an infinity: unqualified isinf(x) after EIGEN_USING_STD(isinf).
+template <typename T>
+EIGEN_DEVICE_FUNC std::enable_if_t<(std::numeric_limits<T>::has_infinity && !NumTraits<T>::IsComplex), bool> isinf_impl(
+    const T& x) {
+  EIGEN_USING_STD(isinf);
+  // EIGEN_NOT_A_MACRO presumably keeps a function-like isinf macro from expanding — confirm.
+  return isinf EIGEN_NOT_A_MACRO(x);
+}
+// Types without any NaN representation (per std::numeric_limits) are never NaN.
+template <typename T>
+EIGEN_DEVICE_FUNC
+std::enable_if_t<!(std::numeric_limits<T>::has_quiet_NaN || std::numeric_limits<T>::has_signaling_NaN), bool>
+isnan_impl(const T&) {
+  return false;
+}
+// Non-complex types that have a NaN: unqualified isnan(x) after EIGEN_USING_STD(isnan).
+template <typename T>
+EIGEN_DEVICE_FUNC std::enable_if_t<
+    (std::numeric_limits<T>::has_quiet_NaN || std::numeric_limits<T>::has_signaling_NaN) && (!NumTraits<T>::IsComplex),
+    bool>
+isnan_impl(const T& x) {
+  EIGEN_USING_STD(isnan);
+  // EIGEN_NOT_A_MACRO presumably keeps a function-like isnan macro from expanding — confirm.
+  return isnan EIGEN_NOT_A_MACRO(x);
+}
+// The following overloads are defined at the end of this file.
+// They cover std::complex<T>, which the SFINAE-constrained overloads exclude.
+template <typename T>
+EIGEN_DEVICE_FUNC bool isfinite_impl(const std::complex<T>& x);
+template <typename T>
+EIGEN_DEVICE_FUNC bool isnan_impl(const std::complex<T>& x);
+template <typename T>
+EIGEN_DEVICE_FUNC bool isinf_impl(const std::complex<T>& x);
+// Declaration only; the definition is not in this part of the file.
+template <typename T>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS T ptanh_float(const T& a_x);
+/****************************************************************************
+ * Implementation of sign                                                 *
+ ****************************************************************************/
+// Sign of a scalar, dispatched on whether Scalar is complex and/or integer.
+// This primary template serves non-complex integers: (a > 0) - (a < 0), i.e. -1, 0 or 1.
+template <typename Scalar, bool IsComplex = (NumTraits<Scalar>::IsComplex != 0),
+          bool IsInteger = (NumTraits<Scalar>::IsInteger != 0)>
+struct sign_impl {
+  EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& a) { return Scalar((a > Scalar(0)) - (a < Scalar(0))); }
+};
+// Non-complex, non-integer scalars: NaN is propagated unchanged, otherwise
+// the result is (a > 0) - (a < 0).
+template <typename Scalar>
+struct sign_impl<Scalar, false, false> {
+  EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& a) {
+    return (isnan_impl<Scalar>)(a) ? a : Scalar((a > Scalar(0)) - (a < Scalar(0)));
+  }
+};
+// Complex scalars: a / |a|, with 0 mapped to 0.
+template <typename Scalar, bool IsInteger>
+struct sign_impl<Scalar, true, IsInteger> {
+  EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& a) {
+    using real_type = typename NumTraits<Scalar>::Real;
+    EIGEN_USING_STD(abs);
+    const real_type magnitude = abs(a);
+    if (magnitude == real_type(0)) return Scalar(0);
+    // Scale both components by the reciprocal of the magnitude.
+    const real_type inv_magnitude = real_type(1) / magnitude;
+    return Scalar(numext::real(a) * inv_magnitude, numext::imag(a) * inv_magnitude);
+  }
+};
+// The sign function for bool is the identity.
+// (Full specialization for the non-complex, integer case with Scalar = bool.)
+template <>
+struct sign_impl<bool, false, true> {
+  EIGEN_DEVICE_FUNC static inline bool run(const bool& a) { return a; }
+};
+// Return-type trait for sign: the result has the same type as the argument.
+template <typename Scalar>
+struct sign_retval {
+  using type = Scalar;
+};
+// suppress "unary minus operator applied to unsigned type, result still unsigned" warnings on MSVC
+// note: `0 - a` is distinct from `-a` when Scalar is a floating point type and `a` is zero
+// Non-integer scalars therefore use unary minus; integers use the specialization with `0 - a`.
+template <typename Scalar, bool IsInteger = NumTraits<Scalar>::IsInteger>
+struct negate_impl {
+  static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar run(const Scalar& a) { return -a; }
+};
+// Integer scalars: negate as Scalar(0) - a. bool is rejected at compile time.
+template <typename Scalar>
+struct negate_impl<Scalar, true> {
+  EIGEN_STATIC_ASSERT((!is_same<Scalar, bool>::value), NEGATE IS NOT DEFINED FOR BOOLEAN TYPES)
+  static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar run(const Scalar& a) { return Scalar(0) - a; }
+};
+// Return-type trait for negate: the result has the same type as the argument.
+template <typename Scalar>
+struct negate_retval {
+  using type = Scalar;
+};
+// Rounding helpers (floor / ceil / rint / round / trunc). IsInteger is derived
+// from the element type given by unpacket_traits<Scalar>. This primary template
+// handles non-integer types with an unqualified call to the function of the
+// same name after the matching EIGEN_USING_STD.
+template <typename Scalar, bool IsInteger = NumTraits<typename unpacket_traits<Scalar>::type>::IsInteger>
+struct nearest_integer_impl {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run_floor(const Scalar& x) {
+    EIGEN_USING_STD(floor) return floor(x);
+  }
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run_ceil(const Scalar& x) {
+    EIGEN_USING_STD(ceil) return ceil(x);
+  }
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run_rint(const Scalar& x) {
+    EIGEN_USING_STD(rint) return rint(x);
+  }
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run_round(const Scalar& x) {
+    EIGEN_USING_STD(round) return round(x);
+  }
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run_trunc(const Scalar& x) {
+    EIGEN_USING_STD(trunc) return trunc(x);
+  }
+};
+// Integer types are already integral, so every rounding mode is the identity.
+template <typename Scalar>
+struct nearest_integer_impl<Scalar, true> {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run_floor(const Scalar& x) { return x; }
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run_ceil(const Scalar& x) { return x; }
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run_rint(const Scalar& x) { return x; }
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run_round(const Scalar& x) { return x; }
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run_trunc(const Scalar& x) { return x; }
+};
+// Default implementation.
+// Computes a * b + c with ordinary operators (not a fused operation in itself).
+template <typename Scalar, typename Enable = void>
+struct fma_impl {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run(const Scalar& a, const Scalar& b, const Scalar& c) {
+    return a * b + c;
+  }
+};
+// ADL version if it exists.
+// Selected when an unqualified call fma(T, T, T) is well-formed and returns exactly T.
+template <typename T>
+struct fma_impl<
+    T,
+    std::enable_if_t<std::is_same<T, decltype(fma(std::declval<T>(), std::declval<T>(), std::declval<T>()))>::value>> {
+  // Qualified like the other fma_impl::run variants so it is usable from device code and inlined.
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T run(const T& a, const T& b, const T& c) { return fma(a, b, c); }
+};
+#if defined(EIGEN_GPUCC)
+// GPU compilers: float uses the global-namespace ::fmaf.
+template <>
+struct fma_impl<float, void> {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float run(const float& a, const float& b, const float& c) {
+    return ::fmaf(a, b, c);
+  }
+};
+// GPU compilers: double uses the global-namespace ::fma.
+template <>
+struct fma_impl<double, void> {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double run(const double& a, const double& b, const double& c) {
+    return ::fma(a, b, c);
+  }
+};
+#endif
+}  // end namespace internal
+/****************************************************************************
+ * Generic math functions                                                    *
+ ****************************************************************************/
+namespace numext {
+#if (!defined(EIGEN_GPUCC) || defined(EIGEN_CONSTEXPR_ARE_DEVICE_FUNC))
+// Minimum of x and y via an unqualified min after EIGEN_USING_STD(min).
+template <typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y) {
+  EIGEN_USING_STD(min)
+  // EIGEN_NOT_A_MACRO presumably keeps a function-like min macro from expanding — confirm.
+  return min EIGEN_NOT_A_MACRO(x, y);
+}
+// Maximum of x and y via an unqualified max after EIGEN_USING_STD(max).
+template <typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y) {
+  EIGEN_USING_STD(max)
+  // EIGEN_NOT_A_MACRO presumably keeps a function-like max macro from expanding — confirm.
+  return max EIGEN_NOT_A_MACRO(x, y);
+}
+#else
+// Generic minimum using only operator<; x is returned when neither is smaller.
+template <typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y) {
+  if (y < x) {
+    return y;
+  }
+  return x;
+}
+// float specialization: uses fminf.
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float mini(const float& x, const float& y) {
+  return fminf(x, y);
+}
+// double specialization: uses fmin.
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double mini(const double& x, const double& y) {
+  return fmin(x, y);
+}
+// long double specialization, compiled only outside the GPU compile phase.
+#ifndef EIGEN_GPU_COMPILE_PHASE
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE long double mini(const long double& x, const long double& y) {
+#if defined(EIGEN_HIPCC)
+  // no "fminl" on HIP yet
+  return (x < y) ? x : y;
+#else
+  return fminl(x, y);
+#endif
+}
+#endif
+// Generic maximum using only operator<; x is returned when neither is larger.
+template <typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y) {
+  if (x < y) {
+    return y;
+  }
+  return x;
+}
+// float specialization: uses fmaxf.
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float maxi(const float& x, const float& y) {
+  return fmaxf(x, y);
+}
+// double specialization: uses fmax.
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double maxi(const double& x, const double& y) {
+  return fmax(x, y);
+}
+// long double specialization, compiled only outside the GPU compile phase.
+#ifndef EIGEN_GPU_COMPILE_PHASE
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE long double maxi(const long double& x, const long double& y) {
+#if defined(EIGEN_HIPCC)
+  // no "fmaxl" on HIP yet
+  return (x > y) ? x : y;
+#else
+  return fmaxl(x, y);
+#endif
+}
+#endif
+#endif
+// SYCL device builds only: helper macros that stamp out explicit specializations
+// of numext function templates forwarding to the cl::sycl:: function of a given
+// name, for lists of cl::sycl scalar types. The *_GEN_* macros at the bottom are
+// the actual generators; the others expand to them for each type in a family
+// (signed / unsigned integers, floating point). The block ends by specializing
+// mini and maxi.
+#if defined(SYCL_DEVICE_ONLY)
+#define SYCL_SPECIALIZE_SIGNED_INTEGER_TYPES_BINARY(NAME, FUNC) \
+  SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_char)    \
+  SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_short)   \
+  SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_int)     \
+  SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_long)
+#define SYCL_SPECIALIZE_SIGNED_INTEGER_TYPES_UNARY(NAME, FUNC) \
+  SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_char)    \
+  SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_short)   \
+  SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_int)     \
+  SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_long)
+#define SYCL_SPECIALIZE_UNSIGNED_INTEGER_TYPES_BINARY(NAME, FUNC) \
+  SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_uchar)     \
+  SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_ushort)    \
+  SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_uint)      \
+  SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_ulong)
+#define SYCL_SPECIALIZE_UNSIGNED_INTEGER_TYPES_UNARY(NAME, FUNC) \
+  SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_uchar)     \
+  SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_ushort)    \
+  SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_uint)      \
+  SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_ulong)
+#define SYCL_SPECIALIZE_INTEGER_TYPES_BINARY(NAME, FUNC)  \
+  SYCL_SPECIALIZE_SIGNED_INTEGER_TYPES_BINARY(NAME, FUNC) \
+  SYCL_SPECIALIZE_UNSIGNED_INTEGER_TYPES_BINARY(NAME, FUNC)
+#define SYCL_SPECIALIZE_INTEGER_TYPES_UNARY(NAME, FUNC)  \
+  SYCL_SPECIALIZE_SIGNED_INTEGER_TYPES_UNARY(NAME, FUNC) \
+  SYCL_SPECIALIZE_UNSIGNED_INTEGER_TYPES_UNARY(NAME, FUNC)
+#define SYCL_SPECIALIZE_FLOATING_TYPES_BINARY(NAME, FUNC)     \
+  SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_float) \
+  SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_double)
+#define SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(NAME, FUNC)     \
+  SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_float) \
+  SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_double)
+#define SYCL_SPECIALIZE_FLOATING_TYPES_UNARY_FUNC_RET_TYPE(NAME, FUNC, RET_TYPE) \
+  SYCL_SPECIALIZE_GEN_UNARY_FUNC(NAME, FUNC, RET_TYPE, cl::sycl::cl_float)       \
+  SYCL_SPECIALIZE_GEN_UNARY_FUNC(NAME, FUNC, RET_TYPE, cl::sycl::cl_double)
+// Generator: explicit specialization of unary NAME for ARG_TYPE, returning RET_TYPE.
+#define SYCL_SPECIALIZE_GEN_UNARY_FUNC(NAME, FUNC, RET_TYPE, ARG_TYPE)     \
+  template <>                                                              \
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE RET_TYPE NAME(const ARG_TYPE& x) { \
+    return cl::sycl::FUNC(x);                                              \
+  }
+#define SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, TYPE) SYCL_SPECIALIZE_GEN_UNARY_FUNC(NAME, FUNC, TYPE, TYPE)
+// Generator: explicit specialization of binary NAME for (ARG_TYPE1, ARG_TYPE2), returning RET_TYPE.
+#define SYCL_SPECIALIZE_GEN1_BINARY_FUNC(NAME, FUNC, RET_TYPE, ARG_TYPE1, ARG_TYPE2)            \
+  template <>                                                                                   \
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE RET_TYPE NAME(const ARG_TYPE1& x, const ARG_TYPE2& y) { \
+    return cl::sycl::FUNC(x, y);                                                                \
+  }
+#define SYCL_SPECIALIZE_GEN2_BINARY_FUNC(NAME, FUNC, RET_TYPE, ARG_TYPE) \
+  SYCL_SPECIALIZE_GEN1_BINARY_FUNC(NAME, FUNC, RET_TYPE, ARG_TYPE, ARG_TYPE)
+#define SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, TYPE) SYCL_SPECIALIZE_GEN2_BINARY_FUNC(NAME, FUNC, TYPE, TYPE)
+// mini / maxi: cl::sycl::min / max for integers, cl::sycl::fmin / fmax for floating point.
+SYCL_SPECIALIZE_INTEGER_TYPES_BINARY(mini, min)
+SYCL_SPECIALIZE_FLOATING_TYPES_BINARY(mini, fmin)
+SYCL_SPECIALIZE_INTEGER_TYPES_BINARY(maxi, max)
+SYCL_SPECIALIZE_FLOATING_TYPES_BINARY(maxi, fmax)
+#endif
+// numext::arg: forwards to the arg implementation class selected by EIGEN_MATHFUNC_IMPL.
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(arg, Scalar) arg(const Scalar& x) {
+  return EIGEN_MATHFUNC_IMPL(arg, Scalar)::run(x);
+}
+// numext::imag_ref for const arguments: the result of internal::imag_ref_impl,
+// with const added to the value type of the return type.
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline internal::add_const_on_value_type_t<EIGEN_MATHFUNC_RETVAL(imag_ref, Scalar)> imag_ref(
+    const Scalar& x) {
+  return internal::imag_ref_impl<Scalar>::run(x);
+}
+// numext::imag_ref for non-const arguments: forwards to the imag_ref implementation class.
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(imag_ref, Scalar) imag_ref(Scalar& x) {
+  return EIGEN_MATHFUNC_IMPL(imag_ref, Scalar)::run(x);
+}
+// numext::conj: forwards to the conj implementation class selected by EIGEN_MATHFUNC_IMPL.
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(conj, Scalar) conj(const Scalar& x) {
+  return EIGEN_MATHFUNC_IMPL(conj, Scalar)::run(x);
+}
+// numext::sign: forwards to the sign implementation class selected by EIGEN_MATHFUNC_IMPL.
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(sign, Scalar) sign(const Scalar& x) {
+  return EIGEN_MATHFUNC_IMPL(sign, Scalar)::run(x);
+}
+// numext::negate: forwards to the negate implementation class selected by EIGEN_MATHFUNC_IMPL.
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(negate, Scalar) negate(const Scalar& x) {
+  return EIGEN_MATHFUNC_IMPL(negate, Scalar)::run(x);
+}
+// numext::abs2: forwards to the abs2 implementation class selected by EIGEN_MATHFUNC_IMPL.
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(abs2, Scalar) abs2(const Scalar& x) {
+  return EIGEN_MATHFUNC_IMPL(abs2, Scalar)::run(x);
+}
+// Non-template overload for bool: abs2 is the identity.
+EIGEN_DEVICE_FUNC inline bool abs2(bool x) { return x; }
+// Absolute difference |x - y|, always subtracting the smaller operand from the
+// larger one so the subtraction itself never goes negative.
+template <typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T absdiff(const T& x, const T& y) {
+  if (x > y) {
+    return x - y;
+  }
+  return y - x;
+}
+// float specialization: fabsf of the difference.
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float absdiff(const float& x, const float& y) {
+  return fabsf(x - y);
+}
+// double specialization: fabs of the difference.
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double absdiff(const double& x, const double& y) {
+  return fabs(x - y);
+}
+// HIP and CUDA do not support long double.
+// long double specialization (fabsl of the difference), hence host-only.
+#ifndef EIGEN_GPU_COMPILE_PHASE
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE long double absdiff(const long double& x, const long double& y) {
+  return fabsl(x - y);
+}
+#endif
+// numext::norm1: forwards to the norm1 implementation class selected by EIGEN_MATHFUNC_IMPL.
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(norm1, Scalar) norm1(const Scalar& x) {
+  return EIGEN_MATHFUNC_IMPL(norm1, Scalar)::run(x);
+}
+// numext::hypot: forwards to the hypot implementation class selected by EIGEN_MATHFUNC_IMPL.
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(hypot, Scalar) hypot(const Scalar& x, const Scalar& y) {
+  return EIGEN_MATHFUNC_IMPL(hypot, Scalar)::run(x, y);
+}
+// SYCL device builds: specialize hypot for cl_float / cl_double via cl::sycl::hypot.
+#if defined(SYCL_DEVICE_ONLY)
+SYCL_SPECIALIZE_FLOATING_TYPES_BINARY(hypot, hypot)
+#endif
+// numext::log1p: forwards to the log1p implementation class selected by EIGEN_MATHFUNC_IMPL.
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(log1p, Scalar) log1p(const Scalar& x) {
+  return EIGEN_MATHFUNC_IMPL(log1p, Scalar)::run(x);
+}
+// SYCL device builds: specialize log1p for cl_float / cl_double via cl::sycl::log1p.
+#if defined(SYCL_DEVICE_ONLY)
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(log1p, log1p)
+#endif
+#if defined(EIGEN_GPUCC)
+// GPU compilers: float log1p uses the global-namespace ::log1pf.
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float log1p(const float& x) {
+  return ::log1pf(x);
+}
+// GPU compilers: double log1p uses the global-namespace ::log1p.
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double log1p(const double& x) {
+  return ::log1p(x);
+}
+#endif
+/** Forwards \a x and \a y to \c internal::pow_impl for this pair of scalar types; the return type is that
+ * implementation's \c result_type. */
+template <typename ScalarX, typename ScalarY>
+EIGEN_DEVICE_FUNC inline typename internal::pow_impl<ScalarX, ScalarY>::result_type pow(const ScalarX& x,
+                                                                                        const ScalarY& y) {
+  typedef internal::pow_impl<ScalarX, ScalarY> PowImpl;
+  return PowImpl::run(x, y);
+}
+#if defined(SYCL_DEVICE_ONLY)
+SYCL_SPECIALIZE_FLOATING_TYPES_BINARY(pow, pow)
+#endif
+template <typename T>  // numext::isnan: forwards to internal::isnan_impl and returns its result.
+EIGEN_DEVICE_FUNC bool(isnan)(const T& x) {  // name is parenthesized; presumably guards against an isnan macro
+  return internal::isnan_impl(x);
+}
+template <typename T>  // numext::isinf: forwards to internal::isinf_impl and returns its result.
+EIGEN_DEVICE_FUNC bool(isinf)(const T& x) {  // parenthesized for the same (presumed) reason as isnan
+  return internal::isinf_impl(x);
+}
+template <typename T>  // numext::isfinite: forwards to internal::isfinite_impl and returns its result.
+EIGEN_DEVICE_FUNC bool(isfinite)(const T& x) {  // parenthesized for the same (presumed) reason as isnan
+  return internal::isfinite_impl(x);
+}
+#if defined(SYCL_DEVICE_ONLY)  // SYCL device builds add specializations through the helper macro below
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY_FUNC_RET_TYPE(isnan, isnan, bool)
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY_FUNC_RET_TYPE(isinf, isinf, bool)
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY_FUNC_RET_TYPE(isfinite, isfinite, bool)
+#endif
+/** Forwards \a x to \c internal::nearest_integer_impl<Scalar>::run_rint. */
+template <typename Scalar>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar rint(const Scalar& x) {
+  typedef internal::nearest_integer_impl<Scalar> NearestImpl;
+  return NearestImpl::run_rint(x);
+}
+/** Forwards \a x to \c internal::nearest_integer_impl<Scalar>::run_round. */
+template <typename Scalar>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar round(const Scalar& x) {
+  typedef internal::nearest_integer_impl<Scalar> NearestImpl;
+  return NearestImpl::run_round(x);
+}
+/** Forwards \a x to \c internal::nearest_integer_impl<Scalar>::run_floor. */
+template <typename Scalar>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar(floor)(const Scalar& x) {
+  typedef internal::nearest_integer_impl<Scalar> NearestImpl;
+  return NearestImpl::run_floor(x);
+}
+/** Forwards \a x to \c internal::nearest_integer_impl<Scalar>::run_ceil. */
+template <typename Scalar>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar(ceil)(const Scalar& x) {
+  typedef internal::nearest_integer_impl<Scalar> NearestImpl;
+  return NearestImpl::run_ceil(x);
+}
+/** Forwards \a x to \c internal::nearest_integer_impl<Scalar>::run_trunc. */
+template <typename Scalar>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar(trunc)(const Scalar& x) {
+  typedef internal::nearest_integer_impl<Scalar> NearestImpl;
+  return NearestImpl::run_trunc(x);
+}
+#if defined(SYCL_DEVICE_ONLY)
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(round, round)
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(floor, floor)
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(ceil, ceil)
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(trunc, trunc)
+#endif
+#if defined(EIGEN_GPUCC)
+// With a GPU compiler, float and double call the global C math functions directly.
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float floor(const float& x) {
+  return ::floorf(x);
+}
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double floor(const double& x) {
+  return ::floor(x);
+}
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float ceil(const float& x) {
+  return ::ceilf(x);
+}
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double ceil(const double& x) {
+  return ::ceil(x);
+}
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float trunc(const float& x) {
+  return ::truncf(x);
+}
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double trunc(const double& x) {
+  return ::trunc(x);
+}
+#endif
+// Integer division of a by b, rounded up.
+// T must be an integer type (checked at compile time); callers are expected to pass a >= 0 and b > 0.
+template <typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE constexpr T div_ceil(T a, T b) {
+  using UnsignedT = typename internal::make_unsigned<T>::type;
+  EIGEN_STATIC_ASSERT((NumTraits<T>::IsInteger), THIS FUNCTION IS FOR INTEGER TYPES)
+  // Working on unsigned copies tells the compiler the operands are non-negative, enabling better code.
+  const UnsignedT ua = UnsignedT(a);
+  const UnsignedT ub = UnsignedT(b);
+  if (ua == 0) {
+    return 0;
+  }
+  // (a - 1) / b + 1 is used instead of (a + b - 1) / b because it cannot overflow.
+  return (ua - 1) / ub + 1;
+}
+// Rounds a down to the nearest multiple of b, i.e. returns b * (a / b) using integer division.
+// T and U must be integer types (checked at compile time); callers are expected to pass a >= 0 and b > 0.
+template <typename T, typename U>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE constexpr T round_down(T a, U b) {
+  EIGEN_STATIC_ASSERT((NumTraits<T>::IsInteger), THIS FUNCTION IS FOR INTEGER TYPES)
+  EIGEN_STATIC_ASSERT((NumTraits<U>::IsInteger), THIS FUNCTION IS FOR INTEGER TYPES)
+  using UnsignedT = typename internal::make_unsigned<T>::type;
+  using UnsignedU = typename internal::make_unsigned<U>::type;
+  // Working on unsigned copies tells the compiler the operands are non-negative, enabling better code.
+  const UnsignedT ua = UnsignedT(a);
+  const UnsignedU ub = UnsignedU(b);
+  // The integer quotient drops the remainder; multiplying back by b yields the multiple of b at or below a.
+  return ub * (ua / ub);
+}
+/** Floor of the base-2 logarithm of a positive 32-bit integer.
+ * For x == 0 the result is 0. */
+constexpr int log2(int x) {
+  // Lookup table indexed by the top five bits of the multiplicative hash computed below.
+  constexpr int table[32] = {0, 9,  1,  10, 13, 21, 2,  29, 11, 14, 16, 18, 22, 25, 3, 30,
+                             8, 12, 20, 28, 15, 17, 24, 7,  19, 27, 23, 6,  26, 5,  4, 31};
+  unsigned int v(x);
+  // Smear the highest set bit into every lower position (shifts of 1, 2, 4, 8 and 16).
+  for (unsigned int shift = 1; shift <= 16; shift <<= 1) {
+    v |= v >> shift;
+  }
+  return table[(v * 0x07C4ACDDU) >> 27];
+}
+/** \returns the square root of \a x.
+ *
+ * It is essentially equivalent to
+ * \code using std::sqrt; return sqrt(x); \endcode
+ * but slightly faster for float/double and some compilers (e.g., gcc), thanks to
+ * specializations when SSE is enabled.
+ *
+ * Its usage is justified in performance-critical functions, like norm/normalize.
+ */
+template <typename Scalar>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE EIGEN_MATHFUNC_RETVAL(sqrt, Scalar) sqrt(const Scalar& x) {
+  typedef EIGEN_MATHFUNC_IMPL(sqrt, Scalar) SqrtImpl;
+  return SqrtImpl::run(x);
+}
+// Boolean specialization: returns the argument unchanged, which avoids an implicit float to bool conversion
+// (-Wimplicit-conversion-floating-point-to-bool).
+template <>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_DEVICE_FUNC bool sqrt<bool>(const bool& x) {
+  return x;
+}
+#if defined(SYCL_DEVICE_ONLY)
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(sqrt, sqrt)
+#endif
+/** \returns the cube root of \a x.
+ *
+ * Non-complex types use the \c cbrt found by the unqualified call (the standard one is made visible first).
+ * Complex types are raised to the power 1/3 with \c pow.
+ **/
+template <typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::enable_if_t<!NumTraits<T>::IsComplex, T> cbrt(const T& x) {
+  EIGEN_USING_STD(cbrt);
+  return static_cast<T>(cbrt(x));
+}
+template <typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::enable_if_t<NumTraits<T>::IsComplex, T> cbrt(const T& x) {
+  EIGEN_USING_STD(pow);
+  typedef typename NumTraits<T>::Real Real;
+  // Form 1/3 directly in the real type: rounding it to double first (1.0 / 3.0) would discard
+  // precision when Real is wider than double.
+  return pow(x, Real(1) / Real(3));
+}
+/** \returns the reciprocal square root of \a x, as computed by \c internal::rsqrt_impl<T>. **/
+template <typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T rsqrt(const T& x) {
+  typedef internal::rsqrt_impl<T> RsqrtImpl;
+  return RsqrtImpl::run(x);
+}
+/** Forwards \a x to \c internal::log_impl<T> and returns its result. */
+template <typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T log(const T& x) {
+  typedef internal::log_impl<T> LogImpl;
+  return LogImpl::run(x);
+}
+#if defined(SYCL_DEVICE_ONLY)
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(log, log)
+#endif
+#if defined(EIGEN_GPUCC)
+// With a GPU compiler, float and double call the global C math functions directly.
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float log(const float& x) {
+  return ::logf(x);
+}
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double log(const double& x) {
+  return ::log(x);
+}
+#endif
+template <typename T>  // numext::abs overload enabled for signed or complex T; returns NumTraits<T>::Real.
+EIGEN_DEVICE_FUNC
+EIGEN_ALWAYS_INLINE std::enable_if_t<NumTraits<T>::IsSigned || NumTraits<T>::IsComplex, typename NumTraits<T>::Real>
+abs(const T& x) {
+  EIGEN_USING_STD(abs);  // presumably brings the standard abs into scope for the unqualified call below
+  return abs(x);
+}
+template <typename T>  // numext::abs overload enabled for T that is neither signed nor complex.
+EIGEN_DEVICE_FUNC
+EIGEN_ALWAYS_INLINE std::enable_if_t<!(NumTraits<T>::IsSigned || NumTraits<T>::IsComplex), typename NumTraits<T>::Real>
+abs(const T& x) {
+  return x;  // the value is returned unchanged
+}
+#if defined(SYCL_DEVICE_ONLY)  // SYCL device builds add specializations through the helper macros below
+SYCL_SPECIALIZE_INTEGER_TYPES_UNARY(abs, abs)
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(abs, fabs)
+#endif
+#if defined(EIGEN_GPUCC)  // explicit specializations calling the global C math functions
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float abs(const float& x) {
+  return ::fabsf(x);
+}
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double abs(const double& x) {
+  return ::fabs(x);
+}
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float abs(const std::complex<float>& x) {
+  return ::hypotf(x.real(), x.imag());  // complex modulus from the real and imaginary parts
+}
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double abs(const std::complex<double>& x) {
+  return ::hypot(x.real(), x.imag());  // complex modulus from the real and imaginary parts
+}
+#endif
+/** Helper behind \c signbit, selected on whether \a Scalar is an integer type and whether it is signed. */
+template <typename Scalar, bool IsInteger = NumTraits<Scalar>::IsInteger, bool IsSigned = NumTraits<Scalar>::IsSigned>
+struct signbit_impl;
+/** Signed non-integer types: the value is reinterpreted as a signed integer of the same size, shifted right
+ * by (bit width - 1), and the resulting bit pattern is reinterpreted as \a Scalar again. */
+template <typename Scalar>
+struct signbit_impl<Scalar, false, true> {
+  static constexpr size_t Size = sizeof(Scalar);
+  static constexpr size_t Shift = (CHAR_BIT * Size) - 1;
+  using intSize_t = typename get_integer_by_size<Size>::signed_type;
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static Scalar run(const Scalar& x) {
+    const intSize_t bits = bit_cast<intSize_t, Scalar>(x);
+    // With an arithmetic right shift this leaves all ones for a set sign bit and all zeros otherwise.
+    const intSize_t mask = bits >> Shift;
+    return bit_cast<Scalar, intSize_t>(mask);
+  }
+};
+/** Signed integer types: the value itself is shifted right by (bit width - 1). */
+template <typename Scalar>
+struct signbit_impl<Scalar, true, true> {
+  static constexpr size_t Size = sizeof(Scalar);
+  static constexpr size_t Shift = (CHAR_BIT * Size) - 1;
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static constexpr Scalar run(const Scalar& x) { return x >> Shift; }
+};
+/** Unsigned integer types: the result is always zero. */
+template <typename Scalar>
+struct signbit_impl<Scalar, true, false> {
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static constexpr Scalar run(const Scalar&) { return Scalar(0); }
+};
+/** Forwards \a x to the \c signbit_impl specialization chosen for \a Scalar and returns its result. */
+template <typename Scalar>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static constexpr Scalar signbit(const Scalar& x) {
+  typedef signbit_impl<Scalar> SignbitImpl;
+  return SignbitImpl::run(x);
+}
+/** \returns the exponential of \a x, using the \c exp that the unqualified call resolves to. */
+template <typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T exp(const T& x) {
+  EIGEN_USING_STD(exp);
+  return exp(x);
+}
+// MSVC screws up some edge-cases for std::exp(complex).
+#ifdef EIGEN_COMP_MSVC
+template <typename RealScalar>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::complex<RealScalar> exp(const std::complex<RealScalar>& x) {
+  EIGEN_USING_STD(exp);
+  // Only inputs whose imaginary part is infinite or NaN are handled by hand.
+  if (!(isfinite)(imag_ref(x))) {
+    // (finite, +-inf) and (finite, NaN) both give (NaN, NaN).
+    if ((isfinite)(real_ref(x))) {
+      return std::complex<RealScalar>(NumTraits<RealScalar>::quiet_NaN(), NumTraits<RealScalar>::quiet_NaN());
+    }
+    // (+inf, +-inf) and (+inf, NaN) give (+-inf, NaN); the sign of the real part is unspecified, +inf is used.
+    if (real_ref(x) == NumTraits<RealScalar>::infinity()) {
+      return std::complex<RealScalar>(NumTraits<RealScalar>::infinity(), NumTraits<RealScalar>::quiet_NaN());
+    }
+  }
+  return exp(x);
+}
+#endif
+#if defined(SYCL_DEVICE_ONLY)
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(exp, exp)
+#endif
+#if defined(EIGEN_GPUCC)
+// With a GPU compiler, float and double call the global C math functions directly.
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float exp(const float& x) {
+  return ::expf(x);
+}
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double exp(const double& x) {
+  return ::exp(x);
+}
+// Complex exponential written out as exp(re) * (cos(im) + i * sin(im)).
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::complex<float> exp(const std::complex<float>& x) {
+  const float magnitude = ::expf(x.real());
+  return std::complex<float>(magnitude * ::cosf(x.imag()), magnitude * ::sinf(x.imag()));
+}
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::complex<double> exp(const std::complex<double>& x) {
+  const double magnitude = ::exp(x.real());
+  return std::complex<double>(magnitude * ::cos(x.imag()), magnitude * ::sin(x.imag()));
+}
+#endif
+template <typename T>  // numext::exp2: generic entry point, returns exp2(x) as type T.
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T exp2(const T& x) {
+  EIGEN_USING_STD(exp2);  // presumably brings the standard exp2 into scope for the unqualified call below
+  return exp2(x);
+}
+// MSVC screws up some edge-cases for std::exp2(complex).
+#ifdef EIGEN_COMP_MSVC
+/** Base-2 exponential of a complex number, with infinite/NaN imaginary parts handled explicitly.
+ *
+ * Regular inputs are evaluated through the identity 2^z = exp(z * ln 2).
+ */
+template <typename RealScalar>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::complex<RealScalar> exp2(const std::complex<RealScalar>& x) {
+  EIGEN_USING_STD(exp);
+  // If z is (x,+-inf) (for any finite x), the result is (NaN,NaN) and FE_INVALID is raised.
+  // If z is (x,NaN) (for any finite x), the result is (NaN,NaN) and FE_INVALID may be raised.
+  if ((isfinite)(real_ref(x)) && !(isfinite)(imag_ref(x))) {
+    return std::complex<RealScalar>(NumTraits<RealScalar>::quiet_NaN(), NumTraits<RealScalar>::quiet_NaN());
+  }
+  // If z is (+inf,+-inf), the result is (+-inf,NaN) and FE_INVALID is raised (the sign of the real part is
+  // unspecified). If z is (+inf,NaN), the result is (+-inf,NaN) (the sign of the real part is unspecified).
+  if ((real_ref(x) == NumTraits<RealScalar>::infinity() && !(isfinite)(imag_ref(x)))) {
+    return std::complex<RealScalar>(NumTraits<RealScalar>::infinity(), NumTraits<RealScalar>::quiet_NaN());
+  }
+  // Evaluate 2^z as exp(z * ln 2). An unqualified exp2(x) here would select this very overload again
+  // (only exp is brought into scope above), i.e. recurse without end.
+  return exp(x * static_cast<RealScalar>(EIGEN_LN2));
+}
+#endif
+#if defined(SYCL_DEVICE_ONLY)
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(exp2, exp2)
+#endif
+#if defined(EIGEN_GPUCC)
+// With a GPU compiler, float and double call the global C math functions directly.
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float exp2(const float& x) {
+  return ::exp2f(x);
+}
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double exp2(const double& x) {
+  return ::exp2(x);
+}
+// Complex base-2 exponential written out as 2^re * (cos(ln2 * im) + i * sin(ln2 * im)).
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::complex<float> exp2(const std::complex<float>& x) {
+  const float magnitude = ::exp2f(x.real());
+  const float angle = static_cast<float>(EIGEN_LN2) * x.imag();
+  return std::complex<float>(magnitude * ::cosf(angle), magnitude * ::sinf(angle));
+}
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::complex<double> exp2(const std::complex<double>& x) {
+  const double magnitude = ::exp2(x.real());
+  const double angle = static_cast<double>(EIGEN_LN2) * x.imag();
+  return std::complex<double>(magnitude * ::cos(angle), magnitude * ::sin(angle));
+}
+#endif
+/** Forwards \a x to the \c expm1 implementation selected for \a Scalar and returns its result. */
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(expm1, Scalar) expm1(const Scalar& x) {
+  typedef EIGEN_MATHFUNC_IMPL(expm1, Scalar) Expm1Impl;
+  return Expm1Impl::run(x);
+}
+#if defined(SYCL_DEVICE_ONLY)
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(expm1, expm1)
+#endif
+#if defined(EIGEN_GPUCC)
+// With a GPU compiler, float and double call the global C math functions directly.
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float expm1(const float& x) {
+  return ::expm1f(x);
+}
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double expm1(const double& x) {
+  return ::expm1(x);
+}
+#endif
+template <typename T>  // numext::cos: generic entry point, returns cos(x) as type T.
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T cos(const T& x) {
+  EIGEN_USING_STD(cos);  // presumably brings the standard cos into scope for the unqualified call below
+  return cos(x);
+}
+#if defined(SYCL_DEVICE_ONLY)  // SYCL device builds add specializations through the helper macro below
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(cos, cos)
+#endif
+#if defined(EIGEN_GPUCC)  // explicit float/double specializations calling the global C math functions
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float cos(const float& x) {
+  return ::cosf(x);
+}
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double cos(const double& x) {
+  return ::cos(x);
+}
+#endif
+template <typename T>  // numext::sin: generic entry point, returns sin(x) as type T.
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T sin(const T& x) {
+  EIGEN_USING_STD(sin);  // presumably brings the standard sin into scope for the unqualified call below
+  return sin(x);
+}
+#if defined(SYCL_DEVICE_ONLY)  // SYCL device builds add specializations through the helper macro below
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(sin, sin)
+#endif
+#if defined(EIGEN_GPUCC)  // explicit float/double specializations calling the global C math functions
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float sin(const float& x) {
+  return ::sinf(x);
+}
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double sin(const double& x) {
+  return ::sin(x);
+}
+#endif
+template <typename T>  // numext::tan: generic entry point, returns tan(x) as type T.
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T tan(const T& x) {
+  EIGEN_USING_STD(tan);  // presumably brings the standard tan into scope for the unqualified call below
+  return tan(x);
+}
+#if defined(SYCL_DEVICE_ONLY)  // SYCL device builds add specializations through the helper macro below
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(tan, tan)
+#endif
+#if defined(EIGEN_GPUCC)  // explicit float/double specializations calling the global C math functions
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float tan(const float& x) {
+  return ::tanf(x);
+}
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double tan(const double& x) {
+  return ::tan(x);
+}
+#endif
+template <typename T>  // numext::acos: generic entry point, returns acos(x) as type T.
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T acos(const T& x) {
+  EIGEN_USING_STD(acos);  // presumably brings the standard acos into scope for the unqualified call below
+  return acos(x);
+}
+template <typename T>  // numext::acosh: generic entry point; the result is cast explicitly to T.
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T acosh(const T& x) {
+  EIGEN_USING_STD(acosh);  // presumably brings the standard acosh into scope for the unqualified call below
+  return static_cast<T>(acosh(x));
+}
+#if defined(SYCL_DEVICE_ONLY)  // SYCL device builds add specializations through the helper macro below
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(acos, acos)
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(acosh, acosh)
+#endif
+#if defined(EIGEN_GPUCC)  // explicit float/double specializations of acos calling the global C math functions
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float acos(const float& x) {
+  return ::acosf(x);
+}
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double acos(const double& x) {
+  return ::acos(x);
+}
+#endif
+template <typename T>  // numext::asin: generic entry point, returns asin(x) as type T.
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T asin(const T& x) {
+  EIGEN_USING_STD(asin);  // presumably brings the standard asin into scope for the unqualified call below
+  return asin(x);
+}
+template <typename T>  // numext::asinh: generic entry point; the result is cast explicitly to T.
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T asinh(const T& x) {
+  EIGEN_USING_STD(asinh);  // presumably brings the standard asinh into scope for the unqualified call below
+  return static_cast<T>(asinh(x));
+}
+#if defined(SYCL_DEVICE_ONLY)  // SYCL device builds add specializations through the helper macro below
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(asin, asin)
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(asinh, asinh)
+#endif
+#if defined(EIGEN_GPUCC)  // explicit float/double specializations of asin calling the global C math functions
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float asin(const float& x) {
+  return ::asinf(x);
+}
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double asin(const double& x) {
+  return ::asin(x);
+}
+#endif
+template <typename T>  // numext::atan: generic entry point; the result is cast explicitly to T.
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T atan(const T& x) {
+  EIGEN_USING_STD(atan);  // presumably brings the standard atan into scope for the unqualified call below
+  return static_cast<T>(atan(x));
+}
+template <typename T, std::enable_if_t<!NumTraits<T>::IsComplex, int> = 0>  // numext::atan2: non-complex T only
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T atan2(const T& y, const T& x) {  // argument order is (y, x)
+  EIGEN_USING_STD(atan2);  // presumably brings the standard atan2 into scope for the unqualified call below
+  return static_cast<T>(atan2(y, x));
+}
+template <typename T>  // numext::atanh: generic entry point; the result is cast explicitly to T.
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T atanh(const T& x) {
+  EIGEN_USING_STD(atanh);  // presumably brings the standard atanh into scope for the unqualified call below
+  return static_cast<T>(atanh(x));
+}
+#if defined(SYCL_DEVICE_ONLY)  // SYCL device builds add specializations through the helper macro below
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(atan, atan)
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(atanh, atanh)
+#endif
+#if defined(EIGEN_GPUCC)  // explicit float/double specializations of atan calling the global C math functions
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float atan(const float& x) {
+  return ::atanf(x);
+}
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double atan(const double& x) {
+  return ::atan(x);
+}
+#endif
+template <typename T>  // numext::cosh: generic entry point; the result is cast explicitly to T.
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T cosh(const T& x) {
+  EIGEN_USING_STD(cosh);  // presumably brings the standard cosh into scope for the unqualified call below
+  return static_cast<T>(cosh(x));
+}
+#if defined(SYCL_DEVICE_ONLY)  // SYCL device builds add specializations through the helper macro below
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(cosh, cosh)
+#endif
+#if defined(EIGEN_GPUCC)  // explicit float/double specializations calling the global C math functions
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float cosh(const float& x) {
+  return ::coshf(x);
+}
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double cosh(const double& x) {
+  return ::cosh(x);
+}
+#endif
+template <typename T>  // numext::sinh: generic entry point; the result is cast explicitly to T.
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T sinh(const T& x) {
+  EIGEN_USING_STD(sinh);  // presumably brings the standard sinh into scope for the unqualified call below
+  return static_cast<T>(sinh(x));
+}
+#if defined(SYCL_DEVICE_ONLY)  // SYCL device builds add specializations through the helper macro below
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(sinh, sinh)
+#endif
+#if defined(EIGEN_GPUCC)  // explicit float/double specializations calling the global C math functions
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float sinh(const float& x) {
+  return ::sinhf(x);
+}
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double sinh(const double& x) {
+  return ::sinh(x);
+}
+#endif
+template <typename T>  // numext::tanh: generic entry point, returns tanh(x) as type T.
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T tanh(const T& x) {
+  EIGEN_USING_STD(tanh);  // presumably brings the standard tanh into scope for the unqualified call below
+  return tanh(x);
+}
+#if (!defined(EIGEN_GPUCC)) && EIGEN_FAST_MATH && !defined(SYCL_DEVICE_ONLY)  // fast-math host builds only
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float tanh(float x) { return internal::ptanh_float(x); }  // non-template float overload
+#endif
+#if defined(SYCL_DEVICE_ONLY)  // SYCL device builds add specializations through the helper macro below
+SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(tanh, tanh)
+#endif
+#if defined(EIGEN_GPUCC)  // explicit float/double specializations calling the global C math functions
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float tanh(const float& x) {
+  return ::tanhf(x);
+}
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double tanh(const double& x) {
+  return ::tanh(x);
+}
+#endif
+template <typename T>  // numext::fmod: generic entry point, returns fmod(a, b) as type T.
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T fmod(const T& a, const T& b) {
+  EIGEN_USING_STD(fmod);  // presumably brings the standard fmod into scope for the unqualified call below
+  return fmod(a, b);
+}
+#if defined(SYCL_DEVICE_ONLY)  // SYCL device builds add specializations through the helper macro below
+SYCL_SPECIALIZE_FLOATING_TYPES_BINARY(fmod, fmod)
+#endif
+#if defined(EIGEN_GPUCC)  // explicit float/double specializations calling the global C math functions
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float fmod(const float& a, const float& b) {
+  return ::fmodf(a, b);
+}
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double fmod(const double& a, const double& b) {
+  return ::fmod(a, b);
+}
+#endif
+#if defined(SYCL_DEVICE_ONLY)
+// Release the SYCL specialization helper macros; they are not used past this point in the file.
+#undef SYCL_SPECIALIZE_SIGNED_INTEGER_TYPES_BINARY
+#undef SYCL_SPECIALIZE_SIGNED_INTEGER_TYPES_UNARY
+#undef SYCL_SPECIALIZE_UNSIGNED_INTEGER_TYPES_BINARY
+#undef SYCL_SPECIALIZE_UNSIGNED_INTEGER_TYPES_UNARY
+#undef SYCL_SPECIALIZE_INTEGER_TYPES_BINARY
+// SYCL_SPECIALIZE_INTEGER_TYPES_UNARY is used for abs above and must be released as well.
+#undef SYCL_SPECIALIZE_INTEGER_TYPES_UNARY
+#undef SYCL_SPECIALIZE_FLOATING_TYPES_BINARY
+#undef SYCL_SPECIALIZE_FLOATING_TYPES_UNARY
+#undef SYCL_SPECIALIZE_FLOATING_TYPES_UNARY_FUNC_RET_TYPE
+#undef SYCL_SPECIALIZE_GEN_UNARY_FUNC
+#undef SYCL_SPECIALIZE_UNARY_FUNC
+#undef SYCL_SPECIALIZE_GEN1_BINARY_FUNC
+#undef SYCL_SPECIALIZE_GEN2_BINARY_FUNC
+#undef SYCL_SPECIALIZE_BINARY_FUNC
+#endif
+template <typename Scalar, typename Enable = std::enable_if_t<std::is_integral<Scalar>::value>>  // integral Scalar only
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar logical_shift_left(const Scalar& a, int n) {  // shifts a left by n bits
+  return a << n;  // built-in shift; the result is converted back to Scalar on return
+}
+/** Shifts the bit pattern of \a a right by \a n positions, performing the shift on the unsigned integer type
+ * of the same size, and returns the result reinterpreted as \a Scalar. Enabled for integral types only. */
+template <typename Scalar, typename Enable = std::enable_if_t<std::is_integral<Scalar>::value>>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar logical_shift_right(const Scalar& a, int n) {
+  typedef typename numext::get_integer_by_size<sizeof(Scalar)>::unsigned_type Bits;
+  const Bits pattern = bit_cast<Bits, Scalar>(a);
+  return bit_cast<Scalar, Bits>(pattern >> n);
+}
+/** Shifts the bit pattern of \a a right by \a n positions, performing the shift on the signed integer type
+ * of the same size, and returns the result reinterpreted as \a Scalar. Enabled for integral types only. */
+template <typename Scalar, typename Enable = std::enable_if_t<std::is_integral<Scalar>::value>>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar arithmetic_shift_right(const Scalar& a, int n) {
+  typedef typename numext::get_integer_by_size<sizeof(Scalar)>::signed_type SignedBits;
+  const SignedBits pattern = bit_cast<SignedBits, Scalar>(a);
+  return bit_cast<Scalar, SignedBits>(pattern >> n);
+}
+// The overloads of std::fma are made visible in this namespace.
+using std::fma;
+// In addition, a generic template forwards to internal::fma_impl<Scalar>.
+template <typename Scalar>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar fma(const Scalar& x, const Scalar& y, const Scalar& z) {
+  typedef internal::fma_impl<Scalar> FmaImpl;
+  return FmaImpl::run(x, y, z);
+}
+}  // end namespace numext
+namespace internal {
+/** Complex overload: \a x is finite when both its real and its imaginary part are finite. */
+template <typename T>
+EIGEN_DEVICE_FUNC bool isfinite_impl(const std::complex<T>& x) {
+  if (!(numext::isfinite)(numext::real(x))) {
+    return false;
+  }
+  return (numext::isfinite)(numext::imag(x));
+}
+/** Complex overload: \a x is NaN when its real or its imaginary part is NaN. */
+template <typename T>
+EIGEN_DEVICE_FUNC bool isnan_impl(const std::complex<T>& x) {
+  if ((numext::isnan)(numext::real(x))) {
+    return true;
+  }
+  return (numext::isnan)(numext::imag(x));
+}
+/** Complex overload: \a x is infinite when it is not NaN and its real or imaginary part is infinite. */
+template <typename T>
+EIGEN_DEVICE_FUNC bool isinf_impl(const std::complex<T>& x) {
+  const bool any_part_infinite = (numext::isinf)(numext::real(x)) || (numext::isinf)(numext::imag(x));
+  if (!any_part_infinite) {
+    return false;
+  }
+  // A NaN in either part takes precedence over an infinite part.
+  return !(numext::isnan)(x);
+}
+/****************************************************************************
+ * Implementation of fuzzy comparisons                                       *
+ ****************************************************************************/
+/** Default fuzzy comparisons, selected on whether \a Scalar is complex and whether it is an integer type.
+ * The primary template is empty; the partial specializations provide the comparisons. */
+template <typename Scalar, bool IsComplex, bool IsInteger>
+struct scalar_fuzzy_default_impl {};
+/** Non-complex, non-integer scalars: comparisons relative to the precision \a prec. */
+template <typename Scalar>
+struct scalar_fuzzy_default_impl<Scalar, false, false> {
+  using RealScalar = typename NumTraits<Scalar>::Real;
+  /** True when |x| <= |y| * prec. */
+  template <typename OtherScalar>
+  EIGEN_DEVICE_FUNC static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y,
+                                                         const RealScalar& prec) {
+    return numext::abs(x) <= numext::abs(y) * prec;
+  }
+  /** True when |x - y| <= min(|x|, |y|) * prec. */
+  EIGEN_DEVICE_FUNC static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec) {
+    return numext::abs(x - y) <= numext::mini(numext::abs(x), numext::abs(y)) * prec;
+  }
+  /** True when x <= y, or when x and y are approximately equal. */
+  EIGEN_DEVICE_FUNC static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar& prec) {
+    if (x <= y) {
+      return true;
+    }
+    return isApprox(x, y, prec);
+  }
+};
+/** Non-complex integer scalars: the comparisons are exact and the precision argument is ignored. */
+template <typename Scalar>
+struct scalar_fuzzy_default_impl<Scalar, false, true> {
+  using RealScalar = typename NumTraits<Scalar>::Real;
+  /** True when x is zero; the second operand is not looked at. */
+  template <typename OtherScalar>
+  EIGEN_DEVICE_FUNC static inline bool isMuchSmallerThan(const Scalar& x, const Scalar&, const RealScalar&) {
+    return x == Scalar(0);
+  }
+  /** Exact equality. */
+  EIGEN_DEVICE_FUNC static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar&) { return x == y; }
+  /** Plain x <= y. */
+  EIGEN_DEVICE_FUNC static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar&) {
+    return x <= y;
+  }
+};
+/** Complex scalars: comparisons on squared magnitudes against prec squared. No isApproxOrLessThan is provided. */
+template <typename Scalar>
+struct scalar_fuzzy_default_impl<Scalar, true, false> {
+  using RealScalar = typename NumTraits<Scalar>::Real;
+  /** True when |x|^2 <= |y|^2 * prec^2. */
+  template <typename OtherScalar>
+  EIGEN_DEVICE_FUNC static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y,
+                                                         const RealScalar& prec) {
+    return numext::abs2(x) <= numext::abs2(y) * prec * prec;
+  }
+  /** True when |x - y|^2 <= min(|x|^2, |y|^2) * prec^2. */
+  EIGEN_DEVICE_FUNC static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec) {
+    return numext::abs2(x - y) <= numext::mini(numext::abs2(x), numext::abs2(y)) * prec * prec;
+  }
+};
+/** Fuzzy comparison implementation for \a Scalar: inherits the default implementation that matches the
+ * IsComplex / IsInteger traits of \a Scalar. */
+template <typename Scalar>
+struct scalar_fuzzy_impl
+    : public scalar_fuzzy_default_impl<Scalar, NumTraits<Scalar>::IsComplex, NumTraits<Scalar>::IsInteger> {};
+/** Forwards to \c scalar_fuzzy_impl<Scalar>::isMuchSmallerThan; \a precision defaults to
+ * \c NumTraits<Scalar>::dummy_precision(). */
+template <typename Scalar, typename OtherScalar>
+EIGEN_DEVICE_FUNC inline bool isMuchSmallerThan(
+    const Scalar& x, const OtherScalar& y,
+    const typename NumTraits<Scalar>::Real& precision = NumTraits<Scalar>::dummy_precision()) {
+  typedef scalar_fuzzy_impl<Scalar> FuzzyImpl;
+  return FuzzyImpl::template isMuchSmallerThan<OtherScalar>(x, y, precision);
+}
+/** Forwards to \c scalar_fuzzy_impl<Scalar>::isApprox; \a precision defaults to
+ * \c NumTraits<Scalar>::dummy_precision(). */
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline bool isApprox(
+    const Scalar& x, const Scalar& y,
+    const typename NumTraits<Scalar>::Real& precision = NumTraits<Scalar>::dummy_precision()) {
+  typedef scalar_fuzzy_impl<Scalar> FuzzyImpl;
+  return FuzzyImpl::isApprox(x, y, precision);
+}
+/** Forwards to \c scalar_fuzzy_impl<Scalar>::isApproxOrLessThan; \a precision defaults to
+ * \c NumTraits<Scalar>::dummy_precision(). */
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline bool isApproxOrLessThan(
+    const Scalar& x, const Scalar& y,
+    const typename NumTraits<Scalar>::Real& precision = NumTraits<Scalar>::dummy_precision()) {
+  typedef scalar_fuzzy_impl<Scalar> FuzzyImpl;
+  return FuzzyImpl::isApproxOrLessThan(x, y, precision);
+}
+/******************************************
+***  The special case of the  bool type ***
+******************************************/
+/** Fuzzy comparisons for \c bool: all three are exact and ignore the precision argument. */
+template <>
+struct scalar_fuzzy_impl<bool> {
+  using RealScalar = bool;
+  /** True when x is false; the second operand is not looked at. */
+  template <typename OtherScalar>
+  EIGEN_DEVICE_FUNC static inline bool isMuchSmallerThan(const bool& x, const bool&, const bool&) {
+    return !x;
+  }
+  /** Exact equality. */
+  EIGEN_DEVICE_FUNC static inline bool isApprox(bool x, bool y, bool) { return x == y; }
+  /** x <= y on booleans, i.e. false only for x == true and y == false. */
+  EIGEN_DEVICE_FUNC static inline bool isApproxOrLessThan(const bool& x, const bool& y, const bool&) {
+    if (!x) {
+      return true;
+    }
+    return y;
+  }
+};
+}  // end namespace internal
+// Default implementations that rely on other numext implementations
+namespace internal {
+// Specialization for complex types that are not supported by std::expm1.
+template <typename RealScalar>
+struct expm1_impl<std::complex<RealScalar>> {
+  EIGEN_STATIC_ASSERT_NON_INTEGER(RealScalar)
+  EIGEN_DEVICE_FUNC static inline std::complex<RealScalar> run(const std::complex<RealScalar>& x) {
+    const RealScalar re = x.real();
+    const RealScalar im = x.imag();
+    // With z = re + i * im:
+    //   expm1(z) = exp(re) * (cos(im) + i * sin(im)) - 1
+    //   Imag(expm1(z)) = exp(re) * sin(im)
+    //   Real(expm1(z)) = exp(re) * cos(im) - 1
+    //                  = expm1(re) + exp(re) * (cos(im) - 1)
+    //                  = expm1(re) - 2 * exp(re) * sin(im / 2) ** 2
+    const RealScalar exp_re_minus_one = numext::expm1<RealScalar>(re);
+    const RealScalar exp_re = exp_re_minus_one + RealScalar(1.);
+    const RealScalar sin_half = numext::sin(im / RealScalar(2.));
+    const RealScalar sin_half_squared = sin_half * sin_half;
+    const RealScalar sin_full = numext::sin(im);
+    const RealScalar real_part = exp_re_minus_one - RealScalar(2.) * exp_re * sin_half_squared;
+    return std::complex<RealScalar>(real_part, exp_re * sin_full);
+  }
+};
+template <typename T>  // default reciprocal square root implementation
+struct rsqrt_impl {
+  EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE T run(const T& x) { return T(1) / numext::sqrt(x); }  // one divided by sqrt(x)
+};
+#if defined(EIGEN_GPU_COMPILE_PHASE)
+/** Complex conjugate during the GPU compile phase: built by hand from the real part and the negated
+ * imaginary part. */
+template <typename T>
+struct conj_impl<std::complex<T>, true> {
+  EIGEN_DEVICE_FUNC static inline std::complex<T> run(const std::complex<T>& x) {
+    const T re = numext::real(x);
+    const T im = numext::imag(x);
+    return std::complex<T>(re, -im);
+  }
+};
+#endif
+}  // end namespace internal
+}  // end namespace Eigen
+#endif  // EIGEN_MATHFUNCTIONS_H
--- a/eigen-master/Eigen/src/Core/MathFunctionsImpl.h
+++ b/eigen-master/Eigen/src/Core/MathFunctionsImpl.h
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Pedro Gonnet (pedro.gonnet@gmail.com)
+// Copyright (C) 2016 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#ifndef EIGEN_MATHFUNCTIONSIMPL_H
+#define EIGEN_MATHFUNCTIONSIMPL_H
+// IWYU pragma: private
+#include "./InternalHeaderCheck.h"
+namespace Eigen {
+namespace internal {
+/** \internal Fast reciprocal using Newton-Raphson's method.
+ Preconditions:
+   1. The starting guess provided in approx_a_recip must have at least half
+      the leading mantissa bits in the correct result, such that a single
+      Newton-Raphson step is sufficient to get within 1-2 ulps of the correct
+      result.
+   2. If a is zero, approx_a_recip must be infinite with the same sign as a.
+   3. If a is infinite, approx_a_recip must be zero with the same sign as a.
+   If the preconditions are satisfied, which they are for the _*_rcp_ps
+   instructions on x86, the result has a maximum relative error of 2 ulps,
+   and correctly handles reciprocals of zero, infinity, and NaN.
+   The Steps refinements are applied recursively: the estimate produced by
+   Steps - 1 refinements is refined once more here.
+*/
+template <typename Packet, int Steps>
+struct generic_reciprocal_newton_step {
+  static_assert(Steps > 0, "Steps must be at least 1.");
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Packet run(const Packet& a, const Packet& approx_a_recip) {
+    using Scalar = typename unpacket_traits<Packet>::type;
+    const Packet cst_two = pset1<Packet>(Scalar(2));
+    // Estimate after the previous Steps - 1 refinements.
+    const Packet prev = generic_reciprocal_newton_step<Packet, Steps - 1>::run(a, approx_a_recip);
+    // One Newton-Raphson refinement: x_{i} = x_{i-1} * (2 - a * x_{i-1}).
+    const Packet correction = pnmadd(a, prev, cst_two);
+    // A NaN correction means that a is either +/-0 or +/-Inf; the previous estimate is returned
+    // unchanged in that case.
+    const Packet correction_is_valid = pcmp_eq(correction, correction);
+    return pselect(correction_is_valid, pmul(prev, correction), prev);
+  }
+};
+/** \internal Recursion terminator for generic_reciprocal_newton_step: with zero steps left, the
+ * approximation is returned unchanged. */
+template <typename Packet>
+struct generic_reciprocal_newton_step<Packet, 0> {
+  // The second argument is the reciprocal approximation; it is named like the corresponding
+  // parameter of the primary template.
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Packet run(const Packet& /*unused*/, const Packet& approx_a_recip) {
+    return approx_a_recip;
+  }
+};
+/** \internal Fast reciprocal sqrt using Newton-Raphson's method.
+ Preconditions:
+   1. The starting guess provided in approx_rsqrt must have at least half
+      the leading mantissa bits in the correct result, such that a single
+      Newton-Raphson step is sufficient to get within 1-2 ulps of the correct
+      result.
+   2. If a is zero, approx_rsqrt must be infinite with the same sign as a.
+   3. If a is infinite, approx_rsqrt must be zero with the same sign as a.
+   If the preconditions are satisfied, the result has a maximum relative
+   error of 2 ulps, and correctly handles zero, infinity, and NaN. Positive
+   denormals are treated as zero.
+*/
+template <typename Packet, int Steps>
+struct generic_rsqrt_newton_step {
+  static_assert(Steps > 0, "Steps must be at least 1.");
+  using Scalar = typename unpacket_traits<Packet>::type;
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Packet run(const Packet& a, const Packet& approx_rsqrt) {
+    const Scalar kMinusHalf = Scalar(-1) / Scalar(2);
+    const Packet minus_half = pset1<Packet>(kMinusHalf);
+    const Packet minus_one = pset1<Packet>(Scalar(-1));
+    Packet estimate = approx_rsqrt;
+    for (int iteration = 0; iteration < Steps; ++iteration) {
+      // One Newton-Raphson refinement of the estimate y of 1/sqrt(a):
+      //   h = (a * y) * y - 1   (nearly 0 for a good estimate)
+      //   y = y - 0.5 * y * h
+      Packet a_times_y = pmul(a, estimate);
+      Packet minus_half_y = pmul(estimate, minus_half);
+      Packet residual = pmadd(a_times_y, estimate, minus_one);
+      estimate = pmadd(minus_half_y, residual, estimate);
+    }
+    // A NaN estimate means that either
+    // 1) the input is NaN, or
+    // 2) zero and infinity were multiplied.
+    // In both cases the initial approximation is returned.
+    return pselect(pisnan(estimate), approx_rsqrt, estimate);
+  }
+};
+template <typename Packet>
+struct generic_rsqrt_newton_step<Packet, 0> {  // Zero-step specialization: no Newton-Raphson refinement is applied.
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Packet run(const Packet& /*unused*/, const Packet& approx_rsqrt) {
+    return approx_rsqrt;  // The caller's approximation is returned unchanged.
+  }
+};
+/** \internal Fast sqrt using Newton-Raphson's method.
+ Preconditions:
+   1. The starting guess for the reciprocal sqrt provided in approx_rsqrt must
+      have at least half the leading mantissa bits in the correct result, such
+      that a single Newton-Raphson step is sufficient to get within 1-2 ulps of
+      the correct result.
+   2. If a is zero, approx_rsqrt must be infinite.
+   3. If a is infinite, approx_rsqrt must be zero.
+   If the preconditions are satisfied, which they are for the _*_rsqrt_ps
+   instructions on x86, the result has a maximum relative error of 2 ulps,
+   and correctly handles zero, infinity, and NaN. Positive denormal inputs
+   are treated as zero.
+
+   \tparam Packet packet type; its scalar type is taken from unpacket_traits<Packet>.
+   \tparam Steps total number of Newton-Raphson refinements applied to the reciprocal
+                 square root before it is multiplied by a; must be at least 1 (default 1).
+*/
+template <typename Packet, int Steps = 1>
+struct generic_sqrt_newton_step {
+  static_assert(Steps > 0, "Steps must be at least 1.");
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Packet run(const Packet& a, const Packet& approx_rsqrt) {
+    using Scalar = typename unpacket_traits<Packet>::type;
+    const Packet one_point_five = pset1<Packet>(Scalar(1.5));
+    const Packet minus_half = pset1<Packet>(Scalar(-0.5));
+    // If a is +inf or zero, return a directly.
+    const Packet inf_mask = pcmp_eq(a, pset1<Packet>(NumTraits<Scalar>::infinity()));
+    const Packet return_a = por(pcmp_eq(a, pzero(a)), inf_mask);
+    // Do a single step of Newton's iteration for reciprocal square root:
+    //   x_{n+1} = x_n * (1.5 + (-0.5 * x_n) * (a * x_n)).
+    // The Newton's step is computed this way to avoid over/under-flows.
+    Packet rsqrt = pmul(approx_rsqrt, pmadd(pmul(minus_half, approx_rsqrt), pmul(a, approx_rsqrt), one_point_five));
+    for (int step = 1; step < Steps; ++step) {  // Remaining Steps - 1 refinements, same formula.
+      rsqrt = pmul(rsqrt, pmadd(pmul(minus_half, rsqrt), pmul(a, rsqrt), one_point_five));
+    }
+    // Return sqrt(a) = a * rsqrt(a) for non-zero finite positive arguments.
+    // Return a itself for 0 or +inf, NaN for negative arguments.
+    return pselect(return_a, a, pmul(a, rsqrt));
+  }
+};
+// Returns sqrt(x^2 + y^2) for two real arguments. hypot_impl passes absolute values here.
+// The result is formed as max * sqrt(1 + (min / max)^2), i.e. scaled by the larger argument.
+template <typename RealScalar>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y) {
+  // IEEE / IEC 60559 special cases: an infinite argument takes precedence over a NaN one.
+  const bool has_inf = (numext::isinf)(x) || (numext::isinf)(y);
+  if (has_inf) return NumTraits<RealScalar>::infinity();
+  const bool has_nan = (numext::isnan)(x) || (numext::isnan)(y);
+  if (has_nan) return NumTraits<RealScalar>::quiet_NaN();
+  EIGEN_USING_STD(sqrt);
+  const RealScalar larger = numext::maxi(x, y);
+  // Both arguments zero: return early so the ratio below never divides by zero.
+  if (numext::is_exactly_zero(larger)) return RealScalar(0);
+  const RealScalar ratio = numext::mini(y, x) / larger;
+  return larger * sqrt(RealScalar(1) + ratio * ratio);
+}
+// Generic hypot: takes the absolute value of each argument and forwards both
+// magnitudes to positive_real_hypot.
+template <typename Scalar>
+struct hypot_impl {
+  using RealScalar = typename NumTraits<Scalar>::Real;
+  static EIGEN_DEVICE_FUNC inline RealScalar run(const Scalar& x, const Scalar& y) {
+    EIGEN_USING_STD(abs);
+    const RealScalar magnitude_x = abs(x);
+    const RealScalar magnitude_y = abs(y);
+    return positive_real_hypot<RealScalar>(magnitude_x, magnitude_y);
+  }
+};
+// Generic complex sqrt implementation that correctly handles corner cases
+// according to https://en.cppreference.com/w/cpp/numeric/complex/sqrt
+template <typename ComplexT>
+EIGEN_DEVICE_FUNC ComplexT complex_sqrt(const ComplexT& z) {
+  // Principal square root of z = x + i*y.
+  //
+  // We look for real numbers u and v such that
+  //    (u + i*v)^2 = x + i*y  <=>
+  //    u^2 - v^2 + i*2*u*v = x + i*y,
+  // i.e.
+  //    u^2 - v^2 = x
+  //    2*u*v     = y.
+  //
+  // Numerically stable solutions:
+  //    x >= 0:  u = sqrt(0.5 * (x + sqrt(x^2 + y^2))),             v = y / (2 * u)
+  //    x <  0:  v = sign(y) * sqrt(0.5 * (-x + sqrt(x^2 + y^2))),  u = y / (2 * v)
+  //
+  // With w = sqrt(0.5 * (|x| + |z|)) these become
+  //    x == 0:  u = w,              v = sign(y) * w
+  //    x >  0:  u = w,              v = y / (2 * w)
+  //    x <  0:  u = |y| / (2 * w),  v = sign(y) * w
+  using Real = typename NumTraits<ComplexT>::Real;
+  const Real re = numext::real(z);
+  const Real im = numext::imag(z);
+  const Real kZero = Real(0);
+  const Real w = numext::sqrt(Real(0.5) * (numext::abs(re) + numext::hypot(re, im)));
+  // An infinite imaginary part is checked first and decides the result on its own.
+  if ((numext::isinf)(im)) {
+    return ComplexT(NumTraits<Real>::infinity(), im);
+  }
+  if (numext::is_exactly_zero(re)) {
+    return ComplexT(w, im < kZero ? -w : w);
+  }
+  if (re > kZero) {
+    return ComplexT(w, im / (2 * w));
+  }
+  // Remaining case: the real part is negative (or fails both comparisons above).
+  return ComplexT(numext::abs(im) / (2 * w), im < kZero ? -w : w);
+}
+// Generic complex rsqrt implementation.
+template <typename ComplexT>
+EIGEN_DEVICE_FUNC ComplexT complex_rsqrt(const ComplexT& z) {
+  // Principal reciprocal square root of z = x + i*y.
+  //
+  // We look for real numbers u and v such that
+  //    (u + i*v)^2 = 1 / (x + i*y)  <=>
+  //    u^2 - v^2 + i*2*u*v = x/|z|^2 - i*y/|z|^2,
+  // i.e.
+  //    u^2 - v^2 = x/|z|^2
+  //    2*u*v     = -y/|z|^2.
+  //
+  // Numerically stable solutions:
+  //    x >= 0:  u = sqrt(0.5 * (x + |z|)) / |z|,              v = -y / (2 * u * |z|^2)
+  //    x <  0:  v = -sign(y) * sqrt(0.5 * (-x + |z|)) / |z|,  u = -y / (2 * v * |z|^2)
+  //
+  // With w = sqrt(0.5 * (|x| + |z|)) these become
+  //    x == 0:  u = w / |z|,              v = -sign(y) * w / |z|
+  //    x >  0:  u = w / |z|,              v = -y / (2 * w * |z|)
+  //    x <  0:  u = |y| / (2 * w * |z|),  v = -sign(y) * w / |z|
+  using Real = typename NumTraits<ComplexT>::Real;
+  const Real re = numext::real(z);
+  const Real im = numext::imag(z);
+  const Real kZero = Real(0);
+  const Real modulus = numext::hypot(re, im);
+  const Real w = numext::sqrt(Real(0.5) * (numext::abs(re) + modulus));
+  const Real w_over_modulus = w / modulus;
+  // Corner cases consistent with 1/sqrt(z) on gcc/clang.
+  if (numext::is_exactly_zero(modulus)) {
+    return ComplexT(NumTraits<Real>::infinity(), NumTraits<Real>::quiet_NaN());
+  }
+  if ((numext::isinf)(re) || (numext::isinf)(im)) {
+    return ComplexT(kZero, kZero);
+  }
+  if (numext::is_exactly_zero(re)) {
+    return ComplexT(w_over_modulus, im < kZero ? w_over_modulus : -w_over_modulus);
+  }
+  if (re > kZero) {
+    return ComplexT(w_over_modulus, -im / (2 * w * modulus));
+  }
+  // Remaining case: the real part is negative (or fails both comparisons above).
+  return ComplexT(numext::abs(im) / (2 * w * modulus), im < kZero ? w_over_modulus : -w_over_modulus);
+}
+// Generic complex logarithm: the real part of the result is log(|z|) and the
+// imaginary part is atan2(imag(z), real(z)).
+template <typename ComplexT>
+EIGEN_DEVICE_FUNC ComplexT complex_log(const ComplexT& z) {
+  using Real = typename NumTraits<ComplexT>::Real;
+  const Real modulus = numext::abs(z);
+  EIGEN_USING_STD(atan2);
+  const Real argument = atan2(z.imag(), z.real());
+  const Real log_modulus = numext::log(modulus);
+  return ComplexT(log_modulus, argument);
+}
+// For generic scalars, use ternary select: run() forwards the mask to numext::is_exactly_zero and returns its result.
+template <typename Mask>
+struct scalar_select_mask<Mask, /*is_built_in_float*/ false> {
+  static EIGEN_DEVICE_FUNC inline bool run(const Mask& mask) { return numext::is_exactly_zero(mask); }
+};
+// For built-in float masks: take the magnitude of the mask, bitcast it to the unsigned
+// integer type of the same size, and report whether that integer is exactly zero.
+template <typename Mask>
+struct scalar_select_mask<Mask, /*is_built_in_float*/ true> {
+  using IntegerType = typename numext::get_integer_by_size<sizeof(Mask)>::unsigned_type;
+  static EIGEN_DEVICE_FUNC inline bool run(const Mask& mask) {
+    // std::abs is applied before the bit pattern is inspected.
+    const Mask magnitude = std::abs(mask);
+    const IntegerType bits = numext::bit_cast<IntegerType>(magnitude);
+    return numext::is_exactly_zero(bits);
+  }
+};
+// Mask test for long double: returns true only when every inspected byte of the mask is zero.
+// NumBytes is 80 / CHAR_BIT when long double has 64 mantissa digits, and 128 / CHAR_BIT otherwise,
+// so only that many leading bytes of the object are examined.
+template <int Size = sizeof(long double)>
+struct ldbl_select_mask {
+  static constexpr int MantissaDigits = std::numeric_limits<long double>::digits;
+  static constexpr int NumBytes = (MantissaDigits == 64 ? 80 : 128) / CHAR_BIT;
+  static EIGEN_DEVICE_FUNC inline bool run(const long double& mask) {
+    const uint8_t* const raw = reinterpret_cast<const uint8_t*>(&mask);
+    // Advance past zero bytes; stopping early means a non-zero byte was found.
+    Index pos = 0;
+    while (pos < NumBytes && raw[pos] == 0) {
+      pos++;
+    }
+    return !(pos < NumBytes);
+  }
+};
+template <>
+struct ldbl_select_mask<sizeof(double)> : scalar_select_mask<double> {};  // Size == sizeof(double): reuse the double mask implementation.
+template <>
+struct scalar_select_mask<long double, true> : ldbl_select_mask<> {};  // long double masks dispatch on sizeof(long double), the default Size.
+// Complex masks: run() is true only when the real-mask test holds for both the real and
+// the imaginary part; the imaginary part is not tested if the real part already fails.
+template <typename RealMask>
+struct scalar_select_mask<std::complex<RealMask>, false> {
+  using impl = scalar_select_mask<RealMask>;
+  static EIGEN_DEVICE_FUNC inline bool run(const std::complex<RealMask>& mask) {
+    if (!impl::run(numext::real(mask))) {
+      return false;
+    }
+    return impl::run(numext::imag(mask));
+  }
+};
+}  // end namespace internal
+}  // end namespace Eigen
+#endif  // EIGEN_MATHFUNCTIONSIMPL_H
--- a/eigen-master/Eigen/src/Core/Matrix.h
+++ b/eigen-master/Eigen/src/Core/Matrix.h
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2006-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#ifndef EIGEN_MATRIX_H
+#define EIGEN_MATRIX_H
+// IWYU pragma: private
+#include "./InternalHeaderCheck.h"
+namespace Eigen {
+namespace internal {
+template <typename Scalar_, int Rows_, int Cols_, int Options_, int MaxRows_, int MaxCols_>
+struct traits<Matrix<Scalar_, Rows_, Cols_, Options_, MaxRows_, MaxCols_>> {  // Compile-time traits of Matrix.
+ private:
+  constexpr static int size = internal::size_at_compile_time(Rows_, Cols_);
+  typedef typename find_best_packet<Scalar_, size>::type PacketScalar;
+  enum {
+    row_major_bit = Options_ & RowMajor ? RowMajorBit : 0,  // RowMajorBit iff Options_ requests RowMajor.
+    is_dynamic_size_storage = MaxRows_ == Dynamic || MaxCols_ == Dynamic,
+    max_size = is_dynamic_size_storage ? Dynamic : MaxRows_ * MaxCols_,  // Dynamic unless both maxima are fixed.
+    default_alignment = compute_default_alignment<Scalar_, max_size>::value,
+    actual_alignment = ((Options_ & DontAlign) == 0) ? default_alignment : 0,  // 0 when DontAlign is set.
+    required_alignment = unpacket_traits<PacketScalar>::alignment,
+    packet_access_bit = (packet_traits<Scalar_>::Vectorizable &&
+                         (EIGEN_UNALIGNED_VECTORIZE || (int(actual_alignment) >= int(required_alignment))))
+                            ? PacketAccessBit
+                            : 0  // Set only for vectorizable scalars whose alignment suffices (or unaligned vectorization).
+  };
+ public:
+  typedef Scalar_ Scalar;
+  typedef Dense StorageKind;
+  typedef Eigen::Index StorageIndex;
+  typedef MatrixXpr XprKind;
+  enum {
+    RowsAtCompileTime = Rows_,
+    ColsAtCompileTime = Cols_,
+    MaxRowsAtCompileTime = MaxRows_,
+    MaxColsAtCompileTime = MaxCols_,
+    Flags = compute_matrix_flags(Options_),
+    Options = Options_,
+    InnerStrideAtCompileTime = 1,
+    OuterStrideAtCompileTime = (int(Options) & int(RowMajor)) ? ColsAtCompileTime : RowsAtCompileTime,  // Columns for row-major, rows otherwise.
+    // FIXME, the following flag is only used to define NeedsToAlign in PlainObjectBase
+    EvaluatorFlags = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit,
+    Alignment = actual_alignment
+  };
+};
+}  // namespace internal
+/** \class Matrix
+ * \ingroup Core_Module
+ *
+ * \brief The matrix class, also used for vectors and row-vectors
+ *
+ * The %Matrix class is the work-horse for all \em dense (\ref dense "note") matrices and vectors within Eigen.
+ * Vectors are matrices with one column, and row-vectors are matrices with one row.
+ *
+ * The %Matrix class encompasses \em both fixed-size and dynamic-size objects (\ref fixedsize "note").
+ *
+ * The first three template parameters are required:
+ * \tparam Scalar_ Numeric type, e.g. float, double, int or std::complex<float>.
+ *                 User defined scalar types are supported as well (see \ref user_defined_scalars "here").
+ * \tparam Rows_ Number of rows, or \b Dynamic
+ * \tparam Cols_ Number of columns, or \b Dynamic
+ *
+ * The remaining template parameters are optional -- in most cases you don't have to worry about them.
+ * \tparam Options_ A combination of either \b #RowMajor or \b #ColMajor, and of either
+ *                 \b #AutoAlign or \b #DontAlign.
+ *                 The former controls \ref TopicStorageOrders "storage order", and defaults to column-major.
+ *                 The latter controls alignment, which is required for vectorization. It defaults to aligning
+ *                 matrices except for fixed sizes that aren't a multiple of the packet size.
+ * \tparam MaxRows_ Maximum number of rows. Defaults to \a Rows_ (\ref maxrows "note").
+ * \tparam MaxCols_ Maximum number of columns. Defaults to \a Cols_ (\ref maxrows "note").
+ *
+ * Eigen provides a number of typedefs covering the usual cases. Here are some examples:
+ *
+ * \li \c Matrix2d is a 2x2 square matrix of doubles (\c Matrix<double, 2, 2>)
+ * \li \c Vector4f is a vector of 4 floats (\c Matrix<float, 4, 1>)
+ * \li \c RowVector3i is a row-vector of 3 ints (\c Matrix<int, 1, 3>)
+ *
+ * \li \c MatrixXf is a dynamic-size matrix of floats (\c Matrix<float, Dynamic, Dynamic>)
+ * \li \c VectorXf is a dynamic-size vector of floats (\c Matrix<float, Dynamic, 1>)
+ *
+ * \li \c Matrix2Xf is a partially fixed-size (dynamic-size) matrix of floats (\c Matrix<float, 2, Dynamic>)
+ * \li \c MatrixX3d is a partially dynamic-size (fixed-size) matrix of double (\c Matrix<double, Dynamic, 3>)
+ *
+ * See \link matrixtypedefs this page \endlink for a complete list of predefined \em %Matrix and \em Vector typedefs.
+ *
+ * You can access elements of vectors and matrices using normal subscripting:
+ *
+ * \code
+ * Eigen::VectorXd v(10);
+ * v[0] = 0.1;
+ * v[1] = 0.2;
+ * v(0) = 0.3;
+ * v(1) = 0.4;
+ *
+ * Eigen::MatrixXi m(10, 10);
+ * m(0, 1) = 1;
+ * m(0, 2) = 2;
+ * m(0, 3) = 3;
+ * \endcode
+ *
+ * This class can be extended with the help of the plugin mechanism described on the page
+ * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_MATRIX_PLUGIN.
+ *
+ * <i><b>Some notes:</b></i>
+ *
+ * <dl>
+ * <dt><b>\anchor dense Dense versus sparse:</b></dt>
+ * <dd>This %Matrix class handles dense, not sparse matrices and vectors. For sparse matrices and vectors, see the
+ * Sparse module.
+ *
+ * Dense matrices and vectors are plain usual arrays of coefficients. All the coefficients are stored, in an ordinary
+ * contiguous array. This is unlike Sparse matrices and vectors where the coefficients are stored as a list of nonzero
+ * coefficients.</dd>
+ *
+ * <dt><b>\anchor fixedsize Fixed-size versus dynamic-size:</b></dt>
+ * <dd>Fixed-size means that the numbers of rows and columns are known at compile-time. In this case, Eigen allocates
+ * the array of coefficients as a fixed-size array, as a class member. This makes sense for very small matrices,
+ * typically up to 4x4, sometimes up to 16x16. Larger matrices should be declared as dynamic-size even if one happens to
+ * know their size at compile-time.
+ *
+ * Dynamic-size means that the numbers of rows or columns are not necessarily known at compile-time. In this case they
+ * are runtime variables, and the array of coefficients is allocated dynamically on the heap.
+ *
+ * Note that \em dense matrices, be they Fixed-size or Dynamic-size, <em>do not</em> expand dynamically in the sense of
+ * a std::map. If you want this behavior, see the Sparse module.</dd>
+ *
+ * <dt><b>\anchor maxrows MaxRows_ and MaxCols_:</b></dt>
+ * <dd>In most cases, one just leaves these parameters to the default values.
+ * These parameters mean the maximum size of rows and columns that the matrix may have. They are useful in cases
+ * when the exact numbers of rows and columns are not known at compile-time, but it is known at compile-time that they
+ * cannot exceed a certain value. This happens when taking dynamic-size blocks inside fixed-size matrices: in this case
+ * MaxRows_ and MaxCols_ are the dimensions of the original matrix, while Rows_ and Cols_ are Dynamic.</dd>
+ * </dl>
+ *
+ * <i><b>ABI and storage layout</b></i>
+ *
+ * The table below summarizes the ABI of some possible Matrix instances, which is fixed throughout the lifetime of Eigen 3.
+ * <table  class="manual">
+ * <tr><th>Matrix type</th><th>Equivalent C structure</th></tr>
+ * <tr><td>\code Matrix<T,Dynamic,Dynamic> \endcode</td><td>\code
+ * struct {
+ *   T *data;                  // with (size_t(data)%EIGEN_MAX_ALIGN_BYTES)==0
+ *   Eigen::Index rows, cols;
+ *  };
+ * \endcode</td></tr>
+ * <tr class="alt"><td>\code
+ * Matrix<T,Dynamic,1>
+ * Matrix<T,1,Dynamic> \endcode</td><td>\code
+ * struct {
+ *   T *data;                  // with (size_t(data)%EIGEN_MAX_ALIGN_BYTES)==0
+ *   Eigen::Index size;
+ *  };
+ * \endcode</td></tr>
+ * <tr><td>\code Matrix<T,Rows,Cols> \endcode</td><td>\code
+ * struct {
+ *   T data[Rows*Cols];        // with (size_t(data)%A(Rows*Cols*sizeof(T)))==0
+ *  };
+ * \endcode</td></tr>
+ * <tr class="alt"><td>\code Matrix<T,Dynamic,Dynamic,0,MaxRows,MaxCols> \endcode</td><td>\code
+ * struct {
+ *   T data[MaxRows*MaxCols];  // with (size_t(data)%A(MaxRows*MaxCols*sizeof(T)))==0
+ *   Eigen::Index rows, cols;
+ *  };
+ * \endcode</td></tr>
+ * </table>
+ * Note that in this table Rows, Cols, MaxRows and MaxCols are all positive integers. A(S) is defined as the largest
+ * possible power-of-two smaller than EIGEN_MAX_STATIC_ALIGN_BYTES.
+ *
+ * \see MatrixBase for the majority of the API methods for matrices, \ref TopicClassHierarchy,
+ * \ref TopicStorageOrders
+ */
+template <typename Scalar_, int Rows_, int Cols_, int Options_, int MaxRows_, int MaxCols_>
+class Matrix : public PlainObjectBase<Matrix<Scalar_, Rows_, Cols_, Options_, MaxRows_, MaxCols_>> {
+ public:
+  /** \brief Base class typedef.
+   * \sa PlainObjectBase
+   */
+  typedef PlainObjectBase<Matrix> Base;
+  enum { Options = Options_ };
+  EIGEN_DENSE_PUBLIC_INTERFACE(Matrix)
+  typedef typename Base::PlainObject PlainObject;
+  using Base::base;
+  using Base::coeffRef;
+  /**
+   * \brief Assigns matrices to each other.
+   *
+   * \note This is a special case of the templated operator=. Its purpose is
+   * to prevent a default operator= from hiding the templated operator=.
+   *
+   * \callgraph
+   */
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix& operator=(const Matrix& other) { return Base::_set(other); }
+  /** \internal
+   * \brief Copies the value of the expression \a other into \c *this with automatic resizing.
+   *
+   * *this might be resized to match the dimensions of \a other. If *this was a null matrix (not already initialized),
+   * it will be initialized.
+   *
+   * Note that copying a row-vector into a vector (and conversely) is allowed.
+   * The resizing, if any, is then done in the appropriate way so that row-vectors
+   * remain row-vectors and vectors remain vectors.
+   */
+  template <typename OtherDerived>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix& operator=(const DenseBase<OtherDerived>& other) {
+    return Base::_set(other);
+  }
+  /**
+   * \brief Copies the generic expression \a other into *this.
+   * \copydetails DenseBase::operator=(const EigenBase<OtherDerived> &other)
+   */
+  template <typename OtherDerived>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix& operator=(const EigenBase<OtherDerived>& other) {
+    return Base::operator=(other);
+  }
+  template <typename OtherDerived>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix& operator=(const ReturnByValue<OtherDerived>& func) {
+    return Base::operator=(func);
+  }
+  /** \brief Default constructor.
+   *
+   * For fixed-size matrices, does nothing.
+   *
+   * For dynamic-size matrices, creates an empty matrix of size 0. Does not allocate any array. Such a matrix
+   * is called a null matrix. This constructor is the unique way to create null matrices: resizing
+   * a matrix to 0 is not supported.
+   *
+   * \sa resize(Index,Index)
+   */
+#if defined(EIGEN_INITIALIZE_COEFFS)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix() { EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED }
+#else
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix() = default;
+#endif
+  /** \brief Move constructor */
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix(Matrix&&) = default;
+  /** \brief Move assignment: moves the contents of \a other into \c *this.
+   *
+   * Declared \c noexcept whenever \c Scalar is nothrow move assignable.
+   */
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix& operator=(Matrix&& other) noexcept(
+      std::is_nothrow_move_assignable<Scalar>::value) {
+    Base::operator=(std::move(other));
+    return *this;
+  }
+  /** \brief Construct a row or column vector with fixed size from an arbitrary number of coefficients.
+   *
+   * \only_for_vectors
+   *
+   * This constructor is for 1D array or vectors with more than 4 coefficients.
+   *
+   * \warning To construct a column (resp. row) vector of fixed length, the number of values passed to this
+   * constructor must match the fixed number of rows (resp. columns) of \c *this.
+   *
+   *
+   * Example: \include Matrix_variadic_ctor_cxx11.cpp
+   * Output: \verbinclude Matrix_variadic_ctor_cxx11.out
+   *
+   * \sa Matrix(const std::initializer_list<std::initializer_list<Scalar>>&)
+   */
+  template <typename... ArgTypes>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix(const Scalar& a0, const Scalar& a1, const Scalar& a2, const Scalar& a3,
+                                               const ArgTypes&... args)
+      : Base(a0, a1, a2, a3, args...) {}
+  /** \brief Constructs a Matrix and initializes it from the coefficients given as initializer-lists grouped by row.
+   * \cpp11
+   * \anchor matrix_initializer_list
+   *
+   * In the general case, the constructor takes a list of rows, each row being represented as a list of coefficients:
+   *
+   * Example: \include Matrix_initializer_list_23_cxx11.cpp
+   * Output: \verbinclude Matrix_initializer_list_23_cxx11.out
+   *
+   * Each of the inner initializer lists must contain the exact same number of elements, otherwise an assertion is
+   * triggered.
+   *
+   * In the case of a compile-time column vector, implicit transposition from a single row is allowed.
+   * Therefore <code>VectorXd{{1,2,3,4,5}}</code> is legal and the more verbose syntax
+   * <code>RowVectorXd{{1},{2},{3},{4},{5}}</code> can be avoided:
+   *
+   * Example: \include Matrix_initializer_list_vector_cxx11.cpp
+   * Output: \verbinclude Matrix_initializer_list_vector_cxx11.out
+   *
+   * In the case of fixed-sized matrices, the initializer list sizes must exactly match the matrix sizes,
+   * and implicit transposition is allowed for compile-time vectors only.
+   *
+   * \sa Matrix(const Scalar& a0, const Scalar& a1, const Scalar& a2,  const Scalar& a3, const ArgTypes&... args)
+   */
+  EIGEN_DEVICE_FUNC explicit constexpr EIGEN_STRONG_INLINE Matrix(
+      const std::initializer_list<std::initializer_list<Scalar>>& list)
+      : Base(list) {}
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+  // This constructor is for both 1x1 matrices and dynamic vectors; the argument is handed to Base::_init1.
+  template <typename T>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit Matrix(const T& x) {
+    Base::template _init1<T>(x);
+  }
+  template <typename T0, typename T1>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix(const T0& x, const T1& y) {
+    Base::template _init2<T0, T1>(x, y);
+  }
+#else
+  /** \brief Constructs a fixed-sized matrix initialized with coefficients starting at \a data */
+  EIGEN_DEVICE_FUNC explicit Matrix(const Scalar* data);
+  /** \brief Constructs a vector or row-vector with given dimension. \only_for_vectors
+   *
+   * This is useful for dynamic-size vectors. For fixed-size vectors,
+   * it is redundant to pass these parameters, so one should use the default constructor
+   * Matrix() instead.
+   *
+   * \warning This constructor is disabled for fixed-size \c 1x1 matrices. For instance,
+   * calling Matrix<double,1,1>(1) will call the initialization constructor: Matrix(const Scalar&).
+   * For fixed-size \c 1x1 matrices it is therefore recommended to use the default
+   * constructor Matrix() instead, especially when using one of the non standard
+   * \c EIGEN_INITIALIZE_MATRICES_BY_{ZERO,\c NAN} macros (see \ref TopicPreprocessorDirectives).
+   */
+  EIGEN_STRONG_INLINE explicit Matrix(Index dim);
+  /** \brief Constructs an initialized 1x1 matrix with the given coefficient
+   * \sa Matrix(const Scalar&, const Scalar&, const Scalar&,  const Scalar&, const ArgTypes&...) */
+  Matrix(const Scalar& x);
+  /** \brief Constructs an uninitialized matrix with \a rows rows and \a cols columns.
+   *
+   * This is useful for dynamic-size matrices. For fixed-size matrices,
+   * it is redundant to pass these parameters, so one should use the default constructor
+   * Matrix() instead.
+   *
+   * \warning This constructor is disabled for fixed-size \c 1x2 and \c 2x1 vectors. For instance,
+   * calling Matrix2f(2,1) will call the initialization constructor: Matrix(const Scalar& x, const Scalar& y).
+   * For fixed-size \c 1x2 or \c 2x1 vectors it is therefore recommended to use the default
+   * constructor Matrix() instead, especially when using one of the non standard
+   * \c EIGEN_INITIALIZE_MATRICES_BY_{ZERO,\c NAN} macros (see \ref TopicPreprocessorDirectives).
+   */
+  EIGEN_DEVICE_FUNC Matrix(Index rows, Index cols);
+  /** \brief Constructs an initialized 2D vector with given coefficients
+   * \sa Matrix(const Scalar&, const Scalar&, const Scalar&,  const Scalar&, const ArgTypes&...) */
+  Matrix(const Scalar& x, const Scalar& y);
+#endif  // end EIGEN_PARSED_BY_DOXYGEN
+  /** \brief Constructs an initialized 3D vector with given coefficients
+   *
+   * Statically asserts that \c Matrix is a vector of size 3, then writes \a x, \a y, \a z
+   * to storage positions 0, 1 and 2.
+   * \sa Matrix(const Scalar&, const Scalar&, const Scalar&,  const Scalar&, const ArgTypes&...)
+   */
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z) {
+    EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Matrix, 3)
+    m_storage.data()[0] = x;
+    m_storage.data()[1] = y;
+    m_storage.data()[2] = z;
+  }
+  /** \brief Constructs an initialized 4D vector with given coefficients
+   *
+   * Statically asserts that \c Matrix is a vector of size 4, then writes \a x, \a y, \a z, \a w
+   * to storage positions 0 through 3.
+   * \sa Matrix(const Scalar&, const Scalar&, const Scalar&,  const Scalar&, const ArgTypes&...)
+   */
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z, const Scalar& w) {
+    EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Matrix, 4)
+    m_storage.data()[0] = x;
+    m_storage.data()[1] = y;
+    m_storage.data()[2] = z;
+    m_storage.data()[3] = w;
+  }
+  /** \brief Copy constructor */
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix(const Matrix&) = default;
+  /** \brief Copy constructor for generic expressions.
+   * \sa MatrixBase::operator=(const EigenBase<OtherDerived>&)
+   */
+  template <typename OtherDerived>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix(const EigenBase<OtherDerived>& other) : Base(other.derived()) {}
+  EIGEN_DEVICE_FUNC constexpr Index innerStride() const noexcept { return 1; }
+  EIGEN_DEVICE_FUNC constexpr Index outerStride() const noexcept { return this->innerSize(); }
+  /////////// Geometry module ///////////
+  template <typename OtherDerived>
+  EIGEN_DEVICE_FUNC explicit Matrix(const RotationBase<OtherDerived, ColsAtCompileTime>& r);
+  template <typename OtherDerived>
+  EIGEN_DEVICE_FUNC Matrix& operator=(const RotationBase<OtherDerived, ColsAtCompileTime>& r);
+// Allows Matrix to be extended from outside Eigen: the file named by EIGEN_MATRIX_PLUGIN is included into the class body.
+#ifdef EIGEN_MATRIX_PLUGIN
+#include EIGEN_MATRIX_PLUGIN
+#endif
+ protected:
+  template <typename Derived, typename OtherDerived, bool IsVector>
+  friend struct internal::conservative_resize_like_impl;
+  using Base::m_storage;
+};
+/** \defgroup matrixtypedefs Global matrix typedefs
+ *
+ * \ingroup Core_Module
+ *
+ * %Eigen defines several typedef shortcuts for most common matrix and vector types.
+ *
+ * The general patterns are the following:
+ *
+ * \c MatrixSizeType where \c Size can be \c 2,\c 3,\c 4 for fixed size square matrices or \c X for dynamic size,
+ * and where \c Type can be \c i for integer, \c f for float, \c d for double, \c cf for complex float, \c cd
+ * for complex double.
+ *
+ * For example, \c Matrix3d is a fixed-size 3x3 matrix type of doubles, and \c MatrixXf is a dynamic-size matrix of
+ * floats.
+ *
+ * There are also \c VectorSizeType and \c RowVectorSizeType which are self-explanatory. For example, \c Vector4cf is
+ * a fixed-size vector of 4 complex floats.
+ *
+ * With \cpp11, template aliases are also defined for common sizes.
+ * They follow the same pattern as above except that the scalar type suffix is replaced by a
+ * template parameter, i.e.:
+ *   - `MatrixSize<Type>` where `Size` can be \c 2,\c 3,\c 4 for fixed size square matrices or \c X for dynamic size.
+ *   - `MatrixXSize<Type>` and `MatrixSizeX<Type>` where `Size` can be \c 2,\c 3,\c 4 for hybrid dynamic/fixed matrices.
+ *   - `VectorSize<Type>` and `RowVectorSize<Type>` for column and row vectors.
+ *
+ * With \cpp11, you can also use fully generic column and row vector types: `Vector<Type,Size>` and
+ * `RowVector<Type,Size>`.
+ *
+ * \sa class Matrix
+ */
+#define EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, Size, SizeSuffix)    \
+  /** \ingroup matrixtypedefs */                                   \
+  /** \brief `Size`&times;`Size` matrix of type `Type`. */         \
+  typedef Matrix<Type, Size, Size> Matrix##SizeSuffix##TypeSuffix; \
+  /** \ingroup matrixtypedefs */                                   \
+  /** \brief `Size`&times;`1` vector of type `Type`. */            \
+  typedef Matrix<Type, Size, 1> Vector##SizeSuffix##TypeSuffix;    \
+  /** \ingroup matrixtypedefs */                                   \
+  /** \brief `1`&times;`Size` vector of type `Type`. */            \
+  typedef Matrix<Type, 1, Size> RowVector##SizeSuffix##TypeSuffix;
+#define EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, Size)          \
+  /** \ingroup matrixtypedefs */                                   \
+  /** \brief `Size`&times;`Dynamic` matrix of type `Type`. */      \
+  typedef Matrix<Type, Size, Dynamic> Matrix##Size##X##TypeSuffix; \
+  /** \ingroup matrixtypedefs */                                   \
+  /** \brief `Dynamic`&times;`Size` matrix of type `Type`. */      \
+  typedef Matrix<Type, Dynamic, Size> Matrix##X##Size##TypeSuffix;
+#define EIGEN_MAKE_TYPEDEFS_ALL_SIZES(Type, TypeSuffix) \
+  EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 2, 2)           \
+  EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 3, 3)           \
+  EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 4, 4)           \
+  EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, Dynamic, X)     \
+  EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, 2)        \
+  EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, 3)        \
+  EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, 4)
+EIGEN_MAKE_TYPEDEFS_ALL_SIZES(int, i)  // e.g. Matrix2i, Vector3i, RowVector4i, MatrixXi, Matrix2Xi, MatrixX3i
+EIGEN_MAKE_TYPEDEFS_ALL_SIZES(float, f)  // e.g. Matrix2f, VectorXf, Matrix3Xf
+EIGEN_MAKE_TYPEDEFS_ALL_SIZES(double, d)  // e.g. Matrix3d, VectorXd, MatrixX4d
+EIGEN_MAKE_TYPEDEFS_ALL_SIZES(std::complex<float>, cf)  // e.g. Matrix2cf, Vector4cf
+EIGEN_MAKE_TYPEDEFS_ALL_SIZES(std::complex<double>, cd)  // e.g. Matrix2cd, VectorXcd
+#undef EIGEN_MAKE_TYPEDEFS_ALL_SIZES  // The helper macros are undefined once the typedefs above have been generated.
+#undef EIGEN_MAKE_TYPEDEFS
+#undef EIGEN_MAKE_FIXED_TYPEDEFS
+#define EIGEN_MAKE_TYPEDEFS(Size, SizeSuffix)                    \
+  /** \ingroup matrixtypedefs */                                 \
+  /** \brief \cpp11 `Size`&times;`Size` matrix of type `Type`.*/ \
+  template <typename Type>                                       \
+  using Matrix##SizeSuffix = Matrix<Type, Size, Size>;           \
+  /** \ingroup matrixtypedefs */                                 \
+  /** \brief \cpp11 `Size`&times;`1` vector of type `Type`.*/    \
+  template <typename Type>                                       \
+  using Vector##SizeSuffix = Matrix<Type, Size, 1>;              \
+  /** \ingroup matrixtypedefs */                                 \
+  /** \brief \cpp11 `1`&times;`Size` vector of type `Type`.*/    \
+  template <typename Type>                                       \
+  using RowVector##SizeSuffix = Matrix<Type, 1, Size>;
+#define EIGEN_MAKE_FIXED_TYPEDEFS(Size)                              \
+  /** \ingroup matrixtypedefs */                                     \
+  /** \brief \cpp11 `Size`&times;`Dynamic` matrix of type `Type` */  \
+  template <typename Type>                                           \
+  using Matrix##Size##X = Matrix<Type, Size, Dynamic>;               \
+  /** \ingroup matrixtypedefs */                                     \
+  /** \brief \cpp11 `Dynamic`&times;`Size` matrix of type `Type`. */ \
+  template <typename Type>                                           \
+  using Matrix##X##Size = Matrix<Type, Dynamic, Size>;
+EIGEN_MAKE_TYPEDEFS(2, 2)  // Matrix2<Type>, Vector2<Type>, RowVector2<Type>
+EIGEN_MAKE_TYPEDEFS(3, 3)  // Matrix3<Type>, Vector3<Type>, RowVector3<Type>
+EIGEN_MAKE_TYPEDEFS(4, 4)  // Matrix4<Type>, Vector4<Type>, RowVector4<Type>
+EIGEN_MAKE_TYPEDEFS(Dynamic, X)  // MatrixX<Type>, VectorX<Type>, RowVectorX<Type>
+EIGEN_MAKE_FIXED_TYPEDEFS(2)  // Matrix2X<Type>, MatrixX2<Type>
+EIGEN_MAKE_FIXED_TYPEDEFS(3)  // Matrix3X<Type>, MatrixX3<Type>
+EIGEN_MAKE_FIXED_TYPEDEFS(4)  // Matrix4X<Type>, MatrixX4<Type>
+/** \ingroup matrixtypedefs
+ * \brief \cpp11 `Size`&times;`1` vector of type `Type`. */
+template <typename Type, int Size>
+using Vector = Matrix<Type, Size, 1>;
+/** \ingroup matrixtypedefs
+ * \brief \cpp11 `1`&times;`Size` vector of type `Type`. */
+template <typename Type, int Size>
+using RowVector = Matrix<Type, 1, Size>;
+#undef EIGEN_MAKE_TYPEDEFS  // The helper macros are undefined once the aliases above have been generated.
+#undef EIGEN_MAKE_FIXED_TYPEDEFS
+}  // end namespace Eigen
+#endif  // EIGEN_MATRIX_H