Commit da2c1226 authored by lucasb-eyer

Initial import of code.

parent 3ae01ffb
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
* Content : Eigen bindings to Intel(R) MKL
* Selfadjoint matrix-vector product functionality based on ?SYMV/?HEMV.
********************************************************************************
*/
#ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H
#define EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H
namespace Eigen {
namespace internal {
/**********************************************************************
* This file implements selfadjoint matrix-vector multiplication using BLAS
**********************************************************************/
// symv/hemv specialization
template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs>
struct selfadjoint_matrix_vector_product_symv :
selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,BuiltIn> {};
#define EIGEN_MKL_SYMV_SPECIALIZE(Scalar) \
template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \
struct selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Specialized> { \
static EIGEN_DONT_INLINE void run( \
Index size, const Scalar* lhs, Index lhsStride, \
const Scalar* _rhs, Index rhsIncr, Scalar* res, Scalar alpha) { \
enum {\
IsColMajor = StorageOrder==ColMajor \
}; \
if (IsColMajor == ConjugateLhs) {\
selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,BuiltIn>::run( \
size, lhs, lhsStride, _rhs, rhsIncr, res, alpha); \
} else {\
selfadjoint_matrix_vector_product_symv<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs>::run( \
size, lhs, lhsStride, _rhs, rhsIncr, res, alpha); \
}\
} \
}; \
EIGEN_MKL_SYMV_SPECIALIZE(double)
EIGEN_MKL_SYMV_SPECIALIZE(float)
EIGEN_MKL_SYMV_SPECIALIZE(dcomplex)
EIGEN_MKL_SYMV_SPECIALIZE(scomplex)
#define EIGEN_MKL_SYMV_SPECIALIZATION(EIGTYPE,MKLTYPE,MKLFUNC) \
template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \
struct selfadjoint_matrix_vector_product_symv<EIGTYPE,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs> \
{ \
typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> SYMVVector;\
\
static EIGEN_DONT_INLINE void run( \
Index size, const EIGTYPE* lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* res, EIGTYPE alpha) \
{ \
enum {\
IsRowMajor = StorageOrder==RowMajor ? 1 : 0, \
IsLower = UpLo == Lower ? 1 : 0 \
}; \
MKL_INT n=size, lda=lhsStride, incx=rhsIncr, incy=1; \
MKLTYPE alpha_, beta_; \
const EIGTYPE *x_ptr, myone(1); \
char uplo=(IsRowMajor) ? (IsLower ? 'U' : 'L') : (IsLower ? 'L' : 'U'); \
assign_scalar_eig2mkl(alpha_, alpha); \
assign_scalar_eig2mkl(beta_, myone); \
SYMVVector x_tmp; \
if (ConjugateRhs) { \
Map<const SYMVVector, 0, InnerStride<> > map_x(_rhs,size,1,InnerStride<>(incx)); \
x_tmp=map_x.conjugate(); \
x_ptr=x_tmp.data(); \
incx=1; \
} else x_ptr=_rhs; \
MKLFUNC(&uplo, &n, &alpha_, (const MKLTYPE*)lhs, &lda, (const MKLTYPE*)x_ptr, &incx, &beta_, (MKLTYPE*)res, &incy); \
}\
};
EIGEN_MKL_SYMV_SPECIALIZATION(double, double, dsymv)
EIGEN_MKL_SYMV_SPECIALIZATION(float, float, ssymv)
EIGEN_MKL_SYMV_SPECIALIZATION(dcomplex, MKL_Complex16, zhemv)
EIGEN_MKL_SYMV_SPECIALIZATION(scomplex, MKL_Complex8, chemv)
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_MKL_H
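// A minimal usage sketch, assuming Eigen is built with EIGEN_USE_MKL_ALL and
// linked against MKL: the selfadjoint product below then goes through the
// Specialized path above and, for double, ends up in dsymv.
#include <Eigen/Dense>

int main()
{
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(64, 64);
  Eigen::VectorXd x = Eigen::VectorXd::Random(64);
  // Only the lower triangle of A is referenced; it stands in for the full
  // selfadjoint matrix.
  Eigen::VectorXd y = A.selfadjointView<Eigen::Lower>() * x;
  return y.size() == 64 ? 0 : 1;
}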
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_SELFADJOINT_PRODUCT_H
#define EIGEN_SELFADJOINT_PRODUCT_H
/**********************************************************************
* This file implements a selfadjoint product: C += A A^T, updating only
* half of the selfadjoint matrix C.
* It corresponds to the level 3 SYRK and level 2 SYR BLAS routines.
**********************************************************************/
namespace Eigen {
template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjLhs, bool ConjRhs>
struct selfadjoint_rank1_update;
template<typename Scalar, typename Index, int UpLo, bool ConjLhs, bool ConjRhs>
struct selfadjoint_rank1_update<Scalar,Index,ColMajor,UpLo,ConjLhs,ConjRhs>
{
static void run(Index size, Scalar* mat, Index stride, const Scalar* vec, Scalar alpha)
{
internal::conj_if<ConjRhs> cj;
typedef Map<const Matrix<Scalar,Dynamic,1> > OtherMap;
typedef typename internal::conditional<ConjLhs,typename OtherMap::ConjugateReturnType,const OtherMap&>::type ConjRhsType;
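// each iteration performs a vector axpy on the stored part of column i:
//   Lower: mat.col(i).tail(size-i) += (alpha * cj(vec[i])) * vec.tail(size-i)
//   Upper: mat.col(i).head(i+1)    += (alpha * cj(vec[i])) * vec.head(i+1)
// (with the right-hand vector additionally conjugated when ConjLhs is set)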
for (Index i=0; i<size; ++i)
{
Map<Matrix<Scalar,Dynamic,1> >(mat+stride*i+(UpLo==Lower ? i : 0), (UpLo==Lower ? size-i : (i+1)))
+= (alpha * cj(vec[i])) * ConjRhsType(OtherMap(vec+(UpLo==Lower ? i : 0),UpLo==Lower ? size-i : (i+1)));
}
}
};
template<typename Scalar, typename Index, int UpLo, bool ConjLhs, bool ConjRhs>
struct selfadjoint_rank1_update<Scalar,Index,RowMajor,UpLo,ConjLhs,ConjRhs>
{
static void run(Index size, Scalar* mat, Index stride, const Scalar* vec, Scalar alpha)
{
selfadjoint_rank1_update<Scalar,Index,ColMajor,UpLo==Lower?Upper:Lower,ConjRhs,ConjLhs>::run(size,mat,stride,vec,alpha);
}
};
template<typename MatrixType, typename OtherType, int UpLo, bool OtherIsVector = OtherType::IsVectorAtCompileTime>
struct selfadjoint_product_selector;
template<typename MatrixType, typename OtherType, int UpLo>
struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,true>
{
static void run(MatrixType& mat, const OtherType& other, typename MatrixType::Scalar alpha)
{
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::Index Index;
typedef internal::blas_traits<OtherType> OtherBlasTraits;
typedef typename OtherBlasTraits::DirectLinearAccessType ActualOtherType;
typedef typename internal::remove_all<ActualOtherType>::type _ActualOtherType;
typename internal::add_const_on_value_type<ActualOtherType>::type actualOther = OtherBlasTraits::extract(other.derived());
Scalar actualAlpha = alpha * OtherBlasTraits::extractScalarFactor(other.derived());
enum {
StorageOrder = (internal::traits<MatrixType>::Flags&RowMajorBit) ? RowMajor : ColMajor,
UseOtherDirectly = _ActualOtherType::InnerStrideAtCompileTime==1
};
internal::gemv_static_vector_if<Scalar,OtherType::SizeAtCompileTime,OtherType::MaxSizeAtCompileTime,!UseOtherDirectly> static_other;
ei_declare_aligned_stack_constructed_variable(Scalar, actualOtherPtr, other.size(),
(UseOtherDirectly ? const_cast<Scalar*>(actualOther.data()) : static_other.data()));
if(!UseOtherDirectly)
Map<typename _ActualOtherType::PlainObject>(actualOtherPtr, actualOther.size()) = actualOther;
selfadjoint_rank1_update<Scalar,Index,StorageOrder,UpLo,
OtherBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex,
(!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex>
::run(other.size(), mat.data(), mat.outerStride(), actualOtherPtr, actualAlpha);
}
};
template<typename MatrixType, typename OtherType, int UpLo>
struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false>
{
static void run(MatrixType& mat, const OtherType& other, typename MatrixType::Scalar alpha)
{
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::Index Index;
typedef internal::blas_traits<OtherType> OtherBlasTraits;
typedef typename OtherBlasTraits::DirectLinearAccessType ActualOtherType;
typedef typename internal::remove_all<ActualOtherType>::type _ActualOtherType;
typename internal::add_const_on_value_type<ActualOtherType>::type actualOther = OtherBlasTraits::extract(other.derived());
Scalar actualAlpha = alpha * OtherBlasTraits::extractScalarFactor(other.derived());
enum { IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0 };
internal::general_matrix_matrix_triangular_product<Index,
Scalar, _ActualOtherType::Flags&RowMajorBit ? RowMajor : ColMajor, OtherBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex,
Scalar, _ActualOtherType::Flags&RowMajorBit ? ColMajor : RowMajor, (!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex,
MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor, UpLo>
::run(mat.cols(), actualOther.cols(),
&actualOther.coeffRef(0,0), actualOther.outerStride(), &actualOther.coeffRef(0,0), actualOther.outerStride(),
mat.data(), mat.outerStride(), actualAlpha);
}
};
// high-level API
template<typename MatrixType, unsigned int UpLo>
template<typename DerivedU>
SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
::rankUpdate(const MatrixBase<DerivedU>& u, Scalar alpha)
{
selfadjoint_product_selector<MatrixType,DerivedU,UpLo>::run(_expression().const_cast_derived(), u.derived(), alpha);
return *this;
}
} // end namespace Eigen
#endif // EIGEN_SELFADJOINT_PRODUCT_H
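// A minimal sketch of the rankUpdate entry point defined above, assuming a
// plain build: C += alpha * u * u^* is applied to the stored triangle only.
#include <Eigen/Dense>

int main()
{
  Eigen::MatrixXd C = Eigen::MatrixXd::Zero(8, 8);
  Eigen::VectorXd u = Eigen::VectorXd::Random(8);        // vector => rank-1 kernel
  Eigen::MatrixXd U = Eigen::MatrixXd::Random(8, 3);     // matrix => SYRK-style kernel
  C.selfadjointView<Eigen::Lower>().rankUpdate(u, 2.0);  // only the lower triangle is written
  C.selfadjointView<Eigen::Lower>().rankUpdate(U, 1.0);  // C += U * U^T on the lower triangle
  return 0;
}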
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_SELFADJOINTRANK2UPDATE_H
#define EIGEN_SELFADJOINTRANK2UPDATE_H
namespace Eigen {
namespace internal {
/* Optimized selfadjoint matrix += alpha * uv' + conj(alpha)*vu'
* It corresponds to the level 2 SYR2 BLAS routine
*/
template<typename Scalar, typename Index, typename UType, typename VType, int UpLo>
struct selfadjoint_rank2_update_selector;
template<typename Scalar, typename Index, typename UType, typename VType>
struct selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Lower>
{
static void run(Scalar* mat, Index stride, const UType& u, const VType& v, Scalar alpha)
{
const Index size = u.size();
for (Index i=0; i<size; ++i)
{
Map<Matrix<Scalar,Dynamic,1> >(mat+stride*i+i, size-i) +=
(conj(alpha) * conj(u.coeff(i))) * v.tail(size-i)
+ (alpha * conj(v.coeff(i))) * u.tail(size-i);
}
}
};
template<typename Scalar, typename Index, typename UType, typename VType>
struct selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Upper>
{
static void run(Scalar* mat, Index stride, const UType& u, const VType& v, Scalar alpha)
{
const Index size = u.size();
for (Index i=0; i<size; ++i)
Map<Matrix<Scalar,Dynamic,1> >(mat+stride*i, i+1) +=
(conj(alpha) * conj(u.coeff(i))) * v.head(i+1)
+ (alpha * conj(v.coeff(i))) * u.head(i+1);
}
};
template<bool Cond, typename T> struct conj_expr_if
: conditional<!Cond, const T&,
CwiseUnaryOp<scalar_conjugate_op<typename traits<T>::Scalar>,T> > {};
} // end namespace internal
template<typename MatrixType, unsigned int UpLo>
template<typename DerivedU, typename DerivedV>
SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
::rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, Scalar alpha)
{
typedef internal::blas_traits<DerivedU> UBlasTraits;
typedef typename UBlasTraits::DirectLinearAccessType ActualUType;
typedef typename internal::remove_all<ActualUType>::type _ActualUType;
typename internal::add_const_on_value_type<ActualUType>::type actualU = UBlasTraits::extract(u.derived());
typedef internal::blas_traits<DerivedV> VBlasTraits;
typedef typename VBlasTraits::DirectLinearAccessType ActualVType;
typedef typename internal::remove_all<ActualVType>::type _ActualVType;
typename internal::add_const_on_value_type<ActualVType>::type actualV = VBlasTraits::extract(v.derived());
// If MatrixType is row major, then we use the routine for lower triangular in the upper triangular case and
// vice versa, and take the complex conjugate of all coefficients and vector entries.
enum { IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0 };
Scalar actualAlpha = alpha * UBlasTraits::extractScalarFactor(u.derived())
* internal::conj(VBlasTraits::extractScalarFactor(v.derived()));
if (IsRowMajor)
actualAlpha = internal::conj(actualAlpha);
internal::selfadjoint_rank2_update_selector<Scalar, Index,
typename internal::remove_all<typename internal::conj_expr_if<IsRowMajor ^ UBlasTraits::NeedToConjugate,_ActualUType>::type>::type,
typename internal::remove_all<typename internal::conj_expr_if<IsRowMajor ^ VBlasTraits::NeedToConjugate,_ActualVType>::type>::type,
(IsRowMajor ? int(UpLo==Upper ? Lower : Upper) : UpLo)>
::run(_expression().const_cast_derived().data(),_expression().outerStride(),actualU,actualV,actualAlpha);
return *this;
}
} // end namespace Eigen
#endif // EIGEN_SELFADJOINTRANK2UPDATE_H
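// A minimal sketch of the rank-2 rankUpdate entry point defined above,
// assuming a plain build: C += alpha*u*v^* + conj(alpha)*v*u^* is applied to
// the stored triangle (SYR2/HER2-style).
#include <Eigen/Dense>

int main()
{
  Eigen::MatrixXcd C = Eigen::MatrixXcd::Zero(8, 8);
  Eigen::VectorXcd u = Eigen::VectorXcd::Random(8);
  Eigen::VectorXcd v = Eigen::VectorXcd::Random(8);
  // Only the upper triangle of C is written.
  C.selfadjointView<Eigen::Upper>().rankUpdate(u, v, std::complex<double>(1.0, 0.0));
  return 0;
}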
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_H
#define EIGEN_TRIANGULAR_MATRIX_MATRIX_H
namespace Eigen {
namespace internal {
// template<typename Scalar, int mr, int StorageOrder, bool Conjugate, int Mode>
// struct gemm_pack_lhs_triangular
// {
// Matrix<Scalar,mr,mr,
// void operator()(Scalar* blockA, const EIGEN_RESTRICT Scalar* _lhs, int lhsStride, int depth, int rows)
// {
// conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
// const_blas_data_mapper<Scalar, StorageOrder> lhs(_lhs,lhsStride);
// int count = 0;
// const int peeled_mc = (rows/mr)*mr;
// for(int i=0; i<peeled_mc; i+=mr)
// {
// for(int k=0; k<depth; k++)
// for(int w=0; w<mr; w++)
// blockA[count++] = cj(lhs(i+w, k));
// }
// for(int i=peeled_mc; i<rows; i++)
// {
// for(int k=0; k<depth; k++)
// blockA[count++] = cj(lhs(i, k));
// }
// }
// };
/* Optimized triangular matrix * matrix (_TRMM++) product built on top of
* the general matrix matrix product.
*/
template <typename Scalar, typename Index,
int Mode, bool LhsIsTriangular,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs,
int ResStorageOrder, int Version = Specialized>
struct product_triangular_matrix_matrix;
template <typename Scalar, typename Index,
int Mode, bool LhsIsTriangular,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs, int Version>
struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
LhsStorageOrder,ConjugateLhs,
RhsStorageOrder,ConjugateRhs,RowMajor,Version>
{
static EIGEN_STRONG_INLINE void run(
Index rows, Index cols, Index depth,
const Scalar* lhs, Index lhsStride,
const Scalar* rhs, Index rhsStride,
Scalar* res, Index resStride,
Scalar alpha, level3_blocking<Scalar,Scalar>& blocking)
{
product_triangular_matrix_matrix<Scalar, Index,
(Mode&(UnitDiag|ZeroDiag)) | ((Mode&Upper) ? Lower : Upper),
(!LhsIsTriangular),
RhsStorageOrder==RowMajor ? ColMajor : RowMajor,
ConjugateRhs,
LhsStorageOrder==RowMajor ? ColMajor : RowMajor,
ConjugateLhs,
ColMajor>
::run(cols, rows, depth, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha, blocking);
}
};
// implements col-major += alpha * op(triangular) * op(general)
template <typename Scalar, typename Index, int Mode,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs, int Version>
struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
LhsStorageOrder,ConjugateLhs,
RhsStorageOrder,ConjugateRhs,ColMajor,Version>
{
typedef gebp_traits<Scalar,Scalar> Traits;
enum {
SmallPanelWidth = 2 * EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
IsLower = (Mode&Lower) == Lower,
SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1
};
static EIGEN_DONT_INLINE void run(
Index _rows, Index _cols, Index _depth,
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride,
Scalar alpha, level3_blocking<Scalar,Scalar>& blocking)
{
// strip zeros
Index diagSize = (std::min)(_rows,_depth);
Index rows = IsLower ? _rows : diagSize;
Index depth = IsLower ? diagSize : _depth;
Index cols = _cols;
const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
Index kc = blocking.kc(); // cache block size along the K direction
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
std::size_t sizeA = kc*mc;
std::size_t sizeB = kc*cols;
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW());
Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer;
triangularBuffer.setZero();
if((Mode&ZeroDiag)==ZeroDiag)
triangularBuffer.diagonal().setZero();
else
triangularBuffer.diagonal().setOnes();
gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
for(Index k2=IsLower ? depth : 0;
IsLower ? k2>0 : k2<depth;
IsLower ? k2-=kc : k2+=kc)
{
Index actual_kc = (std::min)(IsLower ? k2 : depth-k2, kc);
Index actual_k2 = IsLower ? k2-actual_kc : k2;
// align blocks with the end of the triangular part for trapezoidal lhs
if((!IsLower)&&(k2<rows)&&(k2+actual_kc>rows))
{
actual_kc = rows-k2;
k2 = k2+actual_kc-kc;
}
pack_rhs(blockB, &rhs(actual_k2,0), rhsStride, actual_kc, cols);
// the selected lhs panel has to be split into three different parts:
// 1 - the part which is zero => skip it
// 2 - the diagonal block => special kernel
// 3 - the dense panel below (lower case) or above (upper case) the diagonal block => GEPP
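// e.g. for a lower-triangular lhs the current panel of actual_kc columns is,
// from top to bottom:
//   [ 0 ]  rows above actual_k2: identically zero, skipped          (part 1)
//   [ T ]  the triangular diagonal block, packed in SmallPanelWidth
//          slices through triangularBuffer                          (part 2)
//   [ D ]  the dense rows below, handled as a regular GEPP          (part 3)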
// the block diagonal, if any:
if(IsLower || actual_k2<rows)
{
// for each small vertical panels of lhs
for (Index k1=0; k1<actual_kc; k1+=SmallPanelWidth)
{
Index actualPanelWidth = std::min<Index>(actual_kc-k1, SmallPanelWidth);
Index lengthTarget = IsLower ? actual_kc-k1-actualPanelWidth : k1;
Index startBlock = actual_k2+k1;
Index blockBOffset = k1;
// => GEBP with the micro triangular block
// The trick is to pack this micro block while filling the opposite triangular part with zeros.
// To this end we do an extra triangular copy to a small temporary buffer
for (Index k=0;k<actualPanelWidth;++k)
{
if (SetDiag)
triangularBuffer.coeffRef(k,k) = lhs(startBlock+k,startBlock+k);
for (Index i=IsLower ? k+1 : 0; IsLower ? i<actualPanelWidth : i<k; ++i)
triangularBuffer.coeffRef(i,k) = lhs(startBlock+i,startBlock+k);
}
pack_lhs(blockA, triangularBuffer.data(), triangularBuffer.outerStride(), actualPanelWidth, actualPanelWidth);
gebp_kernel(res+startBlock, resStride, blockA, blockB, actualPanelWidth, actualPanelWidth, cols, alpha,
actualPanelWidth, actual_kc, 0, blockBOffset, blockW);
// GEBP with remaining micro panel
if (lengthTarget>0)
{
Index startTarget = IsLower ? actual_k2+k1+actualPanelWidth : actual_k2;
pack_lhs(blockA, &lhs(startTarget,startBlock), lhsStride, actualPanelWidth, lengthTarget);
gebp_kernel(res+startTarget, resStride, blockA, blockB, lengthTarget, actualPanelWidth, cols, alpha,
actualPanelWidth, actual_kc, 0, blockBOffset, blockW);
}
}
}
// the part below (lower case) or above (upper case) the diagonal => GEPP
{
Index start = IsLower ? k2 : 0;
Index end = IsLower ? rows : (std::min)(actual_k2,rows);
for(Index i2=start; i2<end; i2+=mc)
{
const Index actual_mc = (std::min)(i2+mc,end)-i2;
gemm_pack_lhs<Scalar, Index, Traits::mr,Traits::LhsProgress, LhsStorageOrder,false>()
(blockA, &lhs(i2, actual_k2), lhsStride, actual_kc, actual_mc);
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha, -1, -1, 0, 0, blockW);
}
}
}
}
};
// implements col-major += alpha * op(general) * op(triangular)
template <typename Scalar, typename Index, int Mode,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs, int Version>
struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
LhsStorageOrder,ConjugateLhs,
RhsStorageOrder,ConjugateRhs,ColMajor,Version>
{
typedef gebp_traits<Scalar,Scalar> Traits;
enum {
SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
IsLower = (Mode&Lower) == Lower,
SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1
};
static EIGEN_DONT_INLINE void run(
Index _rows, Index _cols, Index _depth,
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride,
Scalar alpha, level3_blocking<Scalar,Scalar>& blocking)
{
// strip zeros
Index diagSize = (std::min)(_cols,_depth);
Index rows = _rows;
Index depth = IsLower ? _depth : diagSize;
Index cols = IsLower ? diagSize : _cols;
const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
Index kc = blocking.kc(); // cache block size along the K direction
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
std::size_t sizeA = kc*mc;
std::size_t sizeB = kc*cols;
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW());
Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer;
triangularBuffer.setZero();
if((Mode&ZeroDiag)==ZeroDiag)
triangularBuffer.diagonal().setZero();
else
triangularBuffer.diagonal().setOnes();
gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder,false,true> pack_rhs_panel;
for(Index k2=IsLower ? 0 : depth;
IsLower ? k2<depth : k2>0;
IsLower ? k2+=kc : k2-=kc)
{
Index actual_kc = (std::min)(IsLower ? depth-k2 : k2, kc);
Index actual_k2 = IsLower ? k2 : k2-actual_kc;
// align blocks with the end of the triangular part for trapezoidal rhs
if(IsLower && (k2<cols) && (actual_k2+actual_kc>cols))
{
actual_kc = cols-k2;
k2 = actual_k2 + actual_kc - kc;
}
// remaining size
Index rs = IsLower ? (std::min)(cols,actual_k2) : cols - k2;
// size of the triangular part
Index ts = (IsLower && actual_k2>=cols) ? 0 : actual_kc;
Scalar* geb = blockB+ts*ts;
pack_rhs(geb, &rhs(actual_k2,IsLower ? 0 : k2), rhsStride, actual_kc, rs);
// pack the triangular part of the rhs, padding the unrolled blocks with zeros
if(ts>0)
{
for (Index j2=0; j2<actual_kc; j2+=SmallPanelWidth)
{
Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth);
Index actual_j2 = actual_k2 + j2;
Index panelOffset = IsLower ? j2+actualPanelWidth : 0;
Index panelLength = IsLower ? actual_kc-j2-actualPanelWidth : j2;
// general part
pack_rhs_panel(blockB+j2*actual_kc,
&rhs(actual_k2+panelOffset, actual_j2), rhsStride,
panelLength, actualPanelWidth,
actual_kc, panelOffset);
// append the triangular part via a temporary buffer
for (Index j=0;j<actualPanelWidth;++j)
{
if (SetDiag)
triangularBuffer.coeffRef(j,j) = rhs(actual_j2+j,actual_j2+j);
for (Index k=IsLower ? j+1 : 0; IsLower ? k<actualPanelWidth : k<j; ++k)
triangularBuffer.coeffRef(k,j) = rhs(actual_j2+k,actual_j2+j);
}
pack_rhs_panel(blockB+j2*actual_kc,
triangularBuffer.data(), triangularBuffer.outerStride(),
actualPanelWidth, actualPanelWidth,
actual_kc, j2);
}
}
for (Index i2=0; i2<rows; i2+=mc)
{
const Index actual_mc = (std::min)(mc,rows-i2);
pack_lhs(blockA, &lhs(i2, actual_k2), lhsStride, actual_kc, actual_mc);
// triangular kernel
if(ts>0)
{
for (Index j2=0; j2<actual_kc; j2+=SmallPanelWidth)
{
Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth);
Index panelLength = IsLower ? actual_kc-j2 : j2+actualPanelWidth;
Index blockOffset = IsLower ? j2 : 0;
gebp_kernel(res+i2+(actual_k2+j2)*resStride, resStride,
blockA, blockB+j2*actual_kc,
actual_mc, panelLength, actualPanelWidth,
alpha,
actual_kc, actual_kc, // strides
blockOffset, blockOffset,// offsets
blockW); // workspace
}
}
gebp_kernel(res+i2+(IsLower ? 0 : k2)*resStride, resStride,
blockA, geb, actual_mc, actual_kc, rs,
alpha,
-1, -1, 0, 0, blockW);
}
}
}
};
/***************************************************************************
* Wrapper to product_triangular_matrix_matrix
***************************************************************************/
template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs>
struct traits<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false> >
: traits<ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>, Lhs, Rhs> >
{};
} // end namespace internal
template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs>
struct TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
: public ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>, Lhs, Rhs >
{
EIGEN_PRODUCT_PUBLIC_INTERFACE(TriangularProduct)
TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
{
typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
* RhsBlasTraits::extractScalarFactor(m_rhs);
typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,4> BlockingType;
enum { IsLower = (Mode&Lower) == Lower };
Index stripedRows = ((!LhsIsTriangular) || (IsLower)) ? lhs.rows() : (std::min)(lhs.rows(),lhs.cols());
Index stripedCols = ((LhsIsTriangular) || (!IsLower)) ? rhs.cols() : (std::min)(rhs.cols(),rhs.rows());
Index stripedDepth = LhsIsTriangular ? ((!IsLower) ? lhs.cols() : (std::min)(lhs.cols(),lhs.rows()))
: ((IsLower) ? rhs.rows() : (std::min)(rhs.rows(),rhs.cols()));
BlockingType blocking(stripedRows, stripedCols, stripedDepth);
internal::product_triangular_matrix_matrix<Scalar, Index,
Mode, LhsIsTriangular,
(internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
(internal::traits<_ActualRhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
(internal::traits<Dest >::Flags&RowMajorBit) ? RowMajor : ColMajor>
::run(
stripedRows, stripedCols, stripedDepth, // sizes
&lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
&rhs.coeffRef(0,0), rhs.outerStride(), // rhs info
&dst.coeffRef(0,0), dst.outerStride(), // result info
actualAlpha, blocking
);
}
};
} // end namespace Eigen
#endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_H
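// A minimal sketch of how the kernels above are reached, assuming a plain
// (non-MKL) build: the product below creates a TriangularProduct with
// LhsIsTriangular = true and ends up in product_triangular_matrix_matrix::run.
#include <Eigen/Dense>

int main()
{
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(32, 32);
  Eigen::MatrixXd B = Eigen::MatrixXd::Random(32, 16);
  // Only the upper triangle of A participates in the product.
  Eigen::MatrixXd C = A.triangularView<Eigen::Upper>() * B;
  return 0;
}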
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
* Content : Eigen bindings to Intel(R) MKL
* Triangular matrix * matrix product functionality based on ?TRMM.
********************************************************************************
*/
#ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H
#define EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H
namespace Eigen {
namespace internal {
template <typename Scalar, typename Index,
int Mode, bool LhsIsTriangular,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs,
int ResStorageOrder>
struct product_triangular_matrix_matrix_trmm :
product_triangular_matrix_matrix<Scalar,Index,Mode,
LhsIsTriangular,LhsStorageOrder,ConjugateLhs,
RhsStorageOrder, ConjugateRhs, ResStorageOrder, BuiltIn> {};
// try to go to BLAS specialization
#define EIGEN_MKL_TRMM_SPECIALIZE(Scalar, LhsIsTriangular) \
template <typename Index, int Mode, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_triangular_matrix_matrix<Scalar,Index, Mode, LhsIsTriangular, \
LhsStorageOrder,ConjugateLhs, RhsStorageOrder,ConjugateRhs,ColMajor,Specialized> { \
static inline void run(Index _rows, Index _cols, Index _depth, const Scalar* _lhs, Index lhsStride,\
const Scalar* _rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha, level3_blocking<Scalar,Scalar>& blocking) { \
product_triangular_matrix_matrix_trmm<Scalar,Index,Mode, \
LhsIsTriangular,LhsStorageOrder,ConjugateLhs, \
RhsStorageOrder, ConjugateRhs, ColMajor>::run( \
_rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \
} \
};
EIGEN_MKL_TRMM_SPECIALIZE(double, true)
EIGEN_MKL_TRMM_SPECIALIZE(double, false)
EIGEN_MKL_TRMM_SPECIALIZE(dcomplex, true)
EIGEN_MKL_TRMM_SPECIALIZE(dcomplex, false)
EIGEN_MKL_TRMM_SPECIALIZE(float, true)
EIGEN_MKL_TRMM_SPECIALIZE(float, false)
EIGEN_MKL_TRMM_SPECIALIZE(scomplex, true)
EIGEN_MKL_TRMM_SPECIALIZE(scomplex, false)
// implements col-major += alpha * op(triangular) * op(general)
#define EIGEN_MKL_TRMM_L(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
template <typename Index, int Mode, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
LhsStorageOrder,ConjugateLhs,RhsStorageOrder,ConjugateRhs,ColMajor> \
{ \
enum { \
IsLower = (Mode&Lower) == Lower, \
SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
LowUp = IsLower ? Lower : Upper, \
conjA = ((LhsStorageOrder==ColMajor) && ConjugateLhs) ? 1 : 0 \
}; \
\
static EIGEN_DONT_INLINE void run( \
Index _rows, Index _cols, Index _depth, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resStride, \
EIGTYPE alpha, level3_blocking<EIGTYPE,EIGTYPE>& blocking) \
{ \
Index diagSize = (std::min)(_rows,_depth); \
Index rows = IsLower ? _rows : diagSize; \
Index depth = IsLower ? diagSize : _depth; \
Index cols = _cols; \
\
typedef Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> MatrixLhs; \
typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs; \
\
    /* Non-square case - doesn't fit MKL ?TRMM; fall back to the default triangular product or call MKL ?GEMM */ \
if (rows != depth) { \
\
int nthr = mkl_domain_get_max_threads(MKL_BLAS); \
\
if (((nthr==1) && (((std::max)(rows,depth)-diagSize)/(double)diagSize < 0.5))) { \
/* Most likely no benefit to call TRMM or GEMM from MKL*/ \
product_triangular_matrix_matrix<EIGTYPE,Index,Mode,true, \
LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \
_rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \
/*std::cout << "TRMM_L: A is not square! Go to Eigen TRMM implementation!\n";*/ \
} else { \
      /* Makes sense to call GEMM */ \
Map<const MatrixLhs, 0, OuterStride<> > lhsMap(_lhs,rows,depth,OuterStride<>(lhsStride)); \
MatrixLhs aa_tmp=lhsMap.template triangularView<Mode>(); \
MKL_INT aStride = aa_tmp.outerStride(); \
gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth); \
general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \
rows, cols, depth, aa_tmp.data(), aStride, _rhs, rhsStride, res, resStride, alpha, gemm_blocking, 0); \
\
/*std::cout << "TRMM_L: A is not square! Go to MKL GEMM implementation! " << nthr<<" \n";*/ \
} \
return; \
} \
char side = 'L', transa, uplo, diag = 'N'; \
EIGTYPE *b; \
const EIGTYPE *a; \
MKL_INT m, n, lda, ldb; \
MKLTYPE alpha_; \
\
/* Set alpha_*/ \
assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
\
/* Set m, n */ \
m = (MKL_INT)diagSize; \
n = (MKL_INT)cols; \
\
/* Set trans */ \
transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \
\
/* Set b, ldb */ \
Map<const MatrixRhs, 0, OuterStride<> > rhs(_rhs,depth,cols,OuterStride<>(rhsStride)); \
MatrixX##EIGPREFIX b_tmp; \
\
if (ConjugateRhs) b_tmp = rhs.conjugate(); else b_tmp = rhs; \
b = b_tmp.data(); \
ldb = b_tmp.outerStride(); \
\
/* Set uplo */ \
uplo = IsLower ? 'L' : 'U'; \
if (LhsStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \
/* Set a, lda */ \
Map<const MatrixLhs, 0, OuterStride<> > lhs(_lhs,rows,depth,OuterStride<>(lhsStride)); \
MatrixLhs a_tmp; \
\
if ((conjA!=0) || (SetDiag==0)) { \
if (conjA) a_tmp = lhs.conjugate(); else a_tmp = lhs; \
if (IsZeroDiag) \
a_tmp.diagonal().setZero(); \
else if (IsUnitDiag) \
a_tmp.diagonal().setOnes();\
a = a_tmp.data(); \
lda = a_tmp.outerStride(); \
} else { \
a = _lhs; \
lda = lhsStride; \
} \
/*std::cout << "TRMM_L: A is square! Go to MKL TRMM implementation! \n";*/ \
/* call ?trmm*/ \
MKLPREFIX##trmm(&side, &uplo, &transa, &diag, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (MKLTYPE*)b, &ldb); \
\
/* Add op(a_triangular)*b into res*/ \
Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
res_tmp=res_tmp+b_tmp; \
} \
};
EIGEN_MKL_TRMM_L(double, double, d, d)
EIGEN_MKL_TRMM_L(dcomplex, MKL_Complex16, cd, z)
EIGEN_MKL_TRMM_L(float, float, f, s)
EIGEN_MKL_TRMM_L(scomplex, MKL_Complex8, cf, c)
// implements col-major += alpha * op(general) * op(triangular)
#define EIGEN_MKL_TRMM_R(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
template <typename Index, int Mode, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
LhsStorageOrder,ConjugateLhs,RhsStorageOrder,ConjugateRhs,ColMajor> \
{ \
enum { \
IsLower = (Mode&Lower) == Lower, \
SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
LowUp = IsLower ? Lower : Upper, \
conjA = ((RhsStorageOrder==ColMajor) && ConjugateRhs) ? 1 : 0 \
}; \
\
static EIGEN_DONT_INLINE void run( \
Index _rows, Index _cols, Index _depth, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resStride, \
EIGTYPE alpha, level3_blocking<EIGTYPE,EIGTYPE>& blocking) \
{ \
Index diagSize = (std::min)(_cols,_depth); \
Index rows = _rows; \
Index depth = IsLower ? _depth : diagSize; \
Index cols = IsLower ? diagSize : _cols; \
\
typedef Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> MatrixLhs; \
typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs; \
\
    /* Non-square case - doesn't fit MKL ?TRMM; fall back to the default triangular product or call MKL ?GEMM */ \
if (cols != depth) { \
\
int nthr = mkl_domain_get_max_threads(MKL_BLAS); \
\
if ((nthr==1) && (((std::max)(cols,depth)-diagSize)/(double)diagSize < 0.5)) { \
/* Most likely no benefit to call TRMM or GEMM from MKL*/ \
product_triangular_matrix_matrix<EIGTYPE,Index,Mode,false, \
LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \
_rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \
/*std::cout << "TRMM_R: A is not square! Go to Eigen TRMM implementation!\n";*/ \
} else { \
      /* Makes sense to call GEMM */ \
Map<const MatrixRhs, 0, OuterStride<> > rhsMap(_rhs,depth,cols, OuterStride<>(rhsStride)); \
MatrixRhs aa_tmp=rhsMap.template triangularView<Mode>(); \
MKL_INT aStride = aa_tmp.outerStride(); \
gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth); \
general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor>::run( \
rows, cols, depth, _lhs, lhsStride, aa_tmp.data(), aStride, res, resStride, alpha, gemm_blocking, 0); \
\
/*std::cout << "TRMM_R: A is not square! Go to MKL GEMM implementation! " << nthr<<" \n";*/ \
} \
return; \
} \
char side = 'R', transa, uplo, diag = 'N'; \
EIGTYPE *b; \
const EIGTYPE *a; \
MKL_INT m, n, lda, ldb; \
MKLTYPE alpha_; \
\
/* Set alpha_*/ \
assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
\
/* Set m, n */ \
m = (MKL_INT)rows; \
n = (MKL_INT)diagSize; \
\
/* Set trans */ \
transa = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 'C' : 'T') : 'N'; \
\
/* Set b, ldb */ \
Map<const MatrixLhs, 0, OuterStride<> > lhs(_lhs,rows,depth,OuterStride<>(lhsStride)); \
MatrixX##EIGPREFIX b_tmp; \
\
if (ConjugateLhs) b_tmp = lhs.conjugate(); else b_tmp = lhs; \
b = b_tmp.data(); \
ldb = b_tmp.outerStride(); \
\
/* Set uplo */ \
uplo = IsLower ? 'L' : 'U'; \
if (RhsStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \
/* Set a, lda */ \
Map<const MatrixRhs, 0, OuterStride<> > rhs(_rhs,depth,cols, OuterStride<>(rhsStride)); \
MatrixRhs a_tmp; \
\
if ((conjA!=0) || (SetDiag==0)) { \
if (conjA) a_tmp = rhs.conjugate(); else a_tmp = rhs; \
if (IsZeroDiag) \
a_tmp.diagonal().setZero(); \
else if (IsUnitDiag) \
a_tmp.diagonal().setOnes();\
a = a_tmp.data(); \
lda = a_tmp.outerStride(); \
} else { \
a = _rhs; \
lda = rhsStride; \
} \
/*std::cout << "TRMM_R: A is square! Go to MKL TRMM implementation! \n";*/ \
/* call ?trmm*/ \
MKLPREFIX##trmm(&side, &uplo, &transa, &diag, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (MKLTYPE*)b, &ldb); \
\
/* Add op(a_triangular)*b into res*/ \
Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
res_tmp=res_tmp+b_tmp; \
} \
};
EIGEN_MKL_TRMM_R(double, double, d, d)
EIGEN_MKL_TRMM_R(dcomplex, MKL_Complex16, cd, z)
EIGEN_MKL_TRMM_R(float, float, f, s)
EIGEN_MKL_TRMM_R(scomplex, MKL_Complex8, cf, c)
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_MKL_H
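// A minimal sketch of the route into ?TRMM, assuming a build with
// EIGEN_USE_MKL_ALL: with a square triangular factor (rows == depth) the
// product below is forwarded to dtrmm; with a trapezoidal factor the macros
// above fall back to the built-in kernel or to dgemm, as decided by the
// thread count and the dense/triangular size ratio.
#include <Eigen/Dense>

int main()
{
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(64, 64);
  Eigen::MatrixXd B = Eigen::MatrixXd::Random(64, 8);
  Eigen::MatrixXd C = A.triangularView<Eigen::Lower>() * B; // square => dtrmm
  return 0;
}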
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_TRIANGULARMATRIXVECTOR_H
#define EIGEN_TRIANGULARMATRIXVECTOR_H
namespace Eigen {
namespace internal {
template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int StorageOrder, int Version=Specialized>
struct triangular_matrix_vector_product;
template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int Version>
struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,ColMajor,Version>
{
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
enum {
IsLower = ((Mode&Lower)==Lower),
HasUnitDiag = (Mode & UnitDiag)==UnitDiag,
HasZeroDiag = (Mode & ZeroDiag)==ZeroDiag
};
static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, ResScalar alpha)
{
static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
Index size = (std::min)(_rows,_cols);
Index rows = IsLower ? _rows : (std::min)(_rows,_cols);
Index cols = IsLower ? (std::min)(_rows,_cols) : _cols;
typedef Map<const Matrix<LhsScalar,Dynamic,Dynamic,ColMajor>, 0, OuterStride<> > LhsMap;
const LhsMap lhs(_lhs,rows,cols,OuterStride<>(lhsStride));
typename conj_expr_if<ConjLhs,LhsMap>::type cjLhs(lhs);
typedef Map<const Matrix<RhsScalar,Dynamic,1>, 0, InnerStride<> > RhsMap;
const RhsMap rhs(_rhs,cols,InnerStride<>(rhsIncr));
typename conj_expr_if<ConjRhs,RhsMap>::type cjRhs(rhs);
typedef Map<Matrix<ResScalar,Dynamic,1> > ResMap;
ResMap res(_res,rows);
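      // panel-wise traversal: within each PanelWidth-wide panel the triangular
      // part is applied column by column (axpy-style updates below), and the
      // dense part below (lower case) / above (upper case) the panel is
      // delegated to the general matrix-vector kernel.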
for (Index pi=0; pi<size; pi+=PanelWidth)
{
Index actualPanelWidth = (std::min)(PanelWidth, size-pi);
for (Index k=0; k<actualPanelWidth; ++k)
{
Index i = pi + k;
Index s = IsLower ? ((HasUnitDiag||HasZeroDiag) ? i+1 : i ) : pi;
Index r = IsLower ? actualPanelWidth-k : k+1;
if ((!(HasUnitDiag||HasZeroDiag)) || (--r)>0)
res.segment(s,r) += (alpha * cjRhs.coeff(i)) * cjLhs.col(i).segment(s,r);
if (HasUnitDiag)
res.coeffRef(i) += alpha * cjRhs.coeff(i);
}
Index r = IsLower ? rows - pi - actualPanelWidth : pi;
if (r>0)
{
Index s = IsLower ? pi+actualPanelWidth : 0;
general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjLhs,RhsScalar,ConjRhs,BuiltIn>::run(
r, actualPanelWidth,
&lhs.coeffRef(s,pi), lhsStride,
&rhs.coeffRef(pi), rhsIncr,
&res.coeffRef(s), resIncr, alpha);
}
}
if((!IsLower) && cols>size)
{
general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjLhs,RhsScalar,ConjRhs>::run(
rows, cols-size,
&lhs.coeffRef(0,size), lhsStride,
&rhs.coeffRef(size), rhsIncr,
_res, resIncr, alpha);
}
}
};
template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs,int Version>
struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,RowMajor,Version>
{
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
enum {
IsLower = ((Mode&Lower)==Lower),
HasUnitDiag = (Mode & UnitDiag)==UnitDiag,
HasZeroDiag = (Mode & ZeroDiag)==ZeroDiag
};
static void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, ResScalar alpha)
{
static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
Index diagSize = (std::min)(_rows,_cols);
Index rows = IsLower ? _rows : diagSize;
Index cols = IsLower ? diagSize : _cols;
typedef Map<const Matrix<LhsScalar,Dynamic,Dynamic,RowMajor>, 0, OuterStride<> > LhsMap;
const LhsMap lhs(_lhs,rows,cols,OuterStride<>(lhsStride));
typename conj_expr_if<ConjLhs,LhsMap>::type cjLhs(lhs);
typedef Map<const Matrix<RhsScalar,Dynamic,1> > RhsMap;
const RhsMap rhs(_rhs,cols);
typename conj_expr_if<ConjRhs,RhsMap>::type cjRhs(rhs);
typedef Map<Matrix<ResScalar,Dynamic,1>, 0, InnerStride<> > ResMap;
ResMap res(_res,rows,InnerStride<>(resIncr));
for (Index pi=0; pi<diagSize; pi+=PanelWidth)
{
Index actualPanelWidth = (std::min)(PanelWidth, diagSize-pi);
for (Index k=0; k<actualPanelWidth; ++k)
{
Index i = pi + k;
Index s = IsLower ? pi : ((HasUnitDiag||HasZeroDiag) ? i+1 : i);
Index r = IsLower ? k+1 : actualPanelWidth-k;
if ((!(HasUnitDiag||HasZeroDiag)) || (--r)>0)
res.coeffRef(i) += alpha * (cjLhs.row(i).segment(s,r).cwiseProduct(cjRhs.segment(s,r).transpose())).sum();
if (HasUnitDiag)
res.coeffRef(i) += alpha * cjRhs.coeff(i);
}
Index r = IsLower ? pi : cols - pi - actualPanelWidth;
if (r>0)
{
Index s = IsLower ? 0 : pi + actualPanelWidth;
general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjLhs,RhsScalar,ConjRhs,BuiltIn>::run(
actualPanelWidth, r,
&lhs.coeffRef(pi,s), lhsStride,
&rhs.coeffRef(s), rhsIncr,
&res.coeffRef(pi), resIncr, alpha);
}
}
if(IsLower && rows>diagSize)
{
general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjLhs,RhsScalar,ConjRhs>::run(
rows-diagSize, cols,
&lhs.coeffRef(diagSize,0), lhsStride,
&rhs.coeffRef(0), rhsIncr,
&res.coeffRef(diagSize), resIncr, alpha);
}
}
};
/***************************************************************************
* Wrapper to triangular_matrix_vector_product
***************************************************************************/
template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs>
struct traits<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,true> >
: traits<ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,true>, Lhs, Rhs> >
{};
template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs>
struct traits<TriangularProduct<Mode,LhsIsTriangular,Lhs,true,Rhs,false> >
: traits<ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,true,Rhs,false>, Lhs, Rhs> >
{};
template<int StorageOrder>
struct trmv_selector;
} // end namespace internal
template<int Mode, typename Lhs, typename Rhs>
struct TriangularProduct<Mode,true,Lhs,false,Rhs,true>
: public ProductBase<TriangularProduct<Mode,true,Lhs,false,Rhs,true>, Lhs, Rhs >
{
EIGEN_PRODUCT_PUBLIC_INTERFACE(TriangularProduct)
TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
{
eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
internal::trmv_selector<(int(internal::traits<Lhs>::Flags)&RowMajorBit) ? RowMajor : ColMajor>::run(*this, dst, alpha);
}
};
template<int Mode, typename Lhs, typename Rhs>
struct TriangularProduct<Mode,false,Lhs,true,Rhs,false>
: public ProductBase<TriangularProduct<Mode,false,Lhs,true,Rhs,false>, Lhs, Rhs >
{
EIGEN_PRODUCT_PUBLIC_INTERFACE(TriangularProduct)
TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
{
eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
typedef TriangularProduct<(Mode & (UnitDiag|ZeroDiag)) | ((Mode & Lower) ? Upper : Lower),true,Transpose<const Rhs>,false,Transpose<const Lhs>,true> TriangularProductTranspose;
Transpose<Dest> dstT(dst);
internal::trmv_selector<(int(internal::traits<Rhs>::Flags)&RowMajorBit) ? ColMajor : RowMajor>::run(
TriangularProductTranspose(m_rhs.transpose(),m_lhs.transpose()), dstT, alpha);
}
};
namespace internal {
// TODO: find a way to factorize this piece of code with gemv_selector since the logic is exactly the same.
template<> struct trmv_selector<ColMajor>
{
template<int Mode, typename Lhs, typename Rhs, typename Dest>
static void run(const TriangularProduct<Mode,true,Lhs,false,Rhs,true>& prod, Dest& dest, typename TriangularProduct<Mode,true,Lhs,false,Rhs,true>::Scalar alpha)
{
typedef TriangularProduct<Mode,true,Lhs,false,Rhs,true> ProductType;
typedef typename ProductType::Index Index;
typedef typename ProductType::LhsScalar LhsScalar;
typedef typename ProductType::RhsScalar RhsScalar;
typedef typename ProductType::Scalar ResScalar;
typedef typename ProductType::RealScalar RealScalar;
typedef typename ProductType::ActualLhsType ActualLhsType;
typedef typename ProductType::ActualRhsType ActualRhsType;
typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
typename internal::add_const_on_value_type<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
* RhsBlasTraits::extractScalarFactor(prod.rhs());
enum {
// FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
      // on the other hand, it is good for the cache to pack the vector anyway...
EvalToDestAtCompileTime = Dest::InnerStrideAtCompileTime==1,
ComplexByReal = (NumTraits<LhsScalar>::IsComplex) && (!NumTraits<RhsScalar>::IsComplex),
MightCannotUseDest = (Dest::InnerStrideAtCompileTime!=1) || ComplexByReal
};
gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
bool alphaIsCompatible = (!ComplexByReal) || (imag(actualAlpha)==RealScalar(0));
bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);
ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
evalToDest ? dest.data() : static_dest.data());
if(!evalToDest)
{
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
int size = dest.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
if(!alphaIsCompatible)
{
MappedDest(actualDestPtr, dest.size()).setZero();
compatibleAlpha = RhsScalar(1);
}
else
MappedDest(actualDestPtr, dest.size()) = dest;
}
internal::triangular_matrix_vector_product
<Index,Mode,
LhsScalar, LhsBlasTraits::NeedToConjugate,
RhsScalar, RhsBlasTraits::NeedToConjugate,
ColMajor>
::run(actualLhs.rows(),actualLhs.cols(),
actualLhs.data(),actualLhs.outerStride(),
actualRhs.data(),actualRhs.innerStride(),
actualDestPtr,1,compatibleAlpha);
if (!evalToDest)
{
if(!alphaIsCompatible)
dest += actualAlpha * MappedDest(actualDestPtr, dest.size());
else
dest = MappedDest(actualDestPtr, dest.size());
}
}
};
template<> struct trmv_selector<RowMajor>
{
template<int Mode, typename Lhs, typename Rhs, typename Dest>
static void run(const TriangularProduct<Mode,true,Lhs,false,Rhs,true>& prod, Dest& dest, typename TriangularProduct<Mode,true,Lhs,false,Rhs,true>::Scalar alpha)
{
typedef TriangularProduct<Mode,true,Lhs,false,Rhs,true> ProductType;
typedef typename ProductType::LhsScalar LhsScalar;
typedef typename ProductType::RhsScalar RhsScalar;
typedef typename ProductType::Scalar ResScalar;
typedef typename ProductType::Index Index;
typedef typename ProductType::ActualLhsType ActualLhsType;
typedef typename ProductType::ActualRhsType ActualRhsType;
typedef typename ProductType::_ActualRhsType _ActualRhsType;
typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
* RhsBlasTraits::extractScalarFactor(prod.rhs());
enum {
DirectlyUseRhs = _ActualRhsType::InnerStrideAtCompileTime==1
};
gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;
ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(),
DirectlyUseRhs ? const_cast<RhsScalar*>(actualRhs.data()) : static_rhs.data());
if(!DirectlyUseRhs)
{
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
int size = actualRhs.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
}
internal::triangular_matrix_vector_product
<Index,Mode,
LhsScalar, LhsBlasTraits::NeedToConjugate,
RhsScalar, RhsBlasTraits::NeedToConjugate,
RowMajor>
::run(actualLhs.rows(),actualLhs.cols(),
actualLhs.data(),actualLhs.outerStride(),
actualRhsPtr,1,
dest.data(),dest.innerStride(),
actualAlpha);
}
};
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_TRIANGULARMATRIXVECTOR_H
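// A minimal sketch of the entry points wrapped above, assuming a plain build:
// the expression builds a TriangularProduct and trmv_selector dispatches on
// the storage order of the triangular factor.
#include <Eigen/Dense>

int main()
{
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(16, 16);
  Eigen::VectorXd x = Eigen::VectorXd::Random(16);
  // UnitLower: the diagonal is taken to be 1 and is not read from A.
  Eigen::VectorXd y = A.triangularView<Eigen::UnitLower>() * x;
  return 0;
}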
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
* Content : Eigen bindings to Intel(R) MKL
* Triangular matrix-vector product functionality based on ?TRMV.
********************************************************************************
*/
#ifndef EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H
#define EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H
namespace Eigen {
namespace internal {
/**********************************************************************
* This file implements triangular matrix-vector multiplication using BLAS
**********************************************************************/
// trmv specialization
template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int StorageOrder>
struct triangular_matrix_vector_product_trmv :
triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,StorageOrder,BuiltIn> {};
#define EIGEN_MKL_TRMV_SPECIALIZE(Scalar) \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,ColMajor,Specialized> { \
static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \
triangular_matrix_vector_product_trmv<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,ColMajor>::run( \
_rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
} \
}; \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,RowMajor,Specialized> { \
static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \
triangular_matrix_vector_product_trmv<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,RowMajor>::run( \
_rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
} \
};
EIGEN_MKL_TRMV_SPECIALIZE(double)
EIGEN_MKL_TRMV_SPECIALIZE(float)
EIGEN_MKL_TRMV_SPECIALIZE(dcomplex)
EIGEN_MKL_TRMV_SPECIALIZE(scomplex)
// implements col-major: res += alpha * op(triangular) * vector
#define EIGEN_MKL_TRMV_CM(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor> { \
enum { \
IsLower = (Mode&Lower) == Lower, \
SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
LowUp = IsLower ? Lower : Upper \
}; \
static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
{ \
if (ConjLhs || IsZeroDiag) { \
triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor,BuiltIn>::run( \
_rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
return; \
}\
Index size = (std::min)(_rows,_cols); \
Index rows = IsLower ? _rows : size; \
Index cols = IsLower ? size : _cols; \
\
typedef VectorX##EIGPREFIX VectorRhs; \
EIGTYPE *x, *y;\
\
/* Set x*/ \
Map<const VectorRhs, 0, InnerStride<> > rhs(_rhs,cols,InnerStride<>(rhsIncr)); \
VectorRhs x_tmp; \
if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
x = x_tmp.data(); \
\
/* Square part handling */\
\
char trans, uplo, diag; \
MKL_INT m, n, lda, incx, incy; \
EIGTYPE const *a; \
MKLTYPE alpha_, beta_; \
assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1)); \
\
/* Set m, n */ \
n = (MKL_INT)size; \
lda = lhsStride; \
incx = 1; \
incy = resIncr; \
\
/* Set uplo, trans and diag*/ \
trans = 'N'; \
uplo = IsLower ? 'L' : 'U'; \
diag = IsUnitDiag ? 'U' : 'N'; \
\
/* call ?TRMV*/ \
MKLPREFIX##trmv(&uplo, &trans, &diag, &n, (const MKLTYPE*)_lhs, &lda, (MKLTYPE*)x, &incx); \
\
/* Add alpha * (op(triangular) * rhs) into res */ \
MKLPREFIX##axpy(&n, &alpha_,(const MKLTYPE*)x, &incx, (MKLTYPE*)_res, &incy); \
/* Non-square case: ?TRMV only covers the leading square triangular block; handle the rectangular remainder with ?GEMV */ \
if (size<(std::max)(rows,cols)) { \
typedef Matrix<EIGTYPE, Dynamic, Dynamic> MatrixLhs; \
if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
x = x_tmp.data(); \
if (size<rows) { \
y = _res + size*resIncr; \
a = _lhs + size; \
m = rows-size; \
n = size; \
} \
else { \
x += size; \
y = _res; \
a = _lhs + size*lda; \
m = size; \
n = cols-size; \
} \
MKLPREFIX##gemv(&trans, &m, &n, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)x, &incx, &beta_, (MKLTYPE*)y, &incy); \
} \
} \
};
EIGEN_MKL_TRMV_CM(double, double, d, d)
EIGEN_MKL_TRMV_CM(dcomplex, MKL_Complex16, cd, z)
EIGEN_MKL_TRMV_CM(float, float, f, s)
EIGEN_MKL_TRMV_CM(scomplex, MKL_Complex8, cf, c)
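// Editor's sketch (not from the original source): inside run() above, only the
// leading size x size triangular block T fits ?TRMV; for a tall lower-triangular
// lhs the remaining rectangle goes through the ?GEMV call:
//   [ T   ]       [ T*x   ]
//   [ A21 ] * x = [ A21*x ]   (rows > cols; the wide/upper cases are symmetric)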
// implements row-major: res += alpha * op(triangular) * vector
#define EIGEN_MKL_TRMV_RM(EIGTYPE, MKLTYPE, EIGPREFIX, MKLPREFIX) \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor> { \
enum { \
IsLower = (Mode&Lower) == Lower, \
SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
LowUp = IsLower ? Lower : Upper \
}; \
static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
{ \
if (IsZeroDiag) { \
triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor,BuiltIn>::run( \
_rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
return; \
}\
Index size = (std::min)(_rows,_cols); \
Index rows = IsLower ? _rows : size; \
Index cols = IsLower ? size : _cols; \
\
typedef VectorX##EIGPREFIX VectorRhs; \
EIGTYPE *x, *y;\
\
/* Set x*/ \
Map<const VectorRhs, 0, InnerStride<> > rhs(_rhs,cols,InnerStride<>(rhsIncr)); \
VectorRhs x_tmp; \
if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
x = x_tmp.data(); \
\
/* Square part handling */\
\
char trans, uplo, diag; \
MKL_INT m, n, lda, incx, incy; \
EIGTYPE const *a; \
MKLTYPE alpha_, beta_; \
assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(alpha_, alpha); \
assign_scalar_eig2mkl<MKLTYPE, EIGTYPE>(beta_, EIGTYPE(1)); \
\
/* Set m, n */ \
n = (MKL_INT)size; \
lda = lhsStride; \
incx = 1; \
incy = resIncr; \
\
/* Set uplo, trans and diag*/ \
trans = ConjLhs ? 'C' : 'T'; \
uplo = IsLower ? 'U' : 'L'; \
diag = IsUnitDiag ? 'U' : 'N'; \
\
/* call ?TRMV*/ \
MKLPREFIX##trmv(&uplo, &trans, &diag, &n, (const MKLTYPE*)_lhs, &lda, (MKLTYPE*)x, &incx); \
\
/* Add alpha * (op(triangular) * rhs) into res */ \
MKLPREFIX##axpy(&n, &alpha_,(const MKLTYPE*)x, &incx, (MKLTYPE*)_res, &incy); \
/* Non-square case: ?TRMV only covers the leading square triangular block; handle the rectangular remainder with ?GEMV */ \
if (size<(std::max)(rows,cols)) { \
typedef Matrix<EIGTYPE, Dynamic, Dynamic> MatrixLhs; \
if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
x = x_tmp.data(); \
if (size<rows) { \
y = _res + size*resIncr; \
a = _lhs + size*lda; \
m = rows-size; \
n = size; \
} \
else { \
x += size; \
y = _res; \
a = _lhs + size; \
m = size; \
n = cols-size; \
} \
MKLPREFIX##gemv(&trans, &n, &m, &alpha_, (const MKLTYPE*)a, &lda, (const MKLTYPE*)x, &incx, &beta_, (MKLTYPE*)y, &incy); \
} \
} \
};
EIGEN_MKL_TRMV_RM(double, double, d, d)
EIGEN_MKL_TRMV_RM(dcomplex, MKL_Complex16, cd, z)
EIGEN_MKL_TRMV_RM(float, float, f, s)
EIGEN_MKL_TRMV_RM(scomplex, MKL_Complex8, cf, c)
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_TRIANGULAR_MATRIX_VECTOR_MKL_H
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_TRIANGULAR_SOLVER_MATRIX_H
#define EIGEN_TRIANGULAR_SOLVER_MATRIX_H
namespace Eigen {
namespace internal {
// if the rhs is row major, let's transpose the product
template <typename Scalar, typename Index, int Side, int Mode, bool Conjugate, int TriStorageOrder>
struct triangular_solve_matrix<Scalar,Index,Side,Mode,Conjugate,TriStorageOrder,RowMajor>
{
static EIGEN_DONT_INLINE void run(
Index size, Index cols,
const Scalar* tri, Index triStride,
Scalar* _other, Index otherStride,
level3_blocking<Scalar,Scalar>& blocking)
{
triangular_solve_matrix<
Scalar, Index, Side==OnTheLeft?OnTheRight:OnTheLeft,
(Mode&UnitDiag) | ((Mode&Upper) ? Lower : Upper),
NumTraits<Scalar>::IsComplex && Conjugate,
TriStorageOrder==RowMajor ? ColMajor : RowMajor, ColMajor>
::run(size, cols, tri, triStride, _other, otherStride, blocking);
}
};
/* Optimized triangular solver with multiple right hand side and the triangular matrix on the left
*/
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder>
struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor>
{
static EIGEN_DONT_INLINE void run(
Index size, Index otherSize,
const Scalar* _tri, Index triStride,
Scalar* _other, Index otherStride,
level3_blocking<Scalar,Scalar>& blocking)
{
Index cols = otherSize;
const_blas_data_mapper<Scalar, Index, TriStorageOrder> tri(_tri,triStride);
blas_data_mapper<Scalar, Index, ColMajor> other(_other,otherStride);
typedef gebp_traits<Scalar,Scalar> Traits;
enum {
SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
IsLower = (Mode&Lower) == Lower
};
Index kc = blocking.kc(); // cache block size along the K direction
Index mc = (std::min)(size,blocking.mc()); // cache block size along the M direction
std::size_t sizeA = kc*mc;
std::size_t sizeB = kc*cols;
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW());
conj_if<Conjugate> conj;
gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, Conjugate, false> gebp_kernel;
gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, TriStorageOrder> pack_lhs;
gemm_pack_rhs<Scalar, Index, Traits::nr, ColMajor, false, true> pack_rhs;
// the goal here is to subdivide the Rhs panels such that we keep some cache
// locality when accessing the rhs elements
std::ptrdiff_t l1, l2;
manage_caching_sizes(GetAction, &l1, &l2);
Index subcols = cols>0 ? l2/(4 * sizeof(Scalar) * otherStride) : 0;
subcols = std::max<Index>((subcols/Traits::nr)*Traits::nr, Traits::nr);
for(Index k2=IsLower ? 0 : size;
IsLower ? k2<size : k2>0;
IsLower ? k2+=kc : k2-=kc)
{
const Index actual_kc = (std::min)(IsLower ? size-k2 : k2, kc);
// We have selected and packed a big horizontal panel R1 of rhs. Let B be the packed copy of this panel,
// and R2 the remaining part of rhs. The corresponding vertical panel of lhs is split into
// A11 (the triangular part) and A21 the remaining rectangular part.
// Then the high level algorithm is:
// - B = R1 => general block copy (done during the next step)
// - R1 = A11^-1 B => tricky part
// - update B from the new R1 => actually this has to be performed continuously during the above step
// - R2 -= A21 * B => GEPP
// The tricky part: compute R1 = A11^-1 B while updating B from R1
// The idea is to split A11 into multiple small vertical panels.
// Each panel can be split into a small triangular part T1k which is processed without optimization,
// and the remaining small part T2k which is processed using gebp with appropriate block strides
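// Editor's worked note (assumption, restating the plan above in block form for the
// lower-triangular case):
//   [ A11  0 ] [ R1 ]   [ B1 ]
//   [ A21  * ] [ R2 ] = [ B2 ]   =>   R1 = A11^{-1} B1,  then  R2 -= A21 * R1
// where A11 is the current kc x kc triangular diagonal block.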
for(Index j2=0; j2<cols; j2+=subcols)
{
Index actual_cols = (std::min)(cols-j2,subcols);
// for each small vertical panels [T1k^T, T2k^T]^T of lhs
for (Index k1=0; k1<actual_kc; k1+=SmallPanelWidth)
{
Index actualPanelWidth = std::min<Index>(actual_kc-k1, SmallPanelWidth);
// tr solve
for (Index k=0; k<actualPanelWidth; ++k)
{
// TODO write a small kernel handling this (can be shared with trsv)
Index i = IsLower ? k2+k1+k : k2-k1-k-1;
Index s = IsLower ? k2+k1 : i+1;
Index rs = actualPanelWidth - k - 1; // remaining size
Scalar a = (Mode & UnitDiag) ? Scalar(1) : Scalar(1)/conj(tri(i,i));
for (Index j=j2; j<j2+actual_cols; ++j)
{
if (TriStorageOrder==RowMajor)
{
Scalar b(0);
const Scalar* l = &tri(i,s);
Scalar* r = &other(s,j);
for (Index i3=0; i3<k; ++i3)
b += conj(l[i3]) * r[i3];
other(i,j) = (other(i,j) - b)*a;
}
else
{
Index s = IsLower ? i+1 : i-rs;
Scalar b = (other(i,j) *= a);
Scalar* r = &other(s,j);
const Scalar* l = &tri(s,i);
for (Index i3=0;i3<rs;++i3)
r[i3] -= b * conj(l[i3]);
}
}
}
Index lengthTarget = actual_kc-k1-actualPanelWidth;
Index startBlock = IsLower ? k2+k1 : k2-k1-actualPanelWidth;
Index blockBOffset = IsLower ? k1 : lengthTarget;
// update the respective rows of B from other
pack_rhs(blockB+actual_kc*j2, &other(startBlock,j2), otherStride, actualPanelWidth, actual_cols, actual_kc, blockBOffset);
// GEBP
if (lengthTarget>0)
{
Index startTarget = IsLower ? k2+k1+actualPanelWidth : k2-actual_kc;
pack_lhs(blockA, &tri(startTarget,startBlock), triStride, actualPanelWidth, lengthTarget);
gebp_kernel(&other(startTarget,j2), otherStride, blockA, blockB+actual_kc*j2, lengthTarget, actualPanelWidth, actual_cols, Scalar(-1),
actualPanelWidth, actual_kc, 0, blockBOffset, blockW);
}
}
}
// R2 -= A21 * B => GEPP
{
Index start = IsLower ? k2+kc : 0;
Index end = IsLower ? size : k2-kc;
for(Index i2=start; i2<end; i2+=mc)
{
const Index actual_mc = (std::min)(mc,end-i2);
if (actual_mc>0)
{
pack_lhs(blockA, &tri(i2, IsLower ? k2 : k2-kc), triStride, actual_kc, actual_mc);
gebp_kernel(_other+i2, otherStride, blockA, blockB, actual_mc, actual_kc, cols, Scalar(-1), -1, -1, 0, 0, blockW);
}
}
}
}
}
};
/* Optimized triangular solver with multiple left hand sides and the triangular matrix on the right
*/
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder>
struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor>
{
static EIGEN_DONT_INLINE void run(
Index size, Index otherSize,
const Scalar* _tri, Index triStride,
Scalar* _other, Index otherStride,
level3_blocking<Scalar,Scalar>& blocking)
{
Index rows = otherSize;
const_blas_data_mapper<Scalar, Index, TriStorageOrder> rhs(_tri,triStride);
blas_data_mapper<Scalar, Index, ColMajor> lhs(_other,otherStride);
typedef gebp_traits<Scalar,Scalar> Traits;
enum {
RhsStorageOrder = TriStorageOrder,
SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
IsLower = (Mode&Lower) == Lower
};
Index kc = blocking.kc(); // cache block size along the K direction
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
std::size_t sizeA = kc*mc;
std::size_t sizeB = kc*size;
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW());
conj_if<Conjugate> conj;
gebp_kernel<Scalar,Scalar, Index, Traits::mr, Traits::nr, false, Conjugate> gebp_kernel;
gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder,false,true> pack_rhs_panel;
gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, ColMajor, false, true> pack_lhs_panel;
for(Index k2=IsLower ? size : 0;
IsLower ? k2>0 : k2<size;
IsLower ? k2-=kc : k2+=kc)
{
const Index actual_kc = (std::min)(IsLower ? k2 : size-k2, kc);
Index actual_k2 = IsLower ? k2-actual_kc : k2 ;
Index startPanel = IsLower ? 0 : k2+actual_kc;
Index rs = IsLower ? actual_k2 : size - actual_k2 - actual_kc;
Scalar* geb = blockB+actual_kc*actual_kc;
if (rs>0) pack_rhs(geb, &rhs(actual_k2,startPanel), triStride, actual_kc, rs);
// triangular packing (we only pack the panels off the diagonal,
// neglecting the blocks overlapping the diagonal)
{
for (Index j2=0; j2<actual_kc; j2+=SmallPanelWidth)
{
Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth);
Index actual_j2 = actual_k2 + j2;
Index panelOffset = IsLower ? j2+actualPanelWidth : 0;
Index panelLength = IsLower ? actual_kc-j2-actualPanelWidth : j2;
if (panelLength>0)
pack_rhs_panel(blockB+j2*actual_kc,
&rhs(actual_k2+panelOffset, actual_j2), triStride,
panelLength, actualPanelWidth,
actual_kc, panelOffset);
}
}
for(Index i2=0; i2<rows; i2+=mc)
{
const Index actual_mc = (std::min)(mc,rows-i2);
// triangular solver kernel
{
// for each small block of the diagonal (=> vertical panels of rhs)
for (Index j2 = IsLower
? (actual_kc - ((actual_kc%SmallPanelWidth) ? Index(actual_kc%SmallPanelWidth)
: Index(SmallPanelWidth)))
: 0;
IsLower ? j2>=0 : j2<actual_kc;
IsLower ? j2-=SmallPanelWidth : j2+=SmallPanelWidth)
{
Index actualPanelWidth = std::min<Index>(actual_kc-j2, SmallPanelWidth);
Index absolute_j2 = actual_k2 + j2;
Index panelOffset = IsLower ? j2+actualPanelWidth : 0;
Index panelLength = IsLower ? actual_kc - j2 - actualPanelWidth : j2;
// GEBP
if(panelLength>0)
{
gebp_kernel(&lhs(i2,absolute_j2), otherStride,
blockA, blockB+j2*actual_kc,
actual_mc, panelLength, actualPanelWidth,
Scalar(-1),
actual_kc, actual_kc, // strides
panelOffset, panelOffset, // offsets
blockW); // workspace
}
// unblocked triangular solve
for (Index k=0; k<actualPanelWidth; ++k)
{
Index j = IsLower ? absolute_j2+actualPanelWidth-k-1 : absolute_j2+k;
Scalar* r = &lhs(i2,j);
for (Index k3=0; k3<k; ++k3)
{
Scalar b = conj(rhs(IsLower ? j+1+k3 : absolute_j2+k3,j));
Scalar* a = &lhs(i2,IsLower ? j+1+k3 : absolute_j2+k3);
for (Index i=0; i<actual_mc; ++i)
r[i] -= a[i] * b;
}
Scalar b = (Mode & UnitDiag) ? Scalar(1) : Scalar(1)/conj(rhs(j,j));
for (Index i=0; i<actual_mc; ++i)
r[i] *= b;
}
// pack the just computed part of lhs to A
pack_lhs_panel(blockA, _other+absolute_j2*otherStride+i2, otherStride,
actualPanelWidth, actual_mc,
actual_kc, j2);
}
}
if (rs>0)
gebp_kernel(_other+i2+startPanel*otherStride, otherStride, blockA, geb,
actual_mc, actual_kc, rs, Scalar(-1),
-1, -1, 0, 0, blockW);
}
}
}
};
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_TRIANGULAR_SOLVER_MATRIX_H
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
* Content : Eigen bindings to Intel(R) MKL
* Triangular matrix solve functionality based on ?TRSM.
********************************************************************************
*/
#ifndef EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H
#define EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H
namespace Eigen {
namespace internal {
// implements LeftSide op(triangular)^-1 * general
#define EIGEN_MKL_TRSM_L(EIGTYPE, MKLTYPE, MKLPREFIX) \
template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor> \
{ \
enum { \
IsLower = (Mode&Lower) == Lower, \
IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \
}; \
static EIGEN_DONT_INLINE void run( \
Index size, Index otherSize, \
const EIGTYPE* _tri, Index triStride, \
EIGTYPE* _other, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \
{ \
MKL_INT m = size, n = otherSize, lda, ldb; \
char side = 'L', uplo, diag='N', transa; \
/* Set alpha_ */ \
MKLTYPE alpha; \
EIGTYPE myone(1); \
assign_scalar_eig2mkl(alpha, myone); \
ldb = otherStride;\
\
const EIGTYPE *a; \
/* Set trans */ \
transa = (TriStorageOrder==RowMajor) ? ((Conjugate) ? 'C' : 'T') : 'N'; \
/* Set uplo */ \
uplo = IsLower ? 'L' : 'U'; \
if (TriStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \
/* Set a, lda */ \
typedef Matrix<EIGTYPE, Dynamic, Dynamic, TriStorageOrder> MatrixTri; \
Map<const MatrixTri, 0, OuterStride<> > tri(_tri,size,size,OuterStride<>(triStride)); \
MatrixTri a_tmp; \
\
if (conjA) { \
a_tmp = tri.conjugate(); \
a = a_tmp.data(); \
lda = a_tmp.outerStride(); \
} else { \
a = _tri; \
lda = triStride; \
} \
if (IsUnitDiag) diag='U'; \
/* call ?trsm*/ \
MKLPREFIX##trsm(&side, &uplo, &transa, &diag, &m, &n, &alpha, (const MKLTYPE*)a, &lda, (MKLTYPE*)_other, &ldb); \
} \
};
EIGEN_MKL_TRSM_L(double, double, d)
EIGEN_MKL_TRSM_L(dcomplex, MKL_Complex16, z)
EIGEN_MKL_TRSM_L(float, float, s)
EIGEN_MKL_TRSM_L(scomplex, MKL_Complex8, c)
// implements RightSide general * op(triangular)^-1
#define EIGEN_MKL_TRSM_R(EIGTYPE, MKLTYPE, MKLPREFIX) \
template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor> \
{ \
enum { \
IsLower = (Mode&Lower) == Lower, \
IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \
}; \
static EIGEN_DONT_INLINE void run( \
Index size, Index otherSize, \
const EIGTYPE* _tri, Index triStride, \
EIGTYPE* _other, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \
{ \
MKL_INT m = otherSize, n = size, lda, ldb; \
char side = 'R', uplo, diag='N', transa; \
/* Set alpha_ */ \
MKLTYPE alpha; \
EIGTYPE myone(1); \
assign_scalar_eig2mkl(alpha, myone); \
ldb = otherStride;\
\
const EIGTYPE *a; \
/* Set trans */ \
transa = (TriStorageOrder==RowMajor) ? ((Conjugate) ? 'C' : 'T') : 'N'; \
/* Set uplo */ \
uplo = IsLower ? 'L' : 'U'; \
if (TriStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \
/* Set a, lda */ \
typedef Matrix<EIGTYPE, Dynamic, Dynamic, TriStorageOrder> MatrixTri; \
Map<const MatrixTri, 0, OuterStride<> > tri(_tri,size,size,OuterStride<>(triStride)); \
MatrixTri a_tmp; \
\
if (conjA) { \
a_tmp = tri.conjugate(); \
a = a_tmp.data(); \
lda = a_tmp.outerStride(); \
} else { \
a = _tri; \
lda = triStride; \
} \
if (IsUnitDiag) diag='U'; \
/* call ?trsm*/ \
MKLPREFIX##trsm(&side, &uplo, &transa, &diag, &m, &n, &alpha, (const MKLTYPE*)a, &lda, (MKLTYPE*)_other, &ldb); \
} \
};
EIGEN_MKL_TRSM_R(double, double, d)
EIGEN_MKL_TRSM_R(dcomplex, MKL_Complex16, z)
EIGEN_MKL_TRSM_R(float, float, s)
EIGEN_MKL_TRSM_R(scomplex, MKL_Complex8, c)
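// Usage sketch (editor's assumption, standard Eigen API): with the specializations
// above in place, a column-major double solve such as
//   A.triangularView<Eigen::Lower>().solveInPlace(B);   // A, B: MatrixXd
// should resolve to a single dtrsm call with side='L', uplo='L', transa='N', diag='N'.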
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_TRIANGULAR_SOLVER_MATRIX_MKL_H
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_TRIANGULAR_SOLVER_VECTOR_H
#define EIGEN_TRIANGULAR_SOLVER_VECTOR_H
namespace Eigen {
namespace internal {
template<typename LhsScalar, typename RhsScalar, typename Index, int Mode, bool Conjugate, int StorageOrder>
struct triangular_solve_vector<LhsScalar, RhsScalar, Index, OnTheRight, Mode, Conjugate, StorageOrder>
{
static void run(Index size, const LhsScalar* _lhs, Index lhsStride, RhsScalar* rhs)
{
triangular_solve_vector<LhsScalar,RhsScalar,Index,OnTheLeft,
((Mode&Upper)==Upper ? Lower : Upper) | (Mode&UnitDiag),
Conjugate,StorageOrder==RowMajor?ColMajor:RowMajor
>::run(size, _lhs, lhsStride, rhs);
}
};
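// Editor's worked note: both kernels below implement the classical substitution
// recurrence, for the lower-triangular case
//   x(i) = ( b(i) - sum_{j<i} L(i,j)*x(j) ) / L(i,i)
// evaluated panel by panel so that the bulk of the update runs through the
// optimized general_matrix_vector_product.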
// forward and backward substitution, row-major, rhs is a vector
template<typename LhsScalar, typename RhsScalar, typename Index, int Mode, bool Conjugate>
struct triangular_solve_vector<LhsScalar, RhsScalar, Index, OnTheLeft, Mode, Conjugate, RowMajor>
{
enum {
IsLower = ((Mode&Lower)==Lower)
};
static void run(Index size, const LhsScalar* _lhs, Index lhsStride, RhsScalar* rhs)
{
typedef Map<const Matrix<LhsScalar,Dynamic,Dynamic,RowMajor>, 0, OuterStride<> > LhsMap;
const LhsMap lhs(_lhs,size,size,OuterStride<>(lhsStride));
typename internal::conditional<
Conjugate,
const CwiseUnaryOp<typename internal::scalar_conjugate_op<LhsScalar>,LhsMap>,
const LhsMap&>
::type cjLhs(lhs);
static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
for(Index pi=IsLower ? 0 : size;
IsLower ? pi<size : pi>0;
IsLower ? pi+=PanelWidth : pi-=PanelWidth)
{
Index actualPanelWidth = (std::min)(IsLower ? size - pi : pi, PanelWidth);
Index r = IsLower ? pi : size - pi; // remaining size
if (r > 0)
{
// let's directly call the low level product function because:
// 1 - it is faster to compile
// 2 - it is slightly faster at runtime
Index startRow = IsLower ? pi : pi-actualPanelWidth;
Index startCol = IsLower ? 0 : pi;
general_matrix_vector_product<Index,LhsScalar,RowMajor,Conjugate,RhsScalar,false>::run(
actualPanelWidth, r,
&lhs.coeffRef(startRow,startCol), lhsStride,
rhs + startCol, 1,
rhs + startRow, 1,
RhsScalar(-1));
}
for(Index k=0; k<actualPanelWidth; ++k)
{
Index i = IsLower ? pi+k : pi-k-1;
Index s = IsLower ? pi : i+1;
if (k>0)
rhs[i] -= (cjLhs.row(i).segment(s,k).transpose().cwiseProduct(Map<const Matrix<RhsScalar,Dynamic,1> >(rhs+s,k))).sum();
if(!(Mode & UnitDiag))
rhs[i] /= cjLhs(i,i);
}
}
}
};
// forward and backward substitution, column-major, rhs is a vector
template<typename LhsScalar, typename RhsScalar, typename Index, int Mode, bool Conjugate>
struct triangular_solve_vector<LhsScalar, RhsScalar, Index, OnTheLeft, Mode, Conjugate, ColMajor>
{
enum {
IsLower = ((Mode&Lower)==Lower)
};
static void run(Index size, const LhsScalar* _lhs, Index lhsStride, RhsScalar* rhs)
{
typedef Map<const Matrix<LhsScalar,Dynamic,Dynamic,ColMajor>, 0, OuterStride<> > LhsMap;
const LhsMap lhs(_lhs,size,size,OuterStride<>(lhsStride));
typename internal::conditional<Conjugate,
const CwiseUnaryOp<typename internal::scalar_conjugate_op<LhsScalar>,LhsMap>,
const LhsMap&
>::type cjLhs(lhs);
static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
for(Index pi=IsLower ? 0 : size;
IsLower ? pi<size : pi>0;
IsLower ? pi+=PanelWidth : pi-=PanelWidth)
{
Index actualPanelWidth = (std::min)(IsLower ? size - pi : pi, PanelWidth);
Index startBlock = IsLower ? pi : pi-actualPanelWidth;
Index endBlock = IsLower ? pi + actualPanelWidth : 0;
for(Index k=0; k<actualPanelWidth; ++k)
{
Index i = IsLower ? pi+k : pi-k-1;
if(!(Mode & UnitDiag))
rhs[i] /= cjLhs.coeff(i,i);
Index r = actualPanelWidth - k - 1; // remaining size
Index s = IsLower ? i+1 : i-r;
if (r>0)
Map<Matrix<RhsScalar,Dynamic,1> >(rhs+s,r) -= rhs[i] * cjLhs.col(i).segment(s,r);
}
Index r = IsLower ? size - endBlock : startBlock; // remaining size
if (r > 0)
{
// let's directly call the low level product function because:
// 1 - it is faster to compile
// 2 - it is slightly faster at runtime
general_matrix_vector_product<Index,LhsScalar,ColMajor,Conjugate,RhsScalar,false>::run(
r, actualPanelWidth,
&lhs.coeffRef(endBlock,startBlock), lhsStride,
rhs+startBlock, 1,
rhs+endBlock, 1, RhsScalar(-1));
}
}
}
};
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_TRIANGULAR_SOLVER_VECTOR_H
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_BLASUTIL_H
#define EIGEN_BLASUTIL_H
// This file contains many lightweight helper classes used to
// implement and control fast level 2 and level 3 BLAS-like routines.
namespace Eigen {
namespace internal {
// forward declarations
template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjugateLhs=false, bool ConjugateRhs=false>
struct gebp_kernel;
template<typename Scalar, typename Index, int nr, int StorageOrder, bool Conjugate = false, bool PanelMode=false>
struct gemm_pack_rhs;
template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder, bool Conjugate = false, bool PanelMode = false>
struct gemm_pack_lhs;
template<
typename Index,
typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
int ResStorageOrder>
struct general_matrix_matrix_product;
template<typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version=Specialized>
struct general_matrix_vector_product;
template<bool Conjugate> struct conj_if;
template<> struct conj_if<true> {
template<typename T>
inline T operator()(const T& x) { return conj(x); }
template<typename T>
inline T pconj(const T& x) { return internal::pconj(x); }
};
template<> struct conj_if<false> {
template<typename T>
inline const T& operator()(const T& x) { return x; }
template<typename T>
inline const T& pconj(const T& x) { return x; }
};
template<typename Scalar> struct conj_helper<Scalar,Scalar,false,false>
{
EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const { return internal::pmadd(x,y,c); }
EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const { return internal::pmul(x,y); }
};
template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::complex<RealScalar>, false,true>
{
typedef std::complex<RealScalar> Scalar;
EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const
{ return c + pmul(x,y); }
EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const
{ return Scalar(real(x)*real(y) + imag(x)*imag(y), imag(x)*real(y) - real(x)*imag(y)); }
};
template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::complex<RealScalar>, true,false>
{
typedef std::complex<RealScalar> Scalar;
EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const
{ return c + pmul(x,y); }
EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const
{ return Scalar(real(x)*real(y) + imag(x)*imag(y), real(x)*imag(y) - imag(x)*real(y)); }
};
template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::complex<RealScalar>, true,true>
{
typedef std::complex<RealScalar> Scalar;
EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const
{ return c + pmul(x,y); }
EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const
{ return Scalar(real(x)*real(y) - imag(x)*imag(y), - real(x)*imag(y) - imag(x)*real(y)); }
};
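// Editor's worked check (not in the original source): these specializations compute
// conj_if<ConjLhs>(x) * conj_if<ConjRhs>(y) with the conjugations folded into the
// arithmetic. E.g. for x = a+bi, y = c+di and (ConjLhs,ConjRhs) = (false,true):
//   x * conj(y) = (ac + bd) + (bc - ad)i
// which is exactly the pmul of the (false,true) specialization above.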
template<typename RealScalar,bool Conj> struct conj_helper<std::complex<RealScalar>, RealScalar, Conj,false>
{
typedef std::complex<RealScalar> Scalar;
EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const RealScalar& y, const Scalar& c) const
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const RealScalar& y) const
{ return conj_if<Conj>()(x)*y; }
};
template<typename RealScalar,bool Conj> struct conj_helper<RealScalar, std::complex<RealScalar>, false,Conj>
{
typedef std::complex<RealScalar> Scalar;
EIGEN_STRONG_INLINE Scalar pmadd(const RealScalar& x, const Scalar& y, const Scalar& c) const
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Scalar pmul(const RealScalar& x, const Scalar& y) const
{ return x*conj_if<Conj>()(y); }
};
template<typename From,typename To> struct get_factor {
static EIGEN_STRONG_INLINE To run(const From& x) { return x; }
};
template<typename Scalar> struct get_factor<Scalar,typename NumTraits<Scalar>::Real> {
static EIGEN_STRONG_INLINE typename NumTraits<Scalar>::Real run(const Scalar& x) { return real(x); }
};
// Lightweight helper class to access matrix coefficients.
// Yes, this is somewhat redundant with Map<>, but this version is much lighter,
// and should thus yield better compilation performance (time and code quality).
template<typename Scalar, typename Index, int StorageOrder>
class blas_data_mapper
{
public:
blas_data_mapper(Scalar* data, Index stride) : m_data(data), m_stride(stride) {}
EIGEN_STRONG_INLINE Scalar& operator()(Index i, Index j)
{ return m_data[StorageOrder==RowMajor ? j + i*m_stride : i + j*m_stride]; }
protected:
Scalar* EIGEN_RESTRICT m_data;
Index m_stride;
};
// lightweight helper class to access matrix coefficients (const version)
template<typename Scalar, typename Index, int StorageOrder>
class const_blas_data_mapper
{
public:
const_blas_data_mapper(const Scalar* data, Index stride) : m_data(data), m_stride(stride) {}
EIGEN_STRONG_INLINE const Scalar& operator()(Index i, Index j) const
{ return m_data[StorageOrder==RowMajor ? j + i*m_stride : i + j*m_stride]; }
protected:
const Scalar* EIGEN_RESTRICT m_data;
Index m_stride;
};
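// Editor's note (assumption): for StorageOrder==ColMajor, operator()(i,j) addresses
// m_data[i + j*m_stride], i.e. the usual BLAS column-major layout with m_stride in
// the role of lda; RowMajor simply swaps the roles of i and j.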
/* Helper class to analyze the factors of a Product expression.
* In particular it allows one to pop out operator-, scalar multiples,
* and conjugate */
template<typename XprType> struct blas_traits
{
typedef typename traits<XprType>::Scalar Scalar;
typedef const XprType& ExtractType;
typedef XprType _ExtractType;
enum {
IsComplex = NumTraits<Scalar>::IsComplex,
IsTransposed = false,
NeedToConjugate = false,
HasUsableDirectAccess = ( (int(XprType::Flags)&DirectAccessBit)
&& ( bool(XprType::IsVectorAtCompileTime)
|| int(inner_stride_at_compile_time<XprType>::ret) == 1)
) ? 1 : 0
};
typedef typename conditional<bool(HasUsableDirectAccess),
ExtractType,
typename _ExtractType::PlainObject
>::type DirectLinearAccessType;
static inline ExtractType extract(const XprType& x) { return x; }
static inline const Scalar extractScalarFactor(const XprType&) { return Scalar(1); }
};
// pop conjugate
template<typename Scalar, typename NestedXpr>
struct blas_traits<CwiseUnaryOp<scalar_conjugate_op<Scalar>, NestedXpr> >
: blas_traits<NestedXpr>
{
typedef blas_traits<NestedXpr> Base;
typedef CwiseUnaryOp<scalar_conjugate_op<Scalar>, NestedXpr> XprType;
typedef typename Base::ExtractType ExtractType;
enum {
IsComplex = NumTraits<Scalar>::IsComplex,
NeedToConjugate = Base::NeedToConjugate ? 0 : IsComplex
};
static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
static inline Scalar extractScalarFactor(const XprType& x) { return conj(Base::extractScalarFactor(x.nestedExpression())); }
};
// pop scalar multiple
template<typename Scalar, typename NestedXpr>
struct blas_traits<CwiseUnaryOp<scalar_multiple_op<Scalar>, NestedXpr> >
: blas_traits<NestedXpr>
{
typedef blas_traits<NestedXpr> Base;
typedef CwiseUnaryOp<scalar_multiple_op<Scalar>, NestedXpr> XprType;
typedef typename Base::ExtractType ExtractType;
static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
static inline Scalar extractScalarFactor(const XprType& x)
{ return x.functor().m_other * Base::extractScalarFactor(x.nestedExpression()); }
};
// pop opposite
template<typename Scalar, typename NestedXpr>
struct blas_traits<CwiseUnaryOp<scalar_opposite_op<Scalar>, NestedXpr> >
: blas_traits<NestedXpr>
{
typedef blas_traits<NestedXpr> Base;
typedef CwiseUnaryOp<scalar_opposite_op<Scalar>, NestedXpr> XprType;
typedef typename Base::ExtractType ExtractType;
static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
static inline Scalar extractScalarFactor(const XprType& x)
{ return - Base::extractScalarFactor(x.nestedExpression()); }
};
// pop/push transpose
template<typename NestedXpr>
struct blas_traits<Transpose<NestedXpr> >
: blas_traits<NestedXpr>
{
typedef typename NestedXpr::Scalar Scalar;
typedef blas_traits<NestedXpr> Base;
typedef Transpose<NestedXpr> XprType;
typedef Transpose<const typename Base::_ExtractType> ExtractType; // const to get rid of a compile error; anyway blas traits are only used on the RHS
typedef Transpose<const typename Base::_ExtractType> _ExtractType;
typedef typename conditional<bool(Base::HasUsableDirectAccess),
ExtractType,
typename ExtractType::PlainObject
>::type DirectLinearAccessType;
enum {
IsTransposed = Base::IsTransposed ? 0 : 1
};
static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
static inline Scalar extractScalarFactor(const XprType& x) { return Base::extractScalarFactor(x.nestedExpression()); }
};
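// Editor's example (hypothetical expression, standard Eigen types): for an
// expression such as
//   (s * m.conjugate()).transpose()    // s: std::complex<double>, m: MatrixXcd
// the specializations above peel the wrappers one by one: extract() drills down to
// m itself, extractScalarFactor() accumulates s, and the IsTransposed and
// NeedToConjugate flags record the popped transpose and conjugation.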
template<typename T>
struct blas_traits<const T>
: blas_traits<T>
{};
template<typename T, bool HasUsableDirectAccess=blas_traits<T>::HasUsableDirectAccess>
struct extract_data_selector {
static const typename T::Scalar* run(const T& m)
{
return blas_traits<T>::extract(m).data();
}
};
template<typename T>
struct extract_data_selector<T,false> {
static typename T::Scalar* run(const T&) { return 0; }
};
template<typename T> const typename T::Scalar* extract_data(const T& m)
{
return extract_data_selector<T>::run(m);
}
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_BLASUTIL_H
FILE(GLOB Eigen_Core_util_SRCS "*.h")
INSTALL(FILES
${Eigen_Core_util_SRCS}
DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/util COMPONENT Devel
)
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2007-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_CONSTANTS_H
#define EIGEN_CONSTANTS_H
namespace Eigen {
/** This value means that a quantity is not known at compile-time, and that instead the value is
* stored in some runtime variable.
*
* Changing the value of Dynamic breaks the ABI, as Dynamic is often used as a template parameter for Matrix.
*/
const int Dynamic = -1;
/** This value means +Infinity; it is currently used only as the p parameter to MatrixBase::lpNorm<int>().
* The value Infinity there means the L-infinity norm.
*/
const int Infinity = -1;
/** \defgroup flags Flags
* \ingroup Core_Module
*
* These are the possible bits which can be OR'ed to constitute the flags of a matrix or
* expression.
*
* It is important to note that these flags are a purely compile-time notion. They are a compile-time property of
* an expression type, implemented as enums. They are not stored in memory at runtime, and they do not incur any
* runtime overhead.
*
* \sa MatrixBase::Flags
*/
/** \ingroup flags
*
* for a matrix, this means that the storage order is row-major.
* If this bit is not set, the storage order is column-major.
* For an expression, this determines the storage order of
* the matrix created by evaluation of that expression.
* \sa \ref TopicStorageOrders */
const unsigned int RowMajorBit = 0x1;
/** \ingroup flags
*
* means the expression should be evaluated by the calling expression */
const unsigned int EvalBeforeNestingBit = 0x2;
/** \ingroup flags
*
* means the expression should be evaluated before any assignment */
const unsigned int EvalBeforeAssigningBit = 0x4;
/** \ingroup flags
*
* Short version: means the expression might be vectorized
*
* Long version: means that the coefficients can be handled by packets
* and start at a memory location whose alignment meets the requirements
* of the present CPU architecture for optimized packet access. In the fixed-size
* case, there is the additional condition that it be possible to access all the
* coefficients by packets (this implies the requirement that the size be a multiple of 16 bytes,
* and that any nontrivial strides don't break the alignment). In the dynamic-size case,
* there is no such condition on the total size and strides, so it might not be possible to access
* all coeffs by packets.
*
* \note This bit can be set regardless of whether vectorization is actually enabled.
* To check for actual vectorizability, see \a ActualPacketAccessBit.
*/
const unsigned int PacketAccessBit = 0x8;
#ifdef EIGEN_VECTORIZE
/** \ingroup flags
*
* If vectorization is enabled (EIGEN_VECTORIZE is defined) this constant
* is set to the value \a PacketAccessBit.
*
* If vectorization is not enabled (EIGEN_VECTORIZE is not defined) this constant
* is set to the value 0.
*/
const unsigned int ActualPacketAccessBit = PacketAccessBit;
#else
const unsigned int ActualPacketAccessBit = 0x0;
#endif
/** \ingroup flags
*
* Short version: means the expression can be seen as 1D vector.
*
* Long version: means that one can access the coefficients
* of this expression by coeff(int), and coeffRef(int) in the case of an lvalue expression. These
* index-based access methods are guaranteed
* to not have to do any runtime computation of a (row, col)-pair from the index, so that it
* is guaranteed that whenever it is available, index-based access is at least as fast as
* (row,col)-based access. Expressions for which that isn't possible don't have the LinearAccessBit.
*
* If both PacketAccessBit and LinearAccessBit are set, then the
* packets of this expression can be accessed by packet(int), and writePacket(int) in the case of an
* lvalue expression.
*
* Typically, all vector expressions have the LinearAccessBit, but there is one exception:
* Product expressions don't have it, because it would be troublesome for vectorization, even when the
* Product is a vector expression. Thus, vector Product expressions allow index-based coefficient access but
* not index-based packet access, so they don't have the LinearAccessBit.
*/
const unsigned int LinearAccessBit = 0x10;
/** \ingroup flags
*
* Means the expression has a coeffRef() method, i.e. is writable as its individual coefficients are directly addressable.
* This rules out read-only expressions.
*
* Note that DirectAccessBit and LvalueBit are mutually orthogonal, as there are examples of expressions having one but not
* the other:
* \li writable expressions that don't have a very simple memory layout as a strided array, have LvalueBit but not DirectAccessBit
* \li Map-to-const expressions, for example Map<const Matrix>, have DirectAccessBit but not LvalueBit
*
* Expressions having LvalueBit also have their coeff() method returning a const reference instead of returning a new value.
*/
const unsigned int LvalueBit = 0x20;
/** \ingroup flags
*
* Means that the underlying array of coefficients can be directly accessed as a plain strided array. The memory layout
* of the array of coefficients must be exactly the natural one suggested by rows(), cols(),
* outerStride(), innerStride(), and the RowMajorBit. This rules out expressions such as Diagonal, whose coefficients,
* though referenceable, do not have such a regular memory layout.
*
* See the comment on LvalueBit for an explanation of how LvalueBit and DirectAccessBit are mutually orthogonal.
*/
const unsigned int DirectAccessBit = 0x40;
/** \ingroup flags
*
* means the first coefficient packet is guaranteed to be aligned */
const unsigned int AlignedBit = 0x80;
const unsigned int NestByRefBit = 0x100;
// list of flags that are inherited by default
const unsigned int HereditaryBits = RowMajorBit
| EvalBeforeNestingBit
| EvalBeforeAssigningBit;
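/* Editor's sketch (assumes a C++11 compiler for static_assert): since flags are a
 * compile-time property of the type, they can be inspected statically, e.g.
 *   static_assert( Eigen::Matrix<float,2,2,Eigen::RowMajor>::Flags & Eigen::RowMajorBit, "row-major" );
 *   static_assert( !(Eigen::Matrix3d::Flags & Eigen::RowMajorBit), "column-major by default" );
 */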
/** \defgroup enums Enumerations
* \ingroup Core_Module
*
* Various enumerations used in %Eigen. Many of these are used as template parameters.
*/
/** \ingroup enums
* Enum containing possible values for the \p Mode parameter of
* MatrixBase::selfadjointView() and MatrixBase::triangularView(). */
enum {
/** View matrix as a lower triangular matrix. */
Lower=0x1,
/** View matrix as an upper triangular matrix. */
Upper=0x2,
/** %Matrix has ones on the diagonal; to be used in combination with #Lower or #Upper. */
UnitDiag=0x4,
/** %Matrix has zeros on the diagonal; to be used in combination with #Lower or #Upper. */
ZeroDiag=0x8,
/** View matrix as a lower triangular matrix with ones on the diagonal. */
UnitLower=UnitDiag|Lower,
/** View matrix as an upper triangular matrix with ones on the diagonal. */
UnitUpper=UnitDiag|Upper,
/** View matrix as a lower triangular matrix with zeros on the diagonal. */
StrictlyLower=ZeroDiag|Lower,
/** View matrix as an upper triangular matrix with zeros on the diagonal. */
StrictlyUpper=ZeroDiag|Upper,
/** Used in BandMatrix and SelfAdjointView to indicate that the matrix is self-adjoint. */
SelfAdjoint=0x10,
/** Used to support symmetric, non-selfadjoint, complex matrices. */
Symmetric=0x20
};
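/* Editor's usage sketch (standard triangularView API), e.g. a solve with an
 * implicitly unit diagonal:
 *   Eigen::Matrix3d L = Eigen::Matrix3d::Random();
 *   Eigen::Vector3d b = Eigen::Vector3d::Ones();
 *   Eigen::Vector3d x = L.triangularView<Eigen::UnitLower>().solve(b);
 */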
/** \ingroup enums
* Enum for indicating whether an object is aligned or not. */
enum {
/** Object is not correctly aligned for vectorization. */
Unaligned=0,
/** Object is aligned for vectorization. */
Aligned=1
};
/** \ingroup enums
* Enum used by DenseBase::corner() in Eigen2 compatibility mode. */
// FIXME: after the corner() API change, this is no longer needed, except by AlignedBox
// TODO: find out what to do with that. Adapt the AlignedBox API ?
enum CornerType { TopLeft, TopRight, BottomLeft, BottomRight };
/** \ingroup enums
* Enum containing possible values for the \p Direction parameter of
* Reverse, PartialReduxExpr and VectorwiseOp. */
enum DirectionType {
/** For Reverse, all columns are reversed;
* for PartialReduxExpr and VectorwiseOp, act on columns. */
Vertical,
/** For Reverse, all rows are reversed;
* for PartialReduxExpr and VectorwiseOp, act on rows. */
Horizontal,
/** For Reverse, both rows and columns are reversed;
* not used for PartialReduxExpr and VectorwiseOp. */
BothDirections
};
/** \internal \ingroup enums
* Enum to specify how to traverse the entries of a matrix. */
enum {
/** \internal Default traversal, no vectorization, no index-based access */
DefaultTraversal,
/** \internal No vectorization, use index-based access to have only one for loop instead of 2 nested loops */
LinearTraversal,
/** \internal Equivalent to a slice vectorization for fixed-size matrices having good alignment
* and good size */
InnerVectorizedTraversal,
/** \internal Vectorization path using a single loop plus scalar loops for the
* unaligned boundaries */
LinearVectorizedTraversal,
/** \internal Generic vectorization path using one vectorized loop per row/column with some
* scalar loops to handle the unaligned boundaries */
SliceVectorizedTraversal,
/** \internal Special case to properly handle incompatible scalar types or other defecting cases*/
InvalidTraversal
};
/** \internal \ingroup enums
* Enum to specify whether to unroll loops when traversing over the entries of a matrix. */
enum {
/** \internal Do not unroll loops. */
NoUnrolling,
/** \internal Unroll only the inner loop, but not the outer loop. */
InnerUnrolling,
/** \internal Unroll both the inner and the outer loop. If there is only one loop,
* because linear traversal is used, then unroll that loop. */
CompleteUnrolling
};
/** \internal \ingroup enums
* Enum to specify whether to use the default (built-in) implementation or the specialization. */
enum {
Specialized,
BuiltIn
};
/** \ingroup enums
* Enum containing possible values for the \p _Options template parameter of
* Matrix, Array and BandMatrix. */
enum {
/** Storage order is column major (see \ref TopicStorageOrders). */
ColMajor = 0,
/** Storage order is row major (see \ref TopicStorageOrders). */
RowMajor = 0x1, // it is only a coincidence that this is equal to RowMajorBit -- don't rely on that
/** \internal Align the matrix itself if it is vectorizable fixed-size */
AutoAlign = 0,
/** \internal Don't require alignment for the matrix itself (the array of coefficients, if dynamically allocated, may still be requested to be aligned) */ // FIXME --- clarify the situation
DontAlign = 0x2
};
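/* Editor's usage sketch: these values are OR'ed into the _Options template
 * parameter, e.g.
 *   Eigen::Matrix<float,4,4,Eigen::RowMajor|Eigen::DontAlign> m;
 * declares a fixed-size matrix stored row-major and exempt from the alignment
 * requirement. */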
/** \ingroup enums
* Enum for specifying whether to apply or solve on the left or right. */
enum {
/** Apply transformation on the left. */
OnTheLeft = 1,
/** Apply transformation on the right. */
OnTheRight = 2
};
/* the following used to be written as:
*
* struct NoChange_t {};
* namespace {
* EIGEN_UNUSED NoChange_t NoChange;
* }
*
* on the grounds that it feels dangerous to disambiguate overloaded functions on enum/integer types.
* However, this leads to "variable declared but never referenced" warnings on Intel Composer XE,
* and we do not know how to get rid of them (bug 450).
*/
enum NoChange_t { NoChange };
enum Sequential_t { Sequential };
enum Default_t { Default };
/** \internal \ingroup enums
* Used in AmbiVector. */
enum {
IsDense = 0,
IsSparse
};
/** \ingroup enums
* Used as template parameter in DenseCoeffBase and MapBase to indicate
* which accessors should be provided. */
enum AccessorLevels {
/** Read-only access via a member function. */
ReadOnlyAccessors,
/** Read/write access via member functions. */
WriteAccessors,
/** Direct read-only access to the coefficients. */
DirectAccessors,
/** Direct read/write access to the coefficients. */
DirectWriteAccessors
};
/** \ingroup enums
* Enum with options to give to various decompositions. */
enum DecompositionOptions {
/** \internal Not used (meant for LDLT?). */
Pivoting = 0x01,
/** \internal Not used (meant for LDLT?). */
NoPivoting = 0x02,
/** Used in JacobiSVD to indicate that the square matrix U is to be computed. */
ComputeFullU = 0x04,
/** Used in JacobiSVD to indicate that the thin matrix U is to be computed. */
ComputeThinU = 0x08,
/** Used in JacobiSVD to indicate that the square matrix V is to be computed. */
ComputeFullV = 0x10,
/** Used in JacobiSVD to indicate that the thin matrix V is to be computed. */
ComputeThinV = 0x20,
/** Used in SelfAdjointEigenSolver and GeneralizedSelfAdjointEigenSolver to specify
* that only the eigenvalues are to be computed and not the eigenvectors. */
EigenvaluesOnly = 0x40,
/** Used in SelfAdjointEigenSolver and GeneralizedSelfAdjointEigenSolver to specify
* that both the eigenvalues and the eigenvectors are to be computed. */
ComputeEigenvectors = 0x80,
/** \internal */
EigVecMask = EigenvaluesOnly | ComputeEigenvectors,
/** Used in GeneralizedSelfAdjointEigenSolver to indicate that it should
* solve the generalized eigenproblem \f$ Ax = \lambda B x \f$. */
Ax_lBx = 0x100,
/** Used in GeneralizedSelfAdjointEigenSolver to indicate that it should
* solve the generalized eigenproblem \f$ ABx = \lambda x \f$. */
ABx_lx = 0x200,
/** Used in GeneralizedSelfAdjointEigenSolver to indicate that it should
* solve the generalized eigenproblem \f$ BAx = \lambda x \f$. */
BAx_lx = 0x400,
/** \internal */
GenEigMask = Ax_lBx | ABx_lx | BAx_lx
};
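/* Editor's usage sketch (standard JacobiSVD API; m is a hypothetical MatrixXf):
 *   Eigen::JacobiSVD<Eigen::MatrixXf> svd(m, Eigen::ComputeThinU | Eigen::ComputeThinV);
 * requests only the thin U and V factors. */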
/** \ingroup enums
* Possible values for the \p QRPreconditioner template parameter of JacobiSVD. */
enum QRPreconditioners {
/** Do not specify what is to be done if the SVD of a non-square matrix is asked for. */
NoQRPreconditioner,
/** Use a QR decomposition without pivoting as the first step. */
HouseholderQRPreconditioner,
/** Use a QR decomposition with column pivoting as the first step. */
ColPivHouseholderQRPreconditioner,
/** Use a QR decomposition with full pivoting as the first step. */
FullPivHouseholderQRPreconditioner
};
#ifdef Success
#error The preprocessor symbol 'Success' is defined, possibly by the X11 header file X.h
#endif
/** \ingroup enums
* Enum for reporting the status of a computation. */
enum ComputationInfo {
/** Computation was successful. */
Success = 0,
/** The provided data did not satisfy the prerequisites. */
NumericalIssue = 1,
/** Iterative procedure did not converge. */
NoConvergence = 2,
/** The inputs are invalid, or the algorithm has been improperly called.
* When assertions are enabled, such errors trigger an assert. */
InvalidInput = 3
};
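/* Editor's usage sketch (decompositions expose info(); A is a hypothetical MatrixXd):
 *   Eigen::LLT<Eigen::MatrixXd> llt(A);
 *   if (llt.info() != Eigen::Success) { ... }  // e.g. A was not positive definite
 */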
/** \ingroup enums
* Enum used to specify how a particular transformation is stored in a matrix.
* \sa Transform, Hyperplane::transform(). */
enum TransformTraits {
/** Transformation is an isometry. */
Isometry = 0x1,
/** Transformation is an affine transformation stored as a (Dim+1)^2 matrix whose last row is
* assumed to be [0 ... 0 1]. */
Affine = 0x2,
/** Transformation is an affine transformation stored as a (Dim) x (Dim+1) matrix. */
AffineCompact = 0x10 | Affine,
/** Transformation is a general projective transformation stored as a (Dim+1)^2 matrix. */
Projective = 0x20
};
/** \internal \ingroup enums
* Enum used to choose between implementation depending on the computer architecture. */
namespace Architecture
{
enum Type {
Generic = 0x0,
SSE = 0x1,
AltiVec = 0x2,
#if defined EIGEN_VECTORIZE_SSE
Target = SSE
#elif defined EIGEN_VECTORIZE_ALTIVEC
Target = AltiVec
#else
Target = Generic
#endif
};
}
/** \internal \ingroup enums
* Enum used as template parameter in GeneralProduct. */
enum { CoeffBasedProductMode, LazyCoeffBasedProductMode, OuterProduct, InnerProduct, GemvProduct, GemmProduct };
/** \internal \ingroup enums
* Enum used in experimental parallel implementation. */
enum Action {GetAction, SetAction};
/** The type used to identify a dense storage. */
struct Dense {};
/** The type used to identify a matrix expression */
struct MatrixXpr {};
/** The type used to identify an array expression */
struct ArrayXpr {};
} // end namespace Eigen
#endif // EIGEN_CONSTANTS_H
#ifndef EIGEN_WARNINGS_DISABLED
#define EIGEN_WARNINGS_DISABLED
#ifdef _MSC_VER
// 4100 - unreferenced formal parameter (occurred e.g. in aligned_allocator::destroy(pointer p))
// 4101 - unreferenced local variable
// 4127 - conditional expression is constant
// 4181 - qualifier applied to reference type ignored
// 4211 - nonstandard extension used : redefined extern to static
// 4244 - 'argument' : conversion from 'type1' to 'type2', possible loss of data
// 4273 - QtAlignedMalloc, inconsistent DLL linkage
// 4324 - structure was padded due to declspec(align())
// 4512 - assignment operator could not be generated
// 4522 - 'class' : multiple assignment operators specified
// 4700 - uninitialized local variable 'xyz' used
// 4717 - 'function' : recursive on all control paths, function will cause runtime stack overflow
#ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
#pragma warning( push )
#endif
#pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4512 4522 4700 4717 )
#elif defined __INTEL_COMPILER
// 2196 - routine is both "inline" and "noinline" ("noinline" assumed)
// ICC 12 generates this warning even without any inline keyword, when defining class methods 'inline', i.e. inside of the class body.
// 279 - controlling expression is constant
// ICC 12 generates this warning on assert(constant_expression_depending_on_template_params) and frankly this is a legitimate use case.
#ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
#pragma warning push
#endif
#pragma warning disable 2196 279
#elif defined __clang__
// -Wconstant-logical-operand - warning: use of logical && with constant operand; switch to bitwise & or remove constant
// this is really a stupid warning as it warns on compile-time expressions involving enums
#ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
#pragma clang diagnostic push
#endif
#pragma clang diagnostic ignored "-Wconstant-logical-operand"
#endif
#endif // not EIGEN_WARNINGS_DISABLED
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2007-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_FORWARDDECLARATIONS_H
#define EIGEN_FORWARDDECLARATIONS_H
namespace Eigen {
namespace internal {
template<typename T> struct traits;
// here we say once and for all that traits<const T> == traits<T>
// When constness must affect traits, it has to be constness on template parameters on which T itself depends.
// For example, traits<Map<const T> > != traits<Map<T> >, but
// traits<const Map<T> > == traits<Map<T> >
template<typename T> struct traits<const T> : traits<T> {};
template<typename Derived> struct has_direct_access
{
enum { ret = (traits<Derived>::Flags & DirectAccessBit) ? 1 : 0 };
};
template<typename Derived> struct accessors_level
{
enum { has_direct_access = (traits<Derived>::Flags & DirectAccessBit) ? 1 : 0,
has_write_access = (traits<Derived>::Flags & LvalueBit) ? 1 : 0,
value = has_direct_access ? (has_write_access ? DirectWriteAccessors : DirectAccessors)
: (has_write_access ? WriteAccessors : ReadOnlyAccessors)
};
};
} // end namespace internal
template<typename T> struct NumTraits;
template<typename Derived> struct EigenBase;
template<typename Derived> class DenseBase;
template<typename Derived> class PlainObjectBase;
template<typename Derived,
int Level = internal::accessors_level<Derived>::value >
class DenseCoeffsBase;
template<typename _Scalar, int _Rows, int _Cols,
int _Options = AutoAlign |
#if defined(__GNUC__) && __GNUC__==3 && __GNUC_MINOR__==4
// workaround a bug in at least gcc 3.4.6
// the innermost ?: ternary operator is misparsed. We write it slightly
// differently and this makes gcc 3.4.6 happy, but it's ugly.
// The error would only show up when EIGEN_DEFAULT_TO_ROW_MAJOR is defined
// (when EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION is RowMajor)
( (_Rows==1 && _Cols!=1) ? RowMajor
: !(_Cols==1 && _Rows!=1) ? EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION
: ColMajor ),
#else
( (_Rows==1 && _Cols!=1) ? RowMajor
: (_Cols==1 && _Rows!=1) ? ColMajor
: EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION ),
#endif
int _MaxRows = _Rows,
int _MaxCols = _Cols
> class Matrix;
template<typename Derived> class MatrixBase;
template<typename Derived> class ArrayBase;
template<typename ExpressionType, unsigned int Added, unsigned int Removed> class Flagged;
template<typename ExpressionType, template <typename> class StorageBase > class NoAlias;
template<typename ExpressionType> class NestByValue;
template<typename ExpressionType> class ForceAlignedAccess;
template<typename ExpressionType> class SwapWrapper;
template<typename XprType, int BlockRows=Dynamic, int BlockCols=Dynamic, bool InnerPanel = false,
bool HasDirectAccess = internal::has_direct_access<XprType>::ret> class Block;
template<typename MatrixType, int Size=Dynamic> class VectorBlock;
template<typename MatrixType> class Transpose;
template<typename MatrixType> class Conjugate;
template<typename NullaryOp, typename MatrixType> class CwiseNullaryOp;
template<typename UnaryOp, typename MatrixType> class CwiseUnaryOp;
template<typename ViewOp, typename MatrixType> class CwiseUnaryView;
template<typename BinaryOp, typename Lhs, typename Rhs> class CwiseBinaryOp;
template<typename BinOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp;
template<typename Derived, typename Lhs, typename Rhs> class ProductBase;
template<typename Lhs, typename Rhs, int Mode> class GeneralProduct;
template<typename Lhs, typename Rhs, int NestingFlags> class CoeffBasedProduct;
template<typename Derived> class DiagonalBase;
template<typename _DiagonalVectorType> class DiagonalWrapper;
template<typename _Scalar, int SizeAtCompileTime, int MaxSizeAtCompileTime=SizeAtCompileTime> class DiagonalMatrix;
template<typename MatrixType, typename DiagonalType, int ProductOrder> class DiagonalProduct;
template<typename MatrixType, int Index = 0> class Diagonal;
template<int SizeAtCompileTime, int MaxSizeAtCompileTime = SizeAtCompileTime, typename IndexType=int> class PermutationMatrix;
template<int SizeAtCompileTime, int MaxSizeAtCompileTime = SizeAtCompileTime, typename IndexType=int> class Transpositions;
template<typename Derived> class PermutationBase;
template<typename Derived> class TranspositionsBase;
template<typename _IndicesType> class PermutationWrapper;
template<typename _IndicesType> class TranspositionsWrapper;
template<typename Derived,
int Level = internal::accessors_level<Derived>::has_write_access ? WriteAccessors : ReadOnlyAccessors
> class MapBase;
template<int InnerStrideAtCompileTime, int OuterStrideAtCompileTime> class Stride;
template<typename MatrixType, int MapOptions=Unaligned, typename StrideType = Stride<0,0> > class Map;
template<typename Derived> class TriangularBase;
template<typename MatrixType, unsigned int Mode> class TriangularView;
template<typename MatrixType, unsigned int Mode> class SelfAdjointView;
template<typename MatrixType> class SparseView;
template<typename ExpressionType> class WithFormat;
template<typename MatrixType> struct CommaInitializer;
template<typename Derived> class ReturnByValue;
template<typename ExpressionType> class ArrayWrapper;
template<typename ExpressionType> class MatrixWrapper;
namespace internal {
template<typename DecompositionType, typename Rhs> struct solve_retval_base;
template<typename DecompositionType, typename Rhs> struct solve_retval;
template<typename DecompositionType> struct kernel_retval_base;
template<typename DecompositionType> struct kernel_retval;
template<typename DecompositionType> struct image_retval_base;
template<typename DecompositionType> struct image_retval;
} // end namespace internal
namespace internal {
template<typename _Scalar, int Rows=Dynamic, int Cols=Dynamic, int Supers=Dynamic, int Subs=Dynamic, int Options=0> class BandMatrix;
}
namespace internal {
template<typename Lhs, typename Rhs> struct product_type;
}
template<typename Lhs, typename Rhs,
int ProductType = internal::product_type<Lhs,Rhs>::value>
struct ProductReturnType;
// this is a workaround for sun CC
template<typename Lhs, typename Rhs> struct LazyProductReturnType;
namespace internal {
// Provides scalar/packet-wise product and product with accumulation
// with optional conjugation of the arguments.
template<typename LhsScalar, typename RhsScalar, bool ConjLhs=false, bool ConjRhs=false> struct conj_helper;
template<typename Scalar> struct scalar_sum_op;
template<typename Scalar> struct scalar_difference_op;
template<typename LhsScalar,typename RhsScalar> struct scalar_conj_product_op;
template<typename Scalar> struct scalar_quotient_op;
template<typename Scalar> struct scalar_opposite_op;
template<typename Scalar> struct scalar_conjugate_op;
template<typename Scalar> struct scalar_real_op;
template<typename Scalar> struct scalar_imag_op;
template<typename Scalar> struct scalar_abs_op;
template<typename Scalar> struct scalar_abs2_op;
template<typename Scalar> struct scalar_sqrt_op;
template<typename Scalar> struct scalar_exp_op;
template<typename Scalar> struct scalar_log_op;
template<typename Scalar> struct scalar_cos_op;
template<typename Scalar> struct scalar_sin_op;
template<typename Scalar> struct scalar_acos_op;
template<typename Scalar> struct scalar_asin_op;
template<typename Scalar> struct scalar_tan_op;
template<typename Scalar> struct scalar_pow_op;
template<typename Scalar> struct scalar_inverse_op;
template<typename Scalar> struct scalar_square_op;
template<typename Scalar> struct scalar_cube_op;
template<typename Scalar, typename NewType> struct scalar_cast_op;
template<typename Scalar> struct scalar_multiple_op;
template<typename Scalar> struct scalar_quotient1_op;
template<typename Scalar> struct scalar_min_op;
template<typename Scalar> struct scalar_max_op;
template<typename Scalar> struct scalar_random_op;
template<typename Scalar> struct scalar_add_op;
template<typename Scalar> struct scalar_constant_op;
template<typename Scalar> struct scalar_identity_op;
template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_product_op;
template<typename LhsScalar,typename RhsScalar> struct scalar_multiple2_op;
} // end namespace internal
struct IOFormat;
// Array module
template<typename _Scalar, int _Rows, int _Cols,
int _Options = AutoAlign |
#if defined(__GNUC__) && __GNUC__==3 && __GNUC_MINOR__==4
// work around a bug in at least gcc 3.4.6:
// the innermost ?: ternary operator is misparsed. We write it slightly
// differently and this makes gcc 3.4.6 happy, but it's ugly.
// The error only shows up when EIGEN_DEFAULT_TO_ROW_MAJOR is defined
// (i.e. when EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION is RowMajor)
( (_Rows==1 && _Cols!=1) ? RowMajor
: !(_Cols==1 && _Rows!=1) ? EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION
: ColMajor ),
#else
( (_Rows==1 && _Cols!=1) ? RowMajor
: (_Cols==1 && _Rows!=1) ? ColMajor
: EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION ),
#endif
int _MaxRows = _Rows, int _MaxCols = _Cols> class Array;
template<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType> class Select;
template<typename MatrixType, typename BinaryOp, int Direction> class PartialReduxExpr;
template<typename ExpressionType, int Direction> class VectorwiseOp;
template<typename MatrixType,int RowFactor,int ColFactor> class Replicate;
template<typename MatrixType, int Direction = BothDirections> class Reverse;
template<typename MatrixType> class FullPivLU;
template<typename MatrixType> class PartialPivLU;
namespace internal {
template<typename MatrixType> struct inverse_impl;
}
template<typename MatrixType> class HouseholderQR;
template<typename MatrixType> class ColPivHouseholderQR;
template<typename MatrixType> class FullPivHouseholderQR;
template<typename MatrixType, int QRPreconditioner = ColPivHouseholderQRPreconditioner> class JacobiSVD;
template<typename MatrixType, int UpLo = Lower> class LLT;
template<typename MatrixType, int UpLo = Lower> class LDLT;
template<typename VectorsType, typename CoeffsType, int Side=OnTheLeft> class HouseholderSequence;
template<typename Scalar> class JacobiRotation;
// Geometry module:
template<typename Derived, int _Dim> class RotationBase;
template<typename Lhs, typename Rhs> class Cross;
template<typename Derived> class QuaternionBase;
template<typename Scalar> class Rotation2D;
template<typename Scalar> class AngleAxis;
template<typename Scalar,int Dim> class Translation;
#ifdef EIGEN2_SUPPORT
template<typename Derived, int _Dim> class eigen2_RotationBase;
template<typename Lhs, typename Rhs> class eigen2_Cross;
template<typename Scalar> class eigen2_Quaternion;
template<typename Scalar> class eigen2_Rotation2D;
template<typename Scalar> class eigen2_AngleAxis;
template<typename Scalar,int Dim> class eigen2_Transform;
template <typename _Scalar, int _AmbientDim> class eigen2_ParametrizedLine;
template <typename _Scalar, int _AmbientDim> class eigen2_Hyperplane;
template<typename Scalar,int Dim> class eigen2_Translation;
template<typename Scalar,int Dim> class eigen2_Scaling;
#endif
#if EIGEN2_SUPPORT_STAGE < STAGE20_RESOLVE_API_CONFLICTS
template<typename Scalar> class Quaternion;
template<typename Scalar,int Dim> class Transform;
template <typename _Scalar, int _AmbientDim> class ParametrizedLine;
template <typename _Scalar, int _AmbientDim> class Hyperplane;
template<typename Scalar,int Dim> class Scaling;
#endif
#if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS
template<typename Scalar, int Options = AutoAlign> class Quaternion;
template<typename Scalar,int Dim,int Mode,int _Options=AutoAlign> class Transform;
template <typename _Scalar, int _AmbientDim, int Options=AutoAlign> class ParametrizedLine;
template <typename _Scalar, int _AmbientDim, int Options=AutoAlign> class Hyperplane;
template<typename Scalar> class UniformScaling;
template<typename MatrixType,int Direction> class Homogeneous;
#endif
// MatrixFunctions module
template<typename Derived> struct MatrixExponentialReturnValue;
template<typename Derived> class MatrixFunctionReturnValue;
template<typename Derived> class MatrixSquareRootReturnValue;
template<typename Derived> class MatrixLogarithmReturnValue;
namespace internal {
template <typename Scalar>
struct stem_function
{
typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar;
typedef ComplexScalar type(ComplexScalar, int);
};
}
#ifdef EIGEN2_SUPPORT
template<typename ExpressionType> class Cwise;
template<typename MatrixType> class Minor;
template<typename MatrixType> class LU;
template<typename MatrixType> class QR;
template<typename MatrixType> class SVD;
namespace internal {
template<typename MatrixType, unsigned int Mode> struct eigen2_part_return_type;
}
#endif
} // end namespace Eigen
#endif // EIGEN_FORWARDDECLARATIONS_H
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
* Content : Eigen bindings to Intel(R) MKL
* Include file with common MKL declarations
********************************************************************************
*/
#ifndef EIGEN_MKL_SUPPORT_H
#define EIGEN_MKL_SUPPORT_H
#ifdef EIGEN_USE_MKL_ALL
#ifndef EIGEN_USE_BLAS
#define EIGEN_USE_BLAS
#endif
#ifndef EIGEN_USE_LAPACKE
#define EIGEN_USE_LAPACKE
#endif
#ifndef EIGEN_USE_MKL_VML
#define EIGEN_USE_MKL_VML
#endif
#endif
#ifdef EIGEN_USE_LAPACKE_STRICT
#define EIGEN_USE_LAPACKE
#endif
#if defined(EIGEN_USE_BLAS) || defined(EIGEN_USE_LAPACKE) || defined(EIGEN_USE_MKL_VML)
#define EIGEN_USE_MKL
#endif
#if defined EIGEN_USE_MKL
#include <mkl.h>
#include <mkl_lapacke.h>
#define EIGEN_MKL_VML_THRESHOLD 128
namespace Eigen {
typedef std::complex<double> dcomplex;
typedef std::complex<float> scomplex;
namespace internal {
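// The helpers below copy an Eigen scalar into the corresponding MKL scalar.
// The generic versions are plain assignments (real types); the specializations
// unpack std::complex into MKL's struct layout, with the *_conj_* variants
// negating the imaginary part so as to store the conjugate.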
template<typename MKLType, typename EigenType>
static inline void assign_scalar_eig2mkl(MKLType& mklScalar, const EigenType& eigenScalar) {
mklScalar=eigenScalar;
}
template<typename MKLType, typename EigenType>
static inline void assign_conj_scalar_eig2mkl(MKLType& mklScalar, const EigenType& eigenScalar) {
mklScalar=eigenScalar;
}
template <>
inline void assign_scalar_eig2mkl<MKL_Complex16,dcomplex>(MKL_Complex16& mklScalar, const dcomplex& eigenScalar) {
mklScalar.real=eigenScalar.real();
mklScalar.imag=eigenScalar.imag();
}
template <>
inline void assign_scalar_eig2mkl<MKL_Complex8,scomplex>(MKL_Complex8& mklScalar, const scomplex& eigenScalar) {
mklScalar.real=eigenScalar.real();
mklScalar.imag=eigenScalar.imag();
}
template <>
inline void assign_conj_scalar_eig2mkl<MKL_Complex16,dcomplex>(MKL_Complex16& mklScalar, const dcomplex& eigenScalar) {
mklScalar.real=eigenScalar.real();
mklScalar.imag=-eigenScalar.imag();
}
template <>
inline void assign_conj_scalar_eig2mkl<MKL_Complex8,scomplex>(MKL_Complex8& mklScalar, const scomplex& eigenScalar) {
mklScalar.real=eigenScalar.real();
mklScalar.imag=-eigenScalar.imag();
}
} // end namespace internal
} // end namespace Eigen
#endif
#endif // EIGEN_MKL_SUPPORT_H
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_MACROS_H
#define EIGEN_MACROS_H
#define EIGEN_WORLD_VERSION 3
#define EIGEN_MAJOR_VERSION 1
#define EIGEN_MINOR_VERSION 3
#define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
(EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \
EIGEN_MINOR_VERSION>=z))))
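// Illustrative use (guarded with #if 0 so it has no effect here): user code can
// test the Eigen version at preprocessing time.
#if 0
#if EIGEN_VERSION_AT_LEAST(3,1,0)
// ... use an API that appeared in Eigen 3.1 ...
#else
// ... fall back to the older API ...
#endif
#endif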
#ifdef __GNUC__
#define EIGEN_GNUC_AT_LEAST(x,y) ((__GNUC__==x && __GNUC_MINOR__>=y) || __GNUC__>x)
#else
#define EIGEN_GNUC_AT_LEAST(x,y) 0
#endif
#ifdef __GNUC__
#define EIGEN_GNUC_AT_MOST(x,y) ((__GNUC__==x && __GNUC_MINOR__<=y) || __GNUC__<x)
#else
#define EIGEN_GNUC_AT_MOST(x,y) 0
#endif
#if EIGEN_GNUC_AT_MOST(4,3) && !defined(__clang__)
// see bug 89
#define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 0
#else
#define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 1
#endif
#if defined(__GNUC__) && (__GNUC__ <= 3)
#define EIGEN_GCC3_OR_OLDER 1
#else
#define EIGEN_GCC3_OR_OLDER 0
#endif
// 16 byte alignment is only useful for vectorization. Since it affects the ABI, we need to enable
// 16 byte alignment on all platforms where vectorization might be enabled. In theory we could always
// enable alignment, but it can be a cause of problems on some platforms, so we just disable it on
// certain common platforms (compiler+architecture combinations) to avoid these problems.
// Only static alignment is really problematic (it relies on nonstandard compiler extensions that don't
// work everywhere, e.g. not on GCC/ARM), so we try to keep heap alignment even
// when we have to disable static alignment.
#if defined(__GNUC__) && !(defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) || defined(__ppc__) || defined(__ia64__))
#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
#else
#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0
#endif
// static alignment is completely disabled with GCC 3, Sun Studio, and QCC/QNX
#if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT \
&& !EIGEN_GCC3_OR_OLDER \
&& !defined(__SUNPRO_CC) \
&& !defined(__QNXNTO__)
#define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1
#else
#define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0
#endif
#ifdef EIGEN_DONT_ALIGN
#ifndef EIGEN_DONT_ALIGN_STATICALLY
#define EIGEN_DONT_ALIGN_STATICALLY
#endif
#define EIGEN_ALIGN 0
#else
#define EIGEN_ALIGN 1
#endif
// EIGEN_ALIGN_STATICALLY is the true test of whether we want to align arrays on the stack or not. It takes into account both the user choice to explicitly disable
// alignment (EIGEN_DONT_ALIGN_STATICALLY) and the architecture config (EIGEN_ARCH_WANTS_STACK_ALIGNMENT). Hence, only EIGEN_ALIGN_STATICALLY should be used.
#if EIGEN_ARCH_WANTS_STACK_ALIGNMENT && !defined(EIGEN_DONT_ALIGN_STATICALLY)
#define EIGEN_ALIGN_STATICALLY 1
#else
#define EIGEN_ALIGN_STATICALLY 0
#ifndef EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
#define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
#endif
#endif
#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION RowMajor
#else
#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION ColMajor
#endif
#ifndef EIGEN_DEFAULT_DENSE_INDEX_TYPE
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE std::ptrdiff_t
#endif
/** Allows disabling some optimizations which might affect the accuracy of the result.
* Such optimizations are enabled by default; set EIGEN_FAST_MATH to 0 to disable them.
* They currently include:
* - single precision Cwise::sin() and Cwise::cos() when SSE vectorization is enabled.
*/
#ifndef EIGEN_FAST_MATH
#define EIGEN_FAST_MATH 1
#endif
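// Hypothetical user-side sketch (guarded with #if 0 so it is not compiled): the
// define must precede the first Eigen include to take effect.
#if 0
#define EIGEN_FAST_MATH 0   // trade the fast single-precision sin()/cos() paths for accuracy
#include <Eigen/Dense>
#endif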
#define EIGEN_DEBUG_VAR(x) std::cerr << #x << " = " << x << std::endl;
// concatenate two tokens
#define EIGEN_CAT2(a,b) a ## b
#define EIGEN_CAT(a,b) EIGEN_CAT2(a,b)
// convert a token to a string
#define EIGEN_MAKESTRING2(a) #a
#define EIGEN_MAKESTRING(a) EIGEN_MAKESTRING2(a)
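// For illustration (guarded with #if 0 so it is not compiled): the two-level
// indirection lets macro arguments expand *before* they are pasted or stringized.
#if 0
int EIGEN_CAT(tmp_, __LINE__);  // pastes the expanded line number, e.g. 'int tmp_42;'
const char* v = EIGEN_MAKESTRING(EIGEN_WORLD_VERSION);  // yields "3", not "EIGEN_WORLD_VERSION"
#endif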
#if EIGEN_GNUC_AT_LEAST(4,1) && !defined(__clang__) && !defined(__INTEL_COMPILER)
#define EIGEN_FLATTEN_ATTRIB __attribute__((flatten))
#else
#define EIGEN_FLATTEN_ATTRIB
#endif
// EIGEN_STRONG_INLINE is a stronger version of inline, using __forceinline on MSVC,
// but it still doesn't use GCC's always_inline. This is useful in (common) situations where MSVC needs forceinline
// but GCC is still doing fine with just inline.
#if (defined _MSC_VER) || (defined __INTEL_COMPILER)
#define EIGEN_STRONG_INLINE __forceinline
#else
#define EIGEN_STRONG_INLINE inline
#endif
// EIGEN_ALWAYS_INLINE is the strongest: it makes the function inline and adds every possible
// attribute to maximize inlining. This should only be used when really necessary: in particular,
// it uses __attribute__((always_inline)) on GCC, which most of the time is useless and can severely harm compile times.
// FIXME with the always_inline attribute,
// gcc 3.4.x reports the following compilation error:
// Eval.h:91: sorry, unimplemented: inlining failed in call to 'const Eigen::Eval<Derived> Eigen::MatrixBase<Scalar, Derived>::eval() const'
// : function body not available
#if EIGEN_GNUC_AT_LEAST(4,0)
#define EIGEN_ALWAYS_INLINE __attribute__((always_inline)) inline
#else
#define EIGEN_ALWAYS_INLINE EIGEN_STRONG_INLINE
#endif
#if (defined __GNUC__)
#define EIGEN_DONT_INLINE __attribute__((noinline))
#elif (defined _MSC_VER)
#define EIGEN_DONT_INLINE __declspec(noinline)
#else
#define EIGEN_DONT_INLINE
#endif
// this macro allows getting rid of linker errors about multiply defined functions.
// - static is not very good because it prevents definitions from different object files from being merged,
// so static causes the resulting linked executable to be bloated with multiple copies of the same function.
// - inline is not perfect either, as it unwantedly hints the compiler toward inlining the function.
#define EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
#define EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS inline
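// Hypothetical usage sketch (guarded with #if 0; 'half_of' is an invented name):
// declare once, then define in a header that many translation units may include.
#if 0
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS double half_of(double x);
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS double half_of(double x) { return 0.5*x; }
#endif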
#ifdef NDEBUG
# ifndef EIGEN_NO_DEBUG
# define EIGEN_NO_DEBUG
# endif
#endif
// eigen_plain_assert is where we implement the workaround for the assert() bug in GCC <= 4.3; see bug 89
#ifdef EIGEN_NO_DEBUG
#define eigen_plain_assert(x)
#else
#if EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO
namespace Eigen {
namespace internal {
inline bool copy_bool(bool b) { return b; }
}
}
#define eigen_plain_assert(x) assert(x)
#else
// work around bug 89
#include <cstdlib> // for abort
#include <iostream> // for std::cerr
namespace Eigen {
namespace internal {
// trivial function copying a bool. Must be EIGEN_DONT_INLINE as part of the workaround;
// see bug 89.
namespace {
EIGEN_DONT_INLINE bool copy_bool(bool b) { return b; }
}
inline void assert_fail(const char *condition, const char *function, const char *file, int line)
{
std::cerr << "assertion failed: " << condition << " in function " << function << " at " << file << ":" << line << std::endl;
abort();
}
}
}
#define eigen_plain_assert(x) \
do { \
if(!Eigen::internal::copy_bool(x)) \
Eigen::internal::assert_fail(EIGEN_MAKESTRING(x), __PRETTY_FUNCTION__, __FILE__, __LINE__); \
} while(false)
#endif
#endif
// eigen_assert can be overridden
#ifndef eigen_assert
#define eigen_assert(x) eigen_plain_assert(x)
#endif
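// Hypothetical user-side sketch (guarded with #if 0): since eigen_assert is only
// defined when not already present, a project can reroute it to its own handler
// by defining the macro before the first Eigen include.
#if 0
#include <cstdio>
#include <cstdlib>
#define eigen_assert(x) \
  do { if(!(x)) { std::fprintf(stderr, "Eigen check failed: %s\n", #x); std::abort(); } } while(false)
#include <Eigen/Dense>
#endif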
#ifdef EIGEN_INTERNAL_DEBUGGING
#define eigen_internal_assert(x) eigen_assert(x)
#else
#define eigen_internal_assert(x)
#endif
#ifdef EIGEN_NO_DEBUG
#define EIGEN_ONLY_USED_FOR_DEBUG(x) (void)x
#else
#define EIGEN_ONLY_USED_FOR_DEBUG(x)
#endif
#ifndef EIGEN_NO_DEPRECATED_WARNING
#if (defined __GNUC__)
#define EIGEN_DEPRECATED __attribute__((deprecated))
#elif (defined _MSC_VER)
#define EIGEN_DEPRECATED __declspec(deprecated)
#else
#define EIGEN_DEPRECATED
#endif
#else
#define EIGEN_DEPRECATED
#endif
#if (defined __GNUC__)
#define EIGEN_UNUSED __attribute__((unused))
#else
#define EIGEN_UNUSED
#endif
// Suppresses 'unused variable' warnings.
#define EIGEN_UNUSED_VARIABLE(var) (void)var;
#if !defined(EIGEN_ASM_COMMENT) && (defined __GNUC__)
#define EIGEN_ASM_COMMENT(X) asm("#" X)
#else
#define EIGEN_ASM_COMMENT(X)
#endif
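// Illustrative use (guarded with #if 0 so it is not compiled): drop textual
// markers into GCC's generated assembly to locate a region when inspecting
// compiler output.
#if 0
void hot_path() {
  EIGEN_ASM_COMMENT("begin of critical loop");
  // ... code whose assembly we want to find ...
  EIGEN_ASM_COMMENT("end of critical loop");
}
#endif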
/* EIGEN_ALIGN_TO_BOUNDARY(n) forces data to be n-byte aligned. This is used to satisfy SIMD requirements.
* However, we do that EVEN if vectorization (EIGEN_VECTORIZE) is disabled,
* so that vectorization doesn't affect binary compatibility.
*
* If we made alignment depend on whether or not EIGEN_VECTORIZE is defined, it would be impossible to link
* vectorized and non-vectorized code.
*/
#if (defined __GNUC__) || (defined __PGI) || (defined __IBMCPP__) || (defined __ARMCC_VERSION)
#define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
#elif (defined _MSC_VER)
#define EIGEN_ALIGN_TO_BOUNDARY(n) __declspec(align(n))
#elif (defined __SUNPRO_CC)
// FIXME not sure about this one:
#define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
#else
#error Please tell me what is the equivalent of __attribute__((aligned(n))) for your compiler
#endif
#define EIGEN_ALIGN16 EIGEN_ALIGN_TO_BOUNDARY(16)
#if EIGEN_ALIGN_STATICALLY
#define EIGEN_USER_ALIGN_TO_BOUNDARY(n) EIGEN_ALIGN_TO_BOUNDARY(n)
#define EIGEN_USER_ALIGN16 EIGEN_ALIGN16
#else
#define EIGEN_USER_ALIGN_TO_BOUNDARY(n)
#define EIGEN_USER_ALIGN16
#endif
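// Illustrative sketch (guarded with #if 0 so it is not compiled): request
// 16-byte alignment, as required for aligned 128-bit SIMD loads/stores.
#if 0
EIGEN_ALIGN16 float packet_buffer[4];       // always 16-byte aligned (ABI-stable)
EIGEN_USER_ALIGN16 float maybe_aligned[4];  // aligned only when EIGEN_ALIGN_STATICALLY is 1
#endif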
#ifdef EIGEN_DONT_USE_RESTRICT_KEYWORD
#define EIGEN_RESTRICT
#endif
#ifndef EIGEN_RESTRICT
#define EIGEN_RESTRICT __restrict
#endif
#ifndef EIGEN_STACK_ALLOCATION_LIMIT
#define EIGEN_STACK_ALLOCATION_LIMIT 20000
#endif
#ifndef EIGEN_DEFAULT_IO_FORMAT
#ifdef EIGEN_MAKING_DOCS
// format used in Eigen's documentation
// we need to define it here because escaping characters in a CMake add_definitions argument seems very problematic.
#define EIGEN_DEFAULT_IO_FORMAT Eigen::IOFormat(3, 0, " ", "\n", "", "")
#else
#define EIGEN_DEFAULT_IO_FORMAT Eigen::IOFormat()
#endif
#endif
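// Hypothetical user-side sketch (guarded with #if 0): pick a different default
// format before including Eigen. The arguments are IOFormat(precision, flags,
// coeffSeparator, rowSeparator, rowPrefix, rowSuffix).
#if 0
#define EIGEN_DEFAULT_IO_FORMAT Eigen::IOFormat(4, 0, ", ", ";\n", "[", "]")
#include <Eigen/Dense>
#endif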
// just an empty macro!
#define EIGEN_EMPTY
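// Note on the macro below: on MSVC (but not ICC masquerading as MSVC) only the
// using-declaration is emitted; other compilers additionally get an explicit
// Derived-to-Derived operator= forwarding to Base::operator=, so that the
// implicitly declared copy assignment operator is replaced rather than
// silently taking over.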
#if defined(_MSC_VER) && (!defined(__INTEL_COMPILER))
#define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
using Base::operator =;
#else
#define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
using Base::operator =; \
EIGEN_STRONG_INLINE Derived& operator=(const Derived& other) \
{ \
Base::operator=(other); \
return *this; \
}
#endif
#define EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Derived) \
EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived)
/**
* Just a side note: commenting within #defines only works by documenting
* after the object (via '!<'). Comments cannot span multiple lines, hence
* these extra-long lines. What confuses doxygen here is
* that we use '\' and basically have a bunch of typedefs with their
* documentation on a single line.
**/
#define EIGEN_GENERIC_PUBLIC_INTERFACE(Derived) \
typedef typename Eigen::internal::traits<Derived>::Scalar Scalar; /*!< \brief Numeric type, e.g. float, double, int or std::complex<float>. */ \
typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; /*!< \brief The underlying numeric type for composed scalar types. \details In cases where Scalar is e.g. std::complex<T>, T corresponds to RealScalar. */ \
typedef typename Base::CoeffReturnType CoeffReturnType; /*!< \brief The return type for coefficient access. \details Depending on whether the object allows direct coefficient access (e.g. for a MatrixXd), this type is either 'const Scalar&' or simply 'Scalar' for objects that do not allow direct coefficient access. */ \
typedef typename Eigen::internal::nested<Derived>::type Nested; \
typedef typename Eigen::internal::traits<Derived>::StorageKind StorageKind; \
typedef typename Eigen::internal::traits<Derived>::Index Index; \
enum { RowsAtCompileTime = Eigen::internal::traits<Derived>::RowsAtCompileTime, \
ColsAtCompileTime = Eigen::internal::traits<Derived>::ColsAtCompileTime, \
Flags = Eigen::internal::traits<Derived>::Flags, \
CoeffReadCost = Eigen::internal::traits<Derived>::CoeffReadCost, \
SizeAtCompileTime = Base::SizeAtCompileTime, \
MaxSizeAtCompileTime = Base::MaxSizeAtCompileTime, \
IsVectorAtCompileTime = Base::IsVectorAtCompileTime };
#define EIGEN_DENSE_PUBLIC_INTERFACE(Derived) \
typedef typename Eigen::internal::traits<Derived>::Scalar Scalar; /*!< \brief Numeric type, e.g. float, double, int or std::complex<float>. */ \
typedef typename Eigen::NumTraits<Scalar>::Real RealScalar; /*!< \brief The underlying numeric type for composed scalar types. \details In cases where Scalar is e.g. std::complex<T>, T corresponds to RealScalar. */ \
typedef typename Base::PacketScalar PacketScalar; \
typedef typename Base::CoeffReturnType CoeffReturnType; /*!< \brief The return type for coefficient access. \details Depending on whether the object allows direct coefficient access (e.g. for a MatrixXd), this type is either 'const Scalar&' or simply 'Scalar' for objects that do not allow direct coefficient access. */ \
typedef typename Eigen::internal::nested<Derived>::type Nested; \
typedef typename Eigen::internal::traits<Derived>::StorageKind StorageKind; \
typedef typename Eigen::internal::traits<Derived>::Index Index; \
enum { RowsAtCompileTime = Eigen::internal::traits<Derived>::RowsAtCompileTime, \
ColsAtCompileTime = Eigen::internal::traits<Derived>::ColsAtCompileTime, \
MaxRowsAtCompileTime = Eigen::internal::traits<Derived>::MaxRowsAtCompileTime, \
MaxColsAtCompileTime = Eigen::internal::traits<Derived>::MaxColsAtCompileTime, \
Flags = Eigen::internal::traits<Derived>::Flags, \
CoeffReadCost = Eigen::internal::traits<Derived>::CoeffReadCost, \
SizeAtCompileTime = Base::SizeAtCompileTime, \
MaxSizeAtCompileTime = Base::MaxSizeAtCompileTime, \
IsVectorAtCompileTime = Base::IsVectorAtCompileTime }; \
using Base::derived; \
using Base::const_cast_derived;
#define EIGEN_PLAIN_ENUM_MIN(a,b) (((int)a <= (int)b) ? (int)a : (int)b)
#define EIGEN_PLAIN_ENUM_MAX(a,b) (((int)a >= (int)b) ? (int)a : (int)b)
// EIGEN_SIZE_MIN_PREFER_DYNAMIC gives the min between compile-time sizes. 0 has absolute priority, followed by 1,
// followed by Dynamic, followed by other finite values. The reason for giving Dynamic the priority over
// finite values is that min(3, Dynamic) should be Dynamic, since that could be anything between 0 and 3.
#define EIGEN_SIZE_MIN_PREFER_DYNAMIC(a,b) (((int)a == 0 || (int)b == 0) ? 0 \
: ((int)a == 1 || (int)b == 1) ? 1 \
: ((int)a == Dynamic || (int)b == Dynamic) ? Dynamic \
: ((int)a <= (int)b) ? (int)a : (int)b)
// EIGEN_SIZE_MIN_PREFER_FIXED is a variant of EIGEN_SIZE_MIN_PREFER_DYNAMIC comparing MaxSizes. The difference is that finite values
// now have priority over Dynamic, so that min(3, Dynamic) gives 3. Indeed, whatever the actual value is
// (between 0 and 3), it is not more than 3.
#define EIGEN_SIZE_MIN_PREFER_FIXED(a,b) (((int)a == 0 || (int)b == 0) ? 0 \
: ((int)a == 1 || (int)b == 1) ? 1 \
: ((int)a == Dynamic && (int)b == Dynamic) ? Dynamic \
: ((int)a == Dynamic) ? (int)b \
: ((int)b == Dynamic) ? (int)a \
: ((int)a <= (int)b) ? (int)a : (int)b)
// see EIGEN_SIZE_MIN_PREFER_DYNAMIC. No need for a separate variant for MaxSizes here.
#define EIGEN_SIZE_MAX(a,b) (((int)a == Dynamic || (int)b == Dynamic) ? Dynamic \
: ((int)a >= (int)b) ? (int)a : (int)b)
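// For illustration (guarded with #if 0; assumes C++11 static_assert and that
// Eigen's Dynamic sentinel, which equals -1, is in scope):
#if 0
static_assert(EIGEN_SIZE_MIN_PREFER_DYNAMIC(3, Dynamic) == Dynamic, "result could be anything in [0,3]");
static_assert(EIGEN_SIZE_MIN_PREFER_FIXED(3, Dynamic)   == 3,       "3 is a known upper bound");
static_assert(EIGEN_SIZE_MAX(3, Dynamic)                == Dynamic, "an unknown size dominates the max");
#endif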
#define EIGEN_LOGICAL_XOR(a,b) (((a) || (b)) && !((a) && (b)))
#define EIGEN_IMPLIES(a,b) (!(a) || (b))
#define EIGEN_MAKE_CWISE_BINARY_OP(METHOD,FUNCTOR) \
template<typename OtherDerived> \
EIGEN_STRONG_INLINE const CwiseBinaryOp<FUNCTOR<Scalar>, const Derived, const OtherDerived> \
(METHOD)(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const \
{ \
return CwiseBinaryOp<FUNCTOR<Scalar>, const Derived, const OtherDerived>(derived(), other.derived()); \
}
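// For illustration: inside a class body where EIGEN_CURRENT_STORAGE_BASE_CLASS
// names the CRTP base (e.g. MatrixBase), an invocation such as
//   EIGEN_MAKE_CWISE_BINARY_OP(cwiseMin, internal::scalar_min_op)
// generates a cwiseMin(other) member returning a lazy CwiseBinaryOp expression.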
// the expression type of a cwise product
#define EIGEN_CWISE_PRODUCT_RETURN_TYPE(LHS,RHS) \
CwiseBinaryOp< \
internal::scalar_product_op< \
typename internal::traits<LHS>::Scalar, \
typename internal::traits<RHS>::Scalar \
>, \
const LHS, \
const RHS \
>
#endif // EIGEN_MACROS_H