Commit 13b115ab authored by Simon Tulling

Fix Windows MSVC install by updating the Eigen library

parent 4d5343c3
......@@ -15,7 +15,7 @@ namespace Eigen {
namespace internal {
// pack a selfadjoint block diagonal for use with the gebp_kernel
template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder>
template<typename Scalar, typename Index, int Pack1, int Pack2_dummy, int StorageOrder>
struct symm_pack_lhs
{
template<int BlockRows> inline
......@@ -30,9 +30,9 @@ struct symm_pack_lhs
for(Index k=i; k<i+BlockRows; k++)
{
for(Index w=0; w<h; w++)
blockA[count++] = conj(lhs(k, i+w)); // transposed
blockA[count++] = numext::conj(lhs(k, i+w)); // transposed
blockA[count++] = real(lhs(k,k)); // real (diagonal)
blockA[count++] = numext::real(lhs(k,k)); // real (diagonal)
for(Index w=h+1; w<BlockRows; w++)
blockA[count++] = lhs(i+w, k); // normal
......@@ -41,34 +41,41 @@ struct symm_pack_lhs
// transposed copy
for(Index k=i+BlockRows; k<cols; k++)
for(Index w=0; w<BlockRows; w++)
blockA[count++] = conj(lhs(k, i+w)); // transposed
blockA[count++] = numext::conj(lhs(k, i+w)); // transposed
}
void operator()(Scalar* blockA, const Scalar* _lhs, Index lhsStride, Index cols, Index rows)
{
enum { PacketSize = packet_traits<Scalar>::size };
const_blas_data_mapper<Scalar,Index,StorageOrder> lhs(_lhs,lhsStride);
Index count = 0;
Index peeled_mc = (rows/Pack1)*Pack1;
for(Index i=0; i<peeled_mc; i+=Pack1)
{
pack<Pack1>(blockA, lhs, cols, i, count);
}
//Index peeled_mc3 = (rows/Pack1)*Pack1;
if(rows-peeled_mc>=Pack2)
{
pack<Pack2>(blockA, lhs, cols, peeled_mc, count);
peeled_mc += Pack2;
}
const Index peeled_mc3 = Pack1>=3*PacketSize ? (rows/(3*PacketSize))*(3*PacketSize) : 0;
const Index peeled_mc2 = Pack1>=2*PacketSize ? peeled_mc3+((rows-peeled_mc3)/(2*PacketSize))*(2*PacketSize) : 0;
const Index peeled_mc1 = Pack1>=1*PacketSize ? (rows/(1*PacketSize))*(1*PacketSize) : 0;
if(Pack1>=3*PacketSize)
for(Index i=0; i<peeled_mc3; i+=3*PacketSize)
pack<3*PacketSize>(blockA, lhs, cols, i, count);
if(Pack1>=2*PacketSize)
for(Index i=peeled_mc3; i<peeled_mc2; i+=2*PacketSize)
pack<2*PacketSize>(blockA, lhs, cols, i, count);
if(Pack1>=1*PacketSize)
for(Index i=peeled_mc2; i<peeled_mc1; i+=1*PacketSize)
pack<1*PacketSize>(blockA, lhs, cols, i, count);
// do the same with mr==1
for(Index i=peeled_mc; i<rows; i++)
for(Index i=peeled_mc1; i<rows; i++)
{
for(Index k=0; k<i; k++)
blockA[count++] = lhs(i, k); // normal
blockA[count++] = real(lhs(i, i)); // real (diagonal)
blockA[count++] = numext::real(lhs(i, i)); // real (diagonal)
for(Index k=i+1; k<cols; k++)
blockA[count++] = conj(lhs(k, i)); // transposed
blockA[count++] = numext::conj(lhs(k, i)); // transposed
}
}
};
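For illustration, the mirroring rule implemented by pack<BlockRows>() above can be written as a stand-alone sketch. This is not Eigen code: it assumes a lower-stored selfadjoint matrix in a plain column-major array, covers the whole matrix as a single block, and ignores the mr-wide panel interleaving of the real gebp layout.

#include <complex>
#include <cstddef>
#include <vector>

using Cplx = std::complex<double>;

// Hypothetical demo of where each packed coefficient comes from:
// above the diagonal the stored lower triangle is mirrored and conjugated,
// the diagonal is forced real, below the diagonal values are copied as-is.
std::vector<Cplx> pack_selfadjoint_demo(const Cplx* lhs, int size)
{
  std::vector<Cplx> blockA;
  blockA.reserve(static_cast<std::size_t>(size) * size);
  auto at = [&](int r, int c) { return lhs[r + c * size]; };
  for (int k = 0; k < size; ++k)        // column k of the reconstructed matrix
    for (int w = 0; w < size; ++w)      // row w within the block
    {
      if (w < k)       blockA.push_back(std::conj(at(k, w)));   // transposed + conjugated
      else if (w == k) blockA.push_back(Cplx(at(k, k).real())); // real (diagonal)
      else             blockA.push_back(at(w, k));              // normal
    }
  return blockA;
}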
......@@ -82,7 +89,8 @@ struct symm_pack_rhs
Index end_k = k2 + rows;
Index count = 0;
const_blas_data_mapper<Scalar,Index,StorageOrder> rhs(_rhs,rhsStride);
Index packet_cols = (cols/nr)*nr;
Index packet_cols8 = nr>=8 ? (cols/8) * 8 : 0;
Index packet_cols4 = nr>=4 ? (cols/4) * 4 : 0;
// first part: normal case
for(Index j2=0; j2<k2; j2+=nr)
......@@ -91,91 +99,163 @@ struct symm_pack_rhs
{
blockB[count+0] = rhs(k,j2+0);
blockB[count+1] = rhs(k,j2+1);
if (nr==4)
if (nr>=4)
{
blockB[count+2] = rhs(k,j2+2);
blockB[count+3] = rhs(k,j2+3);
}
if (nr>=8)
{
blockB[count+4] = rhs(k,j2+4);
blockB[count+5] = rhs(k,j2+5);
blockB[count+6] = rhs(k,j2+6);
blockB[count+7] = rhs(k,j2+7);
}
count += nr;
}
}
// second part: diagonal block
for(Index j2=k2; j2<(std::min)(k2+rows,packet_cols); j2+=nr)
Index end8 = nr>=8 ? (std::min)(k2+rows,packet_cols8) : k2;
if(nr>=8)
{
for(Index j2=k2; j2<end8; j2+=8)
{
// again we can split vertically into three different parts (transpose, symmetric, normal)
// transpose
for(Index k=k2; k<j2; k++)
{
blockB[count+0] = conj(rhs(j2+0,k));
blockB[count+1] = conj(rhs(j2+1,k));
if (nr==4)
blockB[count+0] = numext::conj(rhs(j2+0,k));
blockB[count+1] = numext::conj(rhs(j2+1,k));
blockB[count+2] = numext::conj(rhs(j2+2,k));
blockB[count+3] = numext::conj(rhs(j2+3,k));
blockB[count+4] = numext::conj(rhs(j2+4,k));
blockB[count+5] = numext::conj(rhs(j2+5,k));
blockB[count+6] = numext::conj(rhs(j2+6,k));
blockB[count+7] = numext::conj(rhs(j2+7,k));
count += 8;
}
// symmetric
Index h = 0;
for(Index k=j2; k<j2+8; k++)
{
// normal
for (Index w=0 ; w<h; ++w)
blockB[count+w] = rhs(k,j2+w);
blockB[count+h] = numext::real(rhs(k,k));
// transpose
for (Index w=h+1 ; w<8; ++w)
blockB[count+w] = numext::conj(rhs(j2+w,k));
count += 8;
++h;
}
// normal
for(Index k=j2+8; k<end_k; k++)
{
blockB[count+2] = conj(rhs(j2+2,k));
blockB[count+3] = conj(rhs(j2+3,k));
blockB[count+0] = rhs(k,j2+0);
blockB[count+1] = rhs(k,j2+1);
blockB[count+2] = rhs(k,j2+2);
blockB[count+3] = rhs(k,j2+3);
blockB[count+4] = rhs(k,j2+4);
blockB[count+5] = rhs(k,j2+5);
blockB[count+6] = rhs(k,j2+6);
blockB[count+7] = rhs(k,j2+7);
count += 8;
}
count += nr;
}
}
if(nr>=4)
{
for(Index j2=end8; j2<(std::min)(k2+rows,packet_cols4); j2+=4)
{
// again we can split vertically into three different parts (transpose, symmetric, normal)
// transpose
for(Index k=k2; k<j2; k++)
{
blockB[count+0] = numext::conj(rhs(j2+0,k));
blockB[count+1] = numext::conj(rhs(j2+1,k));
blockB[count+2] = numext::conj(rhs(j2+2,k));
blockB[count+3] = numext::conj(rhs(j2+3,k));
count += 4;
}
// symmetric
Index h = 0;
for(Index k=j2; k<j2+nr; k++)
for(Index k=j2; k<j2+4; k++)
{
// normal
for (Index w=0 ; w<h; ++w)
blockB[count+w] = rhs(k,j2+w);
blockB[count+h] = real(rhs(k,k));
blockB[count+h] = numext::real(rhs(k,k));
// transpose
for (Index w=h+1 ; w<nr; ++w)
blockB[count+w] = conj(rhs(j2+w,k));
count += nr;
for (Index w=h+1 ; w<4; ++w)
blockB[count+w] = numext::conj(rhs(j2+w,k));
count += 4;
++h;
}
// normal
for(Index k=j2+nr; k<end_k; k++)
for(Index k=j2+4; k<end_k; k++)
{
blockB[count+0] = rhs(k,j2+0);
blockB[count+1] = rhs(k,j2+1);
if (nr==4)
{
blockB[count+2] = rhs(k,j2+2);
blockB[count+3] = rhs(k,j2+3);
count += 4;
}
count += nr;
}
}
// third part: transposed
for(Index j2=k2+rows; j2<packet_cols; j2+=nr)
if(nr>=8)
{
for(Index j2=k2+rows; j2<packet_cols8; j2+=8)
{
for(Index k=k2; k<end_k; k++)
{
blockB[count+0] = conj(rhs(j2+0,k));
blockB[count+1] = conj(rhs(j2+1,k));
if (nr==4)
blockB[count+0] = numext::conj(rhs(j2+0,k));
blockB[count+1] = numext::conj(rhs(j2+1,k));
blockB[count+2] = numext::conj(rhs(j2+2,k));
blockB[count+3] = numext::conj(rhs(j2+3,k));
blockB[count+4] = numext::conj(rhs(j2+4,k));
blockB[count+5] = numext::conj(rhs(j2+5,k));
blockB[count+6] = numext::conj(rhs(j2+6,k));
blockB[count+7] = numext::conj(rhs(j2+7,k));
count += 8;
}
}
}
if(nr>=4)
{
for(Index j2=(std::max)(packet_cols8,k2+rows); j2<packet_cols4; j2+=4)
{
blockB[count+2] = conj(rhs(j2+2,k));
blockB[count+3] = conj(rhs(j2+3,k));
for(Index k=k2; k<end_k; k++)
{
blockB[count+0] = numext::conj(rhs(j2+0,k));
blockB[count+1] = numext::conj(rhs(j2+1,k));
blockB[count+2] = numext::conj(rhs(j2+2,k));
blockB[count+3] = numext::conj(rhs(j2+3,k));
count += 4;
}
count += nr;
}
}
// copy the remaining columns one at a time (=> the same with nr==1)
for(Index j2=packet_cols; j2<cols; ++j2)
for(Index j2=packet_cols4; j2<cols; ++j2)
{
// transpose
Index half = (std::min)(end_k,j2);
for(Index k=k2; k<half; k++)
{
blockB[count] = conj(rhs(j2,k));
blockB[count] = numext::conj(rhs(j2,k));
count += 1;
}
if(half==j2 && half<k2+rows)
{
blockB[count] = real(rhs(j2,j2));
blockB[count] = numext::real(rhs(j2,j2));
count += 1;
}
else
......@@ -197,69 +277,85 @@ struct symm_pack_rhs
template <typename Scalar, typename Index,
int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,
int ResStorageOrder>
int ResStorageOrder, int ResInnerStride>
struct product_selfadjoint_matrix;
template <typename Scalar, typename Index,
int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs, RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,RowMajor>
int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,
int ResInnerStride>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs, RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,RowMajor,ResInnerStride>
{
static EIGEN_STRONG_INLINE void run(
Index rows, Index cols,
const Scalar* lhs, Index lhsStride,
const Scalar* rhs, Index rhsStride,
Scalar* res, Index resStride,
Scalar alpha)
Scalar* res, Index resIncr, Index resStride,
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{
product_selfadjoint_matrix<Scalar, Index,
EIGEN_LOGICAL_XOR(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
RhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsSelfAdjoint,ConjugateRhs),
EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
LhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs),
ColMajor>
::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha);
ColMajor,ResInnerStride>
::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resIncr, resStride, alpha, blocking);
}
};
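This RowMajor-result specialization does no packing or kernel work itself: storing C row-major is the same as storing C^T column-major, and C^T = (A*B)^T = B^T * A^T, so it forwards to the ColMajor kernel with the two operands swapped. A plain operand gets its storage order flipped to realize the transpose, while a selfadjoint operand instead absorbs the transpose as a conjugation in the complex case (A^T = conj(A) when A = A^H); the EIGEN_LOGICAL_XOR terms above encode exactly that choice.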
template <typename Scalar, typename Index,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor>
int RhsStorageOrder, bool ConjugateRhs,
int ResInnerStride>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor,ResInnerStride>
{
static EIGEN_DONT_INLINE void run(
Index rows, Index cols,
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride,
Scalar alpha)
Scalar* res, Index resIncr, Index resStride,
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
};
template <typename Scalar, typename Index,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs,
int ResInnerStride>
EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor,ResInnerStride>::run(
Index rows, Index cols,
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* _res, Index resIncr, Index resStride,
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{
Index size = rows;
const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
typedef gebp_traits<Scalar,Scalar> Traits;
Index kc = size; // cache block size along the K direction
Index mc = rows; // cache block size along the M direction
Index nc = cols; // cache block size along the N direction
computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
// kc must be smaller than mc
typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
typedef const_blas_data_mapper<Scalar, Index, (LhsStorageOrder == RowMajor) ? ColMajor : RowMajor> LhsTransposeMapper;
typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper;
typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
LhsMapper lhs(_lhs,lhsStride);
LhsTransposeMapper lhs_transpose(_lhs,lhsStride);
RhsMapper rhs(_rhs,rhsStride);
ResMapper res(_res, resStride, resIncr);
Index kc = blocking.kc(); // cache block size along the K direction
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
// kc must be smaller than mc
kc = (std::min)(kc,mc);
std::size_t sizeA = kc*mc;
std::size_t sizeB = kc*cols;
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
std::size_t sizeB = sizeW + kc*cols;
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
Scalar* blockB = allocatedBlockB + sizeW;
gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed;
gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder> pack_rhs;
gemm_pack_lhs<Scalar, Index, LhsTransposeMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed;
for(Index k2=0; k2<size; k2+=kc)
{
......@@ -268,7 +364,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs
// we have selected one row panel of rhs and one column panel of lhs
// pack rhs's panel into a sequential chunk of memory
// and expand each coeff to a constant packet for further reuse
pack_rhs(blockB, &rhs(k2,0), rhsStride, actual_kc, cols);
pack_rhs(blockB, rhs.getSubMapper(k2,0), actual_kc, cols);
// the selected lhs panel has to be split in three different parts:
// 1 - the transposed panel above the diagonal block => transposed packed copy
......@@ -278,9 +374,9 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs
{
const Index actual_mc = (std::min)(i2+mc,k2)-i2;
// transposed packed copy
pack_lhs_transposed(blockA, &lhs(k2, i2), lhsStride, actual_kc, actual_mc);
pack_lhs_transposed(blockA, lhs_transpose.getSubMapper(i2, k2), actual_kc, actual_mc);
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
}
// the block diagonal
{
......@@ -288,53 +384,65 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs
// symmetric packed copy
pack_lhs(blockA, &lhs(k2,k2), lhsStride, actual_kc, actual_mc);
gebp_kernel(res+k2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
gebp_kernel(res.getSubMapper(k2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
}
for(Index i2=k2+kc; i2<size; i2+=mc)
{
const Index actual_mc = (std::min)(i2+mc,size)-i2;
gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder,false>()
(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);
gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder,false>()
(blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
}
}
}
};
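In outline, the loop nest above performs the following for each K-slice (a pseudocode sketch; the clamping of the final partial blocks via actual_kc/actual_mc is omitted):

// for (k2 = 0; k2 < size; k2 += kc)                 // slice the K dimension
// {
//   pack_rhs(blockB, rhs(k2,0), kc, cols);          // one rhs panel, reused by all three parts
//   for (i2 = 0; i2 < k2; i2 += mc)                 // 1) above the diagonal
//     gebp(res(i2,0), pack_lhs_transposed(lhs(k2,i2)), blockB); // transposed packed copy
//   gebp(res(k2,0), pack_lhs_symmetric(lhs(k2,k2)), blockB);    // 2) the diagonal block
//   for (i2 = k2 + kc; i2 < size; i2 += mc)         // 3) below the diagonal
//     gebp(res(i2,0), pack_lhs(lhs(i2,k2)), blockB);            // normal packed copy
// }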
// matrix * selfadjoint product
template <typename Scalar, typename Index,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor>
int RhsStorageOrder, bool ConjugateRhs,
int ResInnerStride>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor,ResInnerStride>
{
static EIGEN_DONT_INLINE void run(
Index rows, Index cols,
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride,
Scalar alpha)
Scalar* res, Index resIncr, Index resStride,
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
};
template <typename Scalar, typename Index,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs,
int ResInnerStride>
EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor,ResInnerStride>::run(
Index rows, Index cols,
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* _res, Index resIncr, Index resStride,
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{
Index size = cols;
const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
typedef gebp_traits<Scalar,Scalar> Traits;
Index kc = size; // cache block size along the K direction
Index mc = rows; // cache block size along the M direction
Index nc = cols; // cache block size along the N direction
computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
std::size_t sizeB = sizeW + kc*cols;
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
Scalar* blockB = allocatedBlockB + sizeW;
gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
LhsMapper lhs(_lhs,lhsStride);
ResMapper res(_res,resStride, resIncr);
Index kc = blocking.kc(); // cache block size along the K direction
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
std::size_t sizeA = kc*mc;
std::size_t sizeB = kc*cols;
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
symm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
for(Index k2=0; k2<size; k2+=kc)
......@@ -347,13 +455,12 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLh
for(Index i2=0; i2<rows; i2+=mc)
{
const Index actual_mc = (std::min)(i2+mc,rows)-i2;
pack_lhs(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);
pack_lhs(blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
}
}
}
};
} // end namespace internal
......@@ -362,19 +469,16 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLh
***************************************************************************/
namespace internal {
template<typename Lhs, int LhsMode, typename Rhs, int RhsMode>
struct traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false> >
: traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs> >
{};
}
template<typename Lhs, int LhsMode, typename Rhs, int RhsMode>
struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>
: public ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs >
struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,RhsMode,false>
{
EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix)
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
typedef internal::blas_traits<Lhs> LhsBlasTraits;
typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
typedef internal::blas_traits<Rhs> RhsBlasTraits;
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
enum {
LhsIsUpper = (LhsMode&(Upper|Lower))==Upper,
......@@ -383,34 +487,41 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>
RhsIsSelfAdjoint = (RhsMode&SelfAdjoint)==SelfAdjoint
};
template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
template<typename Dest>
static void run(Dest &dst, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar& alpha)
{
eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
eigen_assert(dst.rows()==a_lhs.rows() && dst.cols()==a_rhs.cols());
typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);
typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
* RhsBlasTraits::extractScalarFactor(m_rhs);
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs)
* RhsBlasTraits::extractScalarFactor(a_rhs);
typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,1> BlockingType;
BlockingType blocking(lhs.rows(), rhs.cols(), lhs.cols(), 1, false);
internal::product_selfadjoint_matrix<Scalar, Index,
EIGEN_LOGICAL_XOR(LhsIsUpper,
internal::traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint,
EIGEN_LOGICAL_XOR(LhsIsUpper,internal::traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint,
NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)),
EIGEN_LOGICAL_XOR(RhsIsUpper,
internal::traits<Rhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint,
EIGEN_LOGICAL_XOR(RhsIsUpper,internal::traits<Rhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint,
NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,bool(RhsBlasTraits::NeedToConjugate)),
internal::traits<Dest>::Flags&RowMajorBit ? RowMajor : ColMajor>
internal::traits<Dest>::Flags&RowMajorBit ? RowMajor : ColMajor,
Dest::InnerStrideAtCompileTime>
::run(
lhs.rows(), rhs.cols(), // sizes
&lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
&rhs.coeffRef(0,0), rhs.outerStride(), // rhs info
&dst.coeffRef(0,0), dst.outerStride(), // result info
actualAlpha // alpha
&dst.coeffRef(0,0), dst.innerStride(), dst.outerStride(), // result info
actualAlpha, blocking // alpha
);
}
};
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_H
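A usage sketch of the dispatch defined above (standard Eigen API; the scalar factor on the right-hand side is what RhsBlasTraits::extractScalarFactor() pulls out):

#include <complex>
#include <Eigen/Dense>

// C += A * (alpha * B) with A selfadjoint: only the lower triangle of A is
// referenced, and the product is routed through selfadjoint_product_impl
// to product_selfadjoint_matrix.
void symm_usage()
{
  Eigen::MatrixXcd A = Eigen::MatrixXcd::Random(64, 64);
  Eigen::MatrixXcd B = Eigen::MatrixXcd::Random(64, 32);
  Eigen::MatrixXcd C = Eigen::MatrixXcd::Zero(64, 32);
  std::complex<double> alpha(2.0, 0.0);
  C.noalias() += A.selfadjointView<Eigen::Lower>() * (alpha * B);
}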
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
* Content : Eigen bindings to BLAS F77
* Self adjoint matrix * matrix product functionality based on ?SYMM/?HEMM.
********************************************************************************
*/
#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_BLAS_H
#define EIGEN_SELFADJOINT_MATRIX_MATRIX_BLAS_H
namespace Eigen {
namespace internal {
/* Optimized selfadjoint matrix * matrix (?SYMM/?HEMM) product */
#define EIGEN_BLAS_SYMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
template <typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor,1> \
{\
\
static void run( \
Index rows, Index cols, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resIncr, Index resStride, \
EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
{ \
EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \
eigen_assert(resIncr == 1); \
char side='L', uplo='L'; \
BlasIndex m, n, lda, ldb, ldc; \
const EIGTYPE *a, *b; \
EIGTYPE beta(1); \
MatrixX##EIGPREFIX b_tmp; \
\
/* Set transpose options */ \
/* Set m, n, k */ \
m = convert_index<BlasIndex>(rows); \
n = convert_index<BlasIndex>(cols); \
\
/* Set lda, ldb, ldc */ \
lda = convert_index<BlasIndex>(lhsStride); \
ldb = convert_index<BlasIndex>(rhsStride); \
ldc = convert_index<BlasIndex>(resStride); \
\
/* Set a, b, c */ \
if (LhsStorageOrder==RowMajor) uplo='U'; \
a = _lhs; \
\
if (RhsStorageOrder==RowMajor) { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \
b_tmp = rhs.adjoint(); \
b = b_tmp.data(); \
ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
} else b = _rhs; \
\
BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
\
} \
};
#define EIGEN_BLAS_HEMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
template <typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor,1> \
{\
static void run( \
Index rows, Index cols, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resIncr, Index resStride, \
EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
{ \
EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \
eigen_assert(resIncr == 1); \
char side='L', uplo='L'; \
BlasIndex m, n, lda, ldb, ldc; \
const EIGTYPE *a, *b; \
EIGTYPE beta(1); \
MatrixX##EIGPREFIX b_tmp; \
Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> a_tmp; \
\
/* Set transpose options */ \
/* Set m, n, k */ \
m = convert_index<BlasIndex>(rows); \
n = convert_index<BlasIndex>(cols); \
\
/* Set lda, ldb, ldc */ \
lda = convert_index<BlasIndex>(lhsStride); \
ldb = convert_index<BlasIndex>(rhsStride); \
ldc = convert_index<BlasIndex>(resStride); \
\
/* Set a, b, c */ \
if (((LhsStorageOrder==ColMajor) && ConjugateLhs) || ((LhsStorageOrder==RowMajor) && (!ConjugateLhs))) { \
Map<const Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder>, 0, OuterStride<> > lhs(_lhs,m,m,OuterStride<>(lhsStride)); \
a_tmp = lhs.conjugate(); \
a = a_tmp.data(); \
lda = convert_index<BlasIndex>(a_tmp.outerStride()); \
} else a = _lhs; \
if (LhsStorageOrder==RowMajor) uplo='U'; \
\
if (RhsStorageOrder==ColMajor && (!ConjugateRhs)) { \
b = _rhs; } \
else { \
if (RhsStorageOrder==ColMajor && ConjugateRhs) { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,m,n,OuterStride<>(rhsStride)); \
b_tmp = rhs.conjugate(); \
} else \
if (ConjugateRhs) { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \
b_tmp = rhs.adjoint(); \
} else { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \
b_tmp = rhs.transpose(); \
} \
b = b_tmp.data(); \
ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
} \
\
BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
\
} \
};
#ifdef EIGEN_USE_MKL
EIGEN_BLAS_SYMM_L(double, double, d, dsymm)
EIGEN_BLAS_SYMM_L(float, float, f, ssymm)
EIGEN_BLAS_HEMM_L(dcomplex, MKL_Complex16, cd, zhemm)
EIGEN_BLAS_HEMM_L(scomplex, MKL_Complex8, cf, chemm)
#else
EIGEN_BLAS_SYMM_L(double, double, d, dsymm_)
EIGEN_BLAS_SYMM_L(float, float, f, ssymm_)
EIGEN_BLAS_HEMM_L(dcomplex, double, cd, zhemm_)
EIGEN_BLAS_HEMM_L(scomplex, float, cf, chemm_)
#endif
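For reference, the double-precision expansion above reduces to a single Fortran-BLAS call; a stand-alone sketch (the extern declaration is hand-written here, and a real build may use 64-bit integers for BlasIndex rather than int):

extern "C" void dsymm_(const char* side, const char* uplo,
                       const int* m, const int* n, const double* alpha,
                       const double* a, const int* lda,
                       const double* b, const int* ldb,
                       const double* beta, double* c, const int* ldc);

// C = A*B + C with A an m x m symmetric matrix whose lower triangle is
// stored, everything column-major; beta(1) matches the accumulation
// convention of the macro above.
void symm_l_demo(const double* A, const double* B, double* C, int m, int n)
{
  char side = 'L', uplo = 'L';
  double alpha = 1.0, beta = 1.0;
  dsymm_(&side, &uplo, &m, &n, &alpha, A, &m, B, &m, &beta, C, &m);
}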
/* Optimized matrix * selfadjoint matrix (?SYMM/?HEMM) product */
#define EIGEN_BLAS_SYMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
template <typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor,1> \
{\
\
static void run( \
Index rows, Index cols, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resIncr, Index resStride, \
EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
{ \
EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \
eigen_assert(resIncr == 1); \
char side='R', uplo='L'; \
BlasIndex m, n, lda, ldb, ldc; \
const EIGTYPE *a, *b; \
EIGTYPE beta(1); \
MatrixX##EIGPREFIX b_tmp; \
\
/* Set m, n, k */ \
m = convert_index<BlasIndex>(rows); \
n = convert_index<BlasIndex>(cols); \
\
/* Set lda, ldb, ldc */ \
lda = convert_index<BlasIndex>(rhsStride); \
ldb = convert_index<BlasIndex>(lhsStride); \
ldc = convert_index<BlasIndex>(resStride); \
\
/* Set a, b, c */ \
if (RhsStorageOrder==RowMajor) uplo='U'; \
a = _rhs; \
\
if (LhsStorageOrder==RowMajor) { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(rhsStride)); \
b_tmp = lhs.adjoint(); \
b = b_tmp.data(); \
ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
} else b = _lhs; \
\
BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
\
} \
};
#define EIGEN_BLAS_HEMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
template <typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor,1> \
{\
static void run( \
Index rows, Index cols, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resIncr, Index resStride, \
EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
{ \
EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \
eigen_assert(resIncr == 1); \
char side='R', uplo='L'; \
BlasIndex m, n, lda, ldb, ldc; \
const EIGTYPE *a, *b; \
EIGTYPE beta(1); \
MatrixX##EIGPREFIX b_tmp; \
Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> a_tmp; \
\
/* Set m, n, k */ \
m = convert_index<BlasIndex>(rows); \
n = convert_index<BlasIndex>(cols); \
\
/* Set lda, ldb, ldc */ \
lda = convert_index<BlasIndex>(rhsStride); \
ldb = convert_index<BlasIndex>(lhsStride); \
ldc = convert_index<BlasIndex>(resStride); \
\
/* Set a, b, c */ \
if (((RhsStorageOrder==ColMajor) && ConjugateRhs) || ((RhsStorageOrder==RowMajor) && (!ConjugateRhs))) { \
Map<const Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder>, 0, OuterStride<> > rhs(_rhs,n,n,OuterStride<>(rhsStride)); \
a_tmp = rhs.conjugate(); \
a = a_tmp.data(); \
lda = convert_index<BlasIndex>(a_tmp.outerStride()); \
} else a = _rhs; \
if (RhsStorageOrder==RowMajor) uplo='U'; \
\
if (LhsStorageOrder==ColMajor && (!ConjugateLhs)) { \
b = _lhs; } \
else { \
if (LhsStorageOrder==ColMajor && ConjugateLhs) { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,m,n,OuterStride<>(lhsStride)); \
b_tmp = lhs.conjugate(); \
} else \
if (ConjugateLhs) { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(lhsStride)); \
b_tmp = lhs.adjoint(); \
} else { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(lhsStride)); \
b_tmp = lhs.transpose(); \
} \
b = b_tmp.data(); \
ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
} \
\
BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
} \
};
#ifdef EIGEN_USE_MKL
EIGEN_BLAS_SYMM_R(double, double, d, dsymm)
EIGEN_BLAS_SYMM_R(float, float, f, ssymm)
EIGEN_BLAS_HEMM_R(dcomplex, MKL_Complex16, cd, zhemm)
EIGEN_BLAS_HEMM_R(scomplex, MKL_Complex8, cf, chemm)
#else
EIGEN_BLAS_SYMM_R(double, double, d, dsymm_)
EIGEN_BLAS_SYMM_R(float, float, f, ssymm_)
EIGEN_BLAS_HEMM_R(dcomplex, double, cd, zhemm_)
EIGEN_BLAS_HEMM_R(scomplex, float, cf, chemm_)
#endif
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_BLAS_H
......@@ -30,7 +30,16 @@ struct selfadjoint_matrix_vector_product
static EIGEN_DONT_INLINE void run(
Index size,
const Scalar* lhs, Index lhsStride,
const Scalar* _rhs, Index rhsIncr,
const Scalar* rhs,
Scalar* res,
Scalar alpha);
};
template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version>
EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Version>::run(
Index size,
const Scalar* lhs, Index lhsStride,
const Scalar* rhs,
Scalar* res,
Scalar alpha)
{
......@@ -46,23 +55,13 @@ static EIGEN_DONT_INLINE void run(
conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, IsRowMajor), ConjugateRhs> cj0;
conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> cj1;
conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex, ConjugateRhs> cjd;
conj_helper<RealScalar,Scalar,false, ConjugateRhs> cjd;
conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, IsRowMajor), ConjugateRhs> pcj0;
conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(ConjugateLhs, !IsRowMajor), ConjugateRhs> pcj1;
Scalar cjAlpha = ConjugateRhs ? conj(alpha) : alpha;
Scalar cjAlpha = ConjugateRhs ? numext::conj(alpha) : alpha;
// FIXME this copy is now handled outside product_selfadjoint_vector, so it could probably be removed.
// if the rhs is not sequentially stored in memory we copy it to a temporary buffer,
// this is because we need to extract packets
ei_declare_aligned_stack_constructed_variable(Scalar,rhs,size,rhsIncr==1 ? const_cast<Scalar*>(_rhs) : 0);
if (rhsIncr!=1)
{
const Scalar* it = _rhs;
for (Index i=0; i<size; ++i, it+=rhsIncr)
rhs[i] = *it;
}
Index bound = (std::max)(Index(0),size-8) & 0xfffffffe;
if (FirstTriangular)
......@@ -71,8 +70,8 @@ static EIGEN_DONT_INLINE void run(
for (Index j=FirstTriangular ? bound : 0;
j<(FirstTriangular ? size : bound);j+=2)
{
register const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
register const Scalar* EIGEN_RESTRICT A1 = lhs + (j+1)*lhsStride;
const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
const Scalar* EIGEN_RESTRICT A1 = lhs + (j+1)*lhsStride;
Scalar t0 = cjAlpha * rhs[j];
Packet ptmp0 = pset1<Packet>(t0);
......@@ -84,14 +83,13 @@ static EIGEN_DONT_INLINE void run(
Scalar t3(0);
Packet ptmp3 = pset1<Packet>(t3);
size_t starti = FirstTriangular ? 0 : j+2;
size_t endi = FirstTriangular ? j : size;
size_t alignedStart = (starti) + internal::first_aligned(&res[starti], endi-starti);
size_t alignedEnd = alignedStart + ((endi-alignedStart)/(PacketSize))*(PacketSize);
Index starti = FirstTriangular ? 0 : j+2;
Index endi = FirstTriangular ? j : size;
Index alignedStart = (starti) + internal::first_default_aligned(&res[starti], endi-starti);
Index alignedEnd = alignedStart + ((endi-alignedStart)/(PacketSize))*(PacketSize);
// TODO make sure this product is a real * complex and that the rhs is properly conjugated if needed
res[j] += cjd.pmul(internal::real(A0[j]), t0);
res[j+1] += cjd.pmul(internal::real(A1[j+1]), t1);
res[j] += cjd.pmul(numext::real(A0[j]), t0);
res[j+1] += cjd.pmul(numext::real(A1[j+1]), t1);
if(FirstTriangular)
{
res[j] += cj0.pmul(A1[j], t1);
......@@ -103,11 +101,11 @@ static EIGEN_DONT_INLINE void run(
t2 += cj1.pmul(A0[j+1], rhs[j+1]);
}
for (size_t i=starti; i<alignedStart; ++i)
for (Index i=starti; i<alignedStart; ++i)
{
res[i] += t0 * A0[i] + t1 * A1[i];
t2 += conj(A0[i]) * rhs[i];
t3 += conj(A1[i]) * rhs[i];
res[i] += cj0.pmul(A0[i], t0) + cj0.pmul(A1[i],t1);
t2 += cj1.pmul(A0[i], rhs[i]);
t3 += cj1.pmul(A1[i], rhs[i]);
}
// Yes, this is an optimization for gcc 4.3 and 4.4 (=> huge speed up)
// gcc 4.2 does this optimization automatically.
......@@ -115,7 +113,7 @@ static EIGEN_DONT_INLINE void run(
const Scalar* EIGEN_RESTRICT a1It = A1 + alignedStart;
const Scalar* EIGEN_RESTRICT rhsIt = rhs + alignedStart;
Scalar* EIGEN_RESTRICT resIt = res + alignedStart;
for (size_t i=alignedStart; i<alignedEnd; i+=PacketSize)
for (Index i=alignedStart; i<alignedEnd; i+=PacketSize)
{
Packet A0i = ploadu<Packet>(a0It); a0It += PacketSize;
Packet A1i = ploadu<Packet>(a1It); a1It += PacketSize;
......@@ -127,7 +125,7 @@ static EIGEN_DONT_INLINE void run(
ptmp3 = pcj1.pmadd(A1i, Bi, ptmp3);
pstore(resIt,Xi); resIt += PacketSize;
}
for (size_t i=alignedEnd; i<endi; i++)
for (Index i=alignedEnd; i<endi; i++)
{
res[i] += cj0.pmul(A0[i], t0) + cj0.pmul(A1[i],t1);
t2 += cj1.pmul(A0[i], rhs[i]);
......@@ -139,12 +137,11 @@ static EIGEN_DONT_INLINE void run(
}
for (Index j=FirstTriangular ? 0 : bound;j<(FirstTriangular ? bound : size);j++)
{
register const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
Scalar t1 = cjAlpha * rhs[j];
Scalar t2(0);
// TODO make sure this product is a real * complex and that the rhs is properly conjugated if needed
res[j] += cjd.pmul(internal::real(A0[j]), t1);
res[j] += cjd.pmul(numext::real(A0[j]), t1);
for (Index i=FirstTriangular ? 0 : j+1; i<(FirstTriangular ? j : size); i++)
{
res[i] += cj0.pmul(A0[i], t1);
......@@ -153,7 +150,6 @@ static EIGEN_DONT_INLINE void run(
res[j] += alpha * t2;
}
}
};
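A reference (unvectorized) version of this kernel for the Lower, column-major case, mirroring the scalar tail loop above with plain std::complex in place of Eigen's packet machinery:

#include <complex>

using Cplx = std::complex<double>;

// res += alpha * A * rhs, A selfadjoint with only its lower triangle stored
// column-major (leading dimension lda). A sketch for illustration; the real
// kernel processes two columns at a time and vectorizes the inner loop.
void hemv_lower_ref(long size, const Cplx* A, long lda,
                    const Cplx* rhs, Cplx* res, Cplx alpha)
{
  for (long j = 0; j < size; ++j)
  {
    const Cplx* Aj = A + j * lda;          // column j, rows j..size-1 are valid
    Cplx t1 = alpha * rhs[j];
    Cplx t2(0);
    res[j] += Aj[j].real() * t1;           // diagonal taken as real
    for (long i = j + 1; i < size; ++i)
    {
      res[i] += Aj[i] * t1;                // stored (lower) part, used directly
      t2     += std::conj(Aj[i]) * rhs[i]; // mirrored (upper) part, conjugated
    }
    res[j] += alpha * t2;
  }
}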
} // end namespace internal
......@@ -162,45 +158,44 @@ static EIGEN_DONT_INLINE void run(
***************************************************************************/
namespace internal {
template<typename Lhs, int LhsMode, typename Rhs>
struct traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true> >
: traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>, Lhs, Rhs> >
{};
}
template<typename Lhs, int LhsMode, typename Rhs>
struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
: public ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>, Lhs, Rhs >
struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,0,true>
{
EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix)
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
enum {
LhsUpLo = LhsMode&(Upper|Lower)
};
typedef internal::blas_traits<Lhs> LhsBlasTraits;
typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
typedef typename internal::remove_all<ActualLhsType>::type ActualLhsTypeCleaned;
SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
typedef internal::blas_traits<Rhs> RhsBlasTraits;
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;
template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const
enum { LhsUpLo = LhsMode&(Upper|Lower) };
template<typename Dest>
static void run(Dest& dest, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar& alpha)
{
typedef typename Dest::Scalar ResScalar;
typedef typename Base::RhsScalar RhsScalar;
typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
typedef typename Rhs::Scalar RhsScalar;
typedef Map<Matrix<ResScalar,Dynamic,1>, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits<ResScalar>::size)> MappedDest;
eigen_assert(dest.rows()==m_lhs.rows() && dest.cols()==m_rhs.cols());
eigen_assert(dest.rows()==a_lhs.rows() && dest.cols()==a_rhs.cols());
typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);
typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
* RhsBlasTraits::extractScalarFactor(m_rhs);
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs)
* RhsBlasTraits::extractScalarFactor(a_rhs);
enum {
EvalToDest = (Dest::InnerStrideAtCompileTime==1),
UseRhs = (_ActualRhsType::InnerStrideAtCompileTime==1)
UseRhs = (ActualRhsTypeCleaned::InnerStrideAtCompileTime==1)
};
internal::gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,!EvalToDest> static_dest;
internal::gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!UseRhs> static_rhs;
internal::gemv_static_vector_if<RhsScalar,ActualRhsTypeCleaned::SizeAtCompileTime,ActualRhsTypeCleaned::MaxSizeAtCompileTime,!UseRhs> static_rhs;
ei_declare_aligned_stack_constructed_variable(ResScalar,actualDestPtr,dest.size(),
EvalToDest ? dest.data() : static_dest.data());
......@@ -211,7 +206,7 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
if(!EvalToDest)
{
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
int size = dest.size();
Index size = dest.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
MappedDest(actualDestPtr, dest.size()) = dest;
......@@ -220,18 +215,19 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
if(!UseRhs)
{
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
int size = rhs.size();
Index size = rhs.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, rhs.size()) = rhs;
Map<typename ActualRhsTypeCleaned::PlainObject>(actualRhsPtr, rhs.size()) = rhs;
}
internal::selfadjoint_matrix_vector_product<Scalar, Index, (internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>::run
internal::selfadjoint_matrix_vector_product<Scalar, Index, (internal::traits<ActualLhsTypeCleaned>::Flags&RowMajorBit) ? RowMajor : ColMajor,
int(LhsUpLo), bool(LhsBlasTraits::NeedToConjugate), bool(RhsBlasTraits::NeedToConjugate)>::run
(
lhs.rows(), // size
&lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
actualRhsPtr, 1, // rhs info
actualRhsPtr, // rhs info
actualDestPtr, // result info
actualAlpha // scale factor
);
......@@ -241,34 +237,24 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
}
};
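A usage sketch of the vector path above (standard Eigen API):

#include <Eigen/Dense>

// y += A * x with A selfadjoint; only the lower triangle of A is read, and
// the expression lands in selfadjoint_matrix_vector_product::run above.
void hemv_usage()
{
  Eigen::MatrixXcd A = Eigen::MatrixXcd::Random(32, 32);
  Eigen::VectorXcd x = Eigen::VectorXcd::Random(32);
  Eigen::VectorXcd y = Eigen::VectorXcd::Zero(32);
  y.noalias() += A.selfadjointView<Eigen::Lower>() * x;
}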
namespace internal {
template<typename Lhs, typename Rhs, int RhsMode>
struct traits<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false> >
: traits<ProductBase<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>, Lhs, Rhs> >
{};
}
template<typename Lhs, typename Rhs, int RhsMode>
struct SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>
: public ProductBase<SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>, Lhs, Rhs >
struct selfadjoint_product_impl<Lhs,0,true,Rhs,RhsMode,false>
{
EIGEN_PRODUCT_PUBLIC_INTERFACE(SelfadjointProductMatrix)
typedef typename Product<Lhs,Rhs>::Scalar Scalar;
enum { RhsUpLo = RhsMode&(Upper|Lower) };
enum {
RhsUpLo = RhsMode&(Upper|Lower)
};
SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const
template<typename Dest>
static void run(Dest& dest, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar& alpha)
{
// let's simply transpose the product
Transpose<Dest> destT(dest);
SelfadjointProductMatrix<Transpose<const Rhs>, int(RhsUpLo)==Upper ? Lower : Upper, false,
Transpose<const Lhs>, 0, true>(m_rhs.transpose(), m_lhs.transpose()).scaleAndAddTo(destT, alpha);
selfadjoint_product_impl<Transpose<const Rhs>, int(RhsUpLo)==Upper ? Lower : Upper, false,
Transpose<const Lhs>, 0, true>::run(destT, a_rhs.transpose(), a_lhs.transpose(), alpha);
}
};
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_H
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
(BSD 3-clause license text identical to the block above.)
********************************************************************************
* Content : Eigen bindings to BLAS F77
* Selfadjoint matrix-vector product functionality based on ?SYMV/?HEMV.
********************************************************************************
*/
#ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_BLAS_H
#define EIGEN_SELFADJOINT_MATRIX_VECTOR_BLAS_H
namespace Eigen {
namespace internal {
/**********************************************************************
* This file implements selfadjoint matrix-vector multiplication using BLAS
**********************************************************************/
// symv/hemv specialization
template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs>
struct selfadjoint_matrix_vector_product_symv :
selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,BuiltIn> {};
#define EIGEN_BLAS_SYMV_SPECIALIZE(Scalar) \
template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \
struct selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Specialized> { \
static void run( \
Index size, const Scalar* lhs, Index lhsStride, \
const Scalar* _rhs, Scalar* res, Scalar alpha) { \
enum {\
IsColMajor = StorageOrder==ColMajor \
}; \
if (IsColMajor == ConjugateLhs) {\
selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,BuiltIn>::run( \
size, lhs, lhsStride, _rhs, res, alpha); \
} else {\
selfadjoint_matrix_vector_product_symv<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs>::run( \
size, lhs, lhsStride, _rhs, res, alpha); \
}\
} \
}; \
EIGEN_BLAS_SYMV_SPECIALIZE(double)
EIGEN_BLAS_SYMV_SPECIALIZE(float)
EIGEN_BLAS_SYMV_SPECIALIZE(dcomplex)
EIGEN_BLAS_SYMV_SPECIALIZE(scomplex)
#define EIGEN_BLAS_SYMV_SPECIALIZATION(EIGTYPE,BLASTYPE,BLASFUNC) \
template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \
struct selfadjoint_matrix_vector_product_symv<EIGTYPE,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs> \
{ \
typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> SYMVVector;\
\
static void run( \
Index size, const EIGTYPE* lhs, Index lhsStride, \
const EIGTYPE* _rhs, EIGTYPE* res, EIGTYPE alpha) \
{ \
enum {\
IsRowMajor = StorageOrder==RowMajor ? 1 : 0, \
IsLower = UpLo == Lower ? 1 : 0 \
}; \
BlasIndex n=convert_index<BlasIndex>(size), lda=convert_index<BlasIndex>(lhsStride), incx=1, incy=1; \
EIGTYPE beta(1); \
const EIGTYPE *x_ptr; \
char uplo=(IsRowMajor) ? (IsLower ? 'U' : 'L') : (IsLower ? 'L' : 'U'); \
SYMVVector x_tmp; \
if (ConjugateRhs) { \
Map<const SYMVVector, 0 > map_x(_rhs,size,1); \
x_tmp=map_x.conjugate(); \
x_ptr=x_tmp.data(); \
} else x_ptr=_rhs; \
BLASFUNC(&uplo, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &incy); \
}\
};
#ifdef EIGEN_USE_MKL
EIGEN_BLAS_SYMV_SPECIALIZATION(double, double, dsymv)
EIGEN_BLAS_SYMV_SPECIALIZATION(float, float, ssymv)
EIGEN_BLAS_SYMV_SPECIALIZATION(dcomplex, MKL_Complex16, zhemv)
EIGEN_BLAS_SYMV_SPECIALIZATION(scomplex, MKL_Complex8, chemv)
#else
EIGEN_BLAS_SYMV_SPECIALIZATION(double, double, dsymv_)
EIGEN_BLAS_SYMV_SPECIALIZATION(float, float, ssymv_)
EIGEN_BLAS_SYMV_SPECIALIZATION(dcomplex, double, zhemv_)
EIGEN_BLAS_SYMV_SPECIALIZATION(scomplex, float, chemv_)
#endif
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_BLAS_H
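As with ?SYMM above, the double-precision specialization reduces to a direct ?SYMV call; a stand-alone sketch against a Fortran BLAS (hand-written declaration, plain int indices for simplicity; incx/incy are fixed to 1 and beta to 1 exactly as in the macro):

extern "C" void dsymv_(const char* uplo, const int* n, const double* alpha,
                       const double* a, const int* lda,
                       const double* x, const int* incx,
                       const double* beta, double* y, const int* incy);

// y = A*x + y with A an n x n symmetric matrix, lower triangle stored,
// column-major.
void symv_demo(const double* A, const double* x, double* y, int n)
{
  char uplo = 'L';
  int lda = n, inc = 1;
  double alpha = 1.0, beta = 1.0;
  dsymv_(&uplo, &n, &alpha, A, &lda, x, &inc, &beta, y, &inc);
}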
......@@ -18,21 +18,19 @@
namespace Eigen {
template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjLhs, bool ConjRhs>
struct selfadjoint_rank1_update;
template<typename Scalar, typename Index, int UpLo, bool ConjLhs, bool ConjRhs>
struct selfadjoint_rank1_update<Scalar,Index,ColMajor,UpLo,ConjLhs,ConjRhs>
{
static void run(Index size, Scalar* mat, Index stride, const Scalar* vec, Scalar alpha)
static void run(Index size, Scalar* mat, Index stride, const Scalar* vecX, const Scalar* vecY, const Scalar& alpha)
{
internal::conj_if<ConjRhs> cj;
typedef Map<const Matrix<Scalar,Dynamic,1> > OtherMap;
typedef typename internal::conditional<ConjLhs,typename OtherMap::ConjugateReturnType,const OtherMap&>::type ConjRhsType;
typedef typename internal::conditional<ConjLhs,typename OtherMap::ConjugateReturnType,const OtherMap&>::type ConjLhsType;
for (Index i=0; i<size; ++i)
{
Map<Matrix<Scalar,Dynamic,1> >(mat+stride*i+(UpLo==Lower ? i : 0), (UpLo==Lower ? size-i : (i+1)))
+= (alpha * cj(vec[i])) * ConjRhsType(OtherMap(vec+(UpLo==Lower ? i : 0),UpLo==Lower ? size-i : (i+1)));
+= (alpha * cj(vecY[i])) * ConjLhsType(OtherMap(vecX+(UpLo==Lower ? i : 0),UpLo==Lower ? size-i : (i+1)));
}
}
};
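Written out elementwise, the update above with ConjLhs=false and ConjRhs=true adds alpha * vecX * vecY^H to the stored lower triangle; a loop-based reference sketch:

#include <complex>

using Cplx = std::complex<double>;

// mat(lower) += alpha * x * y^H, mat column-major with outer stride `stride`.
// Illustration only; the real code maps each column onto an Eigen vector
// expression instead of looping.
void rank1_update_lower_ref(long size, Cplx* mat, long stride,
                            const Cplx* x, const Cplx* y, Cplx alpha)
{
  for (long i = 0; i < size; ++i)          // column i, rows i..size-1
  {
    Cplx s = alpha * std::conj(y[i]);
    for (long r = i; r < size; ++r)
      mat[stride * i + r] += s * x[r];     // only the lower triangle is touched
  }
}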
......@@ -40,9 +38,9 @@ struct selfadjoint_rank1_update<Scalar,Index,ColMajor,UpLo,ConjLhs,ConjRhs>
template<typename Scalar, typename Index, int UpLo, bool ConjLhs, bool ConjRhs>
struct selfadjoint_rank1_update<Scalar,Index,RowMajor,UpLo,ConjLhs,ConjRhs>
{
static void run(Index size, Scalar* mat, Index stride, const Scalar* vec, Scalar alpha)
static void run(Index size, Scalar* mat, Index stride, const Scalar* vecX, const Scalar* vecY, const Scalar& alpha)
{
selfadjoint_rank1_update<Scalar,Index,ColMajor,UpLo==Lower?Upper:Lower,ConjRhs,ConjLhs>::run(size,mat,stride,vec,alpha);
selfadjoint_rank1_update<Scalar,Index,ColMajor,UpLo==Lower?Upper:Lower,ConjRhs,ConjLhs>::run(size,mat,stride,vecY,vecX,alpha);
}
};
......@@ -52,10 +50,9 @@ struct selfadjoint_product_selector;
template<typename MatrixType, typename OtherType, int UpLo>
struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,true>
{
static void run(MatrixType& mat, const OtherType& other, typename MatrixType::Scalar alpha)
static void run(MatrixType& mat, const OtherType& other, const typename MatrixType::Scalar& alpha)
{
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::Index Index;
typedef internal::blas_traits<OtherType> OtherBlasTraits;
typedef typename OtherBlasTraits::DirectLinearAccessType ActualOtherType;
typedef typename internal::remove_all<ActualOtherType>::type _ActualOtherType;
......@@ -78,17 +75,16 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,true>
selfadjoint_rank1_update<Scalar,Index,StorageOrder,UpLo,
OtherBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex,
(!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex>
::run(other.size(), mat.data(), mat.outerStride(), actualOtherPtr, actualAlpha);
::run(other.size(), mat.data(), mat.outerStride(), actualOtherPtr, actualOtherPtr, actualAlpha);
}
};
template<typename MatrixType, typename OtherType, int UpLo>
struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false>
{
static void run(MatrixType& mat, const OtherType& other, typename MatrixType::Scalar alpha)
static void run(MatrixType& mat, const OtherType& other, const typename MatrixType::Scalar& alpha)
{
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::Index Index;
typedef internal::blas_traits<OtherType> OtherBlasTraits;
typedef typename OtherBlasTraits::DirectLinearAccessType ActualOtherType;
typedef typename internal::remove_all<ActualOtherType>::type _ActualOtherType;
......@@ -96,15 +92,27 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false>
Scalar actualAlpha = alpha * OtherBlasTraits::extractScalarFactor(other.derived());
enum { IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0 };
enum {
IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0,
OtherIsRowMajor = _ActualOtherType::Flags&RowMajorBit ? 1 : 0
};
Index size = mat.cols();
Index depth = actualOther.cols();
typedef internal::gemm_blocking_space<IsRowMajor ? RowMajor : ColMajor,Scalar,Scalar,
MatrixType::MaxColsAtCompileTime, MatrixType::MaxColsAtCompileTime, _ActualOtherType::MaxColsAtCompileTime> BlockingType;
BlockingType blocking(size, size, depth, 1, false);
internal::general_matrix_matrix_triangular_product<Index,
Scalar, _ActualOtherType::Flags&RowMajorBit ? RowMajor : ColMajor, OtherBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex,
Scalar, _ActualOtherType::Flags&RowMajorBit ? ColMajor : RowMajor, (!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex,
MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor, UpLo>
::run(mat.cols(), actualOther.cols(),
Scalar, OtherIsRowMajor ? RowMajor : ColMajor, OtherBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex,
Scalar, OtherIsRowMajor ? ColMajor : RowMajor, (!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex,
IsRowMajor ? RowMajor : ColMajor, MatrixType::InnerStrideAtCompileTime, UpLo>
::run(size, depth,
&actualOther.coeffRef(0,0), actualOther.outerStride(), &actualOther.coeffRef(0,0), actualOther.outerStride(),
mat.data(), mat.outerStride(), actualAlpha);
mat.data(), mat.innerStride(), mat.outerStride(), actualAlpha, blocking);
}
};
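A usage sketch of the two selectors above (standard Eigen API): a vector argument takes the rank-1 path, a matrix argument the blocked general_matrix_matrix_triangular_product path:

#include <Eigen/Dense>

void rank_update_usage()
{
  Eigen::MatrixXcd A = Eigen::MatrixXcd::Zero(16, 16);
  Eigen::VectorXcd u = Eigen::VectorXcd::Random(16);
  Eigen::MatrixXcd U = Eigen::MatrixXcd::Random(16, 4);
  A.selfadjointView<Eigen::Lower>().rankUpdate(u);      // rank-1: A += u * u^H
  A.selfadjointView<Eigen::Lower>().rankUpdate(U, 2.0); // rank-k: A += 2 * U * U^H
}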
......@@ -113,7 +121,7 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false>
template<typename MatrixType, unsigned int UpLo>
template<typename DerivedU>
SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
::rankUpdate(const MatrixBase<DerivedU>& u, Scalar alpha)
::rankUpdate(const MatrixBase<DerivedU>& u, const Scalar& alpha)
{
selfadjoint_product_selector<MatrixType,DerivedU,UpLo>::run(_expression().const_cast_derived(), u.derived(), alpha);
......
......@@ -24,14 +24,14 @@ struct selfadjoint_rank2_update_selector;
template<typename Scalar, typename Index, typename UType, typename VType>
struct selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Lower>
{
static void run(Scalar* mat, Index stride, const UType& u, const VType& v, Scalar alpha)
static void run(Scalar* mat, Index stride, const UType& u, const VType& v, const Scalar& alpha)
{
const Index size = u.size();
for (Index i=0; i<size; ++i)
{
Map<Matrix<Scalar,Dynamic,1> >(mat+stride*i+i, size-i) +=
(conj(alpha) * conj(u.coeff(i))) * v.tail(size-i)
+ (alpha * conj(v.coeff(i))) * u.tail(size-i);
(numext::conj(alpha) * numext::conj(u.coeff(i))) * v.tail(size-i)
+ (alpha * numext::conj(v.coeff(i))) * u.tail(size-i);
}
}
};
......@@ -39,13 +39,13 @@ struct selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Lower>
template<typename Scalar, typename Index, typename UType, typename VType>
struct selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Upper>
{
static void run(Scalar* mat, Index stride, const UType& u, const VType& v, Scalar alpha)
static void run(Scalar* mat, Index stride, const UType& u, const VType& v, const Scalar& alpha)
{
const Index size = u.size();
for (Index i=0; i<size; ++i)
Map<Matrix<Scalar,Dynamic,1> >(mat+stride*i, i+1) +=
(conj(alpha) * conj(u.coeff(i))) * v.head(i+1)
+ (alpha * conj(v.coeff(i))) * u.head(i+1);
(numext::conj(alpha) * numext::conj(u.coeff(i))) * v.head(i+1)
+ (alpha * numext::conj(v.coeff(i))) * u.head(i+1);
}
};
......@@ -58,7 +58,7 @@ template<bool Cond, typename T> struct conj_expr_if
template<typename MatrixType, unsigned int UpLo>
template<typename DerivedU, typename DerivedV>
SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
::rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, Scalar alpha)
::rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, const Scalar& alpha)
{
typedef internal::blas_traits<DerivedU> UBlasTraits;
typedef typename UBlasTraits::DirectLinearAccessType ActualUType;
......@@ -75,15 +75,15 @@ SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
enum { IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0 };
Scalar actualAlpha = alpha * UBlasTraits::extractScalarFactor(u.derived())
* internal::conj(VBlasTraits::extractScalarFactor(v.derived()));
* numext::conj(VBlasTraits::extractScalarFactor(v.derived()));
if (IsRowMajor)
actualAlpha = internal::conj(actualAlpha);
actualAlpha = numext::conj(actualAlpha);
internal::selfadjoint_rank2_update_selector<Scalar, Index,
typename internal::remove_all<typename internal::conj_expr_if<IsRowMajor ^ UBlasTraits::NeedToConjugate,_ActualUType>::type>::type,
typename internal::remove_all<typename internal::conj_expr_if<IsRowMajor ^ VBlasTraits::NeedToConjugate,_ActualVType>::type>::type,
typedef typename internal::remove_all<typename internal::conj_expr_if<IsRowMajor ^ UBlasTraits::NeedToConjugate,_ActualUType>::type>::type UType;
typedef typename internal::remove_all<typename internal::conj_expr_if<IsRowMajor ^ VBlasTraits::NeedToConjugate,_ActualVType>::type>::type VType;
internal::selfadjoint_rank2_update_selector<Scalar, Index, UType, VType,
(IsRowMajor ? int(UpLo==Upper ? Lower : Upper) : UpLo)>
::run(_expression().const_cast_derived().data(),_expression().outerStride(),actualU,actualV,actualAlpha);
::run(_expression().const_cast_derived().data(),_expression().outerStride(),UType(actualU),VType(actualV),actualAlpha);
return *this;
}
......
......@@ -45,23 +45,25 @@ template <typename Scalar, typename Index,
int Mode, bool LhsIsTriangular,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs,
int ResStorageOrder, int Version = Specialized>
int ResStorageOrder, int ResInnerStride,
int Version = Specialized>
struct product_triangular_matrix_matrix;
template <typename Scalar, typename Index,
int Mode, bool LhsIsTriangular,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs, int Version>
int RhsStorageOrder, bool ConjugateRhs,
int ResInnerStride, int Version>
struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
LhsStorageOrder,ConjugateLhs,
RhsStorageOrder,ConjugateRhs,RowMajor,Version>
RhsStorageOrder,ConjugateRhs,RowMajor,ResInnerStride,Version>
{
static EIGEN_STRONG_INLINE void run(
Index rows, Index cols, Index depth,
const Scalar* lhs, Index lhsStride,
const Scalar* rhs, Index rhsStride,
Scalar* res, Index resStride,
Scalar alpha, level3_blocking<Scalar,Scalar>& blocking)
Scalar* res, Index resIncr, Index resStride,
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{
product_triangular_matrix_matrix<Scalar, Index,
(Mode&(UnitDiag|ZeroDiag)) | ((Mode&Upper) ? Lower : Upper),
......@@ -70,18 +72,19 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
ConjugateRhs,
LhsStorageOrder==RowMajor ? ColMajor : RowMajor,
ConjugateLhs,
ColMajor>
::run(cols, rows, depth, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha, blocking);
ColMajor, ResInnerStride>
::run(cols, rows, depth, rhs, rhsStride, lhs, lhsStride, res, resIncr, resStride, alpha, blocking);
}
};
// implements col-major += alpha * op(triangular) * op(general)
template <typename Scalar, typename Index, int Mode,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs, int Version>
int RhsStorageOrder, bool ConjugateRhs,
int ResInnerStride, int Version>
struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
LhsStorageOrder,ConjugateLhs,
RhsStorageOrder,ConjugateRhs,ColMajor,Version>
RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,Version>
{
typedef gebp_traits<Scalar,Scalar> Traits;
......@@ -95,8 +98,22 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
Index _rows, Index _cols, Index _depth,
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride,
Scalar alpha, level3_blocking<Scalar,Scalar>& blocking)
Scalar* res, Index resIncr, Index resStride,
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
};
template <typename Scalar, typename Index, int Mode,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs,
int ResInnerStride, int Version>
EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
LhsStorageOrder,ConjugateLhs,
RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,Version>::run(
Index _rows, Index _cols, Index _depth,
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* _res, Index resIncr, Index resStride,
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{
// strip zeros
Index diagSize = (std::min)(_rows,_depth);
......@@ -104,30 +121,42 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
Index depth = IsLower ? diagSize : _depth;
Index cols = _cols;
const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper;
typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
LhsMapper lhs(_lhs,lhsStride);
RhsMapper rhs(_rhs,rhsStride);
ResMapper res(_res, resStride, resIncr);
Index kc = blocking.kc(); // cache block size along the K direction
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
// The small panel size must not be larger than the blocking size.
// Usually this is never the case because SmallPanelWidth^2 is very small
// compared to the L2 cache size, but let's be safe:
Index panelWidth = (std::min)(Index(SmallPanelWidth),(std::min)(kc,mc));
std::size_t sizeA = kc*mc;
std::size_t sizeB = kc*cols;
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW());
Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer;
// To work around an "error: member reference base type 'Matrix<...>
// (Eigen::internal::constructor_without_unaligned_array_assert (*)())' is
// not a structure or union" compilation error in nvcc (tested V8.0.61),
// create a dummy internal::constructor_without_unaligned_array_assert
// object to pass to the Matrix constructor.
internal::constructor_without_unaligned_array_assert a;
Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer(a);
triangularBuffer.setZero();
if((Mode&ZeroDiag)==ZeroDiag)
triangularBuffer.diagonal().setZero();
else
triangularBuffer.diagonal().setOnes();
gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder> pack_rhs;
for(Index k2=IsLower ? depth : 0;
IsLower ? k2>0 : k2<depth;
......@@ -143,7 +172,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
k2 = k2+actual_kc-kc;
}
pack_rhs(blockB, &rhs(actual_k2,0), rhsStride, actual_kc, cols);
pack_rhs(blockB, rhs.getSubMapper(actual_k2,0), actual_kc, cols);
// the selected lhs panel has to be split into three different parts:
// 1 - the part which is zero => skip it
......@@ -154,9 +183,9 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
if(IsLower || actual_k2<rows)
{
// for each small vertical panels of lhs
for (Index k1=0; k1<actual_kc; k1+=SmallPanelWidth)
for (Index k1=0; k1<actual_kc; k1+=panelWidth)
{
Index actualPanelWidth = std::min<Index>(actual_kc-k1, SmallPanelWidth);
Index actualPanelWidth = std::min<Index>(actual_kc-k1, panelWidth);
Index lengthTarget = IsLower ? actual_kc-k1-actualPanelWidth : k1;
Index startBlock = actual_k2+k1;
Index blockBOffset = k1;
......@@ -171,20 +200,22 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
for (Index i=IsLower ? k+1 : 0; IsLower ? i<actualPanelWidth : i<k; ++i)
triangularBuffer.coeffRef(i,k) = lhs(startBlock+i,startBlock+k);
}
pack_lhs(blockA, triangularBuffer.data(), triangularBuffer.outerStride(), actualPanelWidth, actualPanelWidth);
pack_lhs(blockA, LhsMapper(triangularBuffer.data(), triangularBuffer.outerStride()), actualPanelWidth, actualPanelWidth);
gebp_kernel(res+startBlock, resStride, blockA, blockB, actualPanelWidth, actualPanelWidth, cols, alpha,
actualPanelWidth, actual_kc, 0, blockBOffset, blockW);
gebp_kernel(res.getSubMapper(startBlock, 0), blockA, blockB,
actualPanelWidth, actualPanelWidth, cols, alpha,
actualPanelWidth, actual_kc, 0, blockBOffset);
// GEBP with remaining micro panel
if (lengthTarget>0)
{
Index startTarget = IsLower ? actual_k2+k1+actualPanelWidth : actual_k2;
pack_lhs(blockA, &lhs(startTarget,startBlock), lhsStride, actualPanelWidth, lengthTarget);
pack_lhs(blockA, lhs.getSubMapper(startTarget,startBlock), actualPanelWidth, lengthTarget);
gebp_kernel(res+startTarget, resStride, blockA, blockB, lengthTarget, actualPanelWidth, cols, alpha,
actualPanelWidth, actual_kc, 0, blockBOffset, blockW);
gebp_kernel(res.getSubMapper(startTarget, 0), blockA, blockB,
lengthTarget, actualPanelWidth, cols, alpha,
actualPanelWidth, actual_kc, 0, blockBOffset);
}
}
}
......@@ -195,23 +226,24 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
for(Index i2=start; i2<end; i2+=mc)
{
const Index actual_mc = (std::min)(i2+mc,end)-i2;
gemm_pack_lhs<Scalar, Index, Traits::mr,Traits::LhsProgress, LhsStorageOrder,false>()
(blockA, &lhs(i2, actual_k2), lhsStride, actual_kc, actual_mc);
gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr,Traits::LhsProgress, LhsStorageOrder,false>()
(blockA, lhs.getSubMapper(i2, actual_k2), actual_kc, actual_mc);
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha, -1, -1, 0, 0, blockW);
gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc,
actual_kc, cols, alpha, -1, -1, 0, 0);
}
}
}
}
};
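The recurring mechanical change in these kernels is the move from raw (pointer, stride) arguments to mapper objects that also carry the result's inner stride (ResInnerStride) and expose getSubMapper. A toy sketch of the idea, assuming only what the calls above rely on (the real blas_data_mapper additionally handles alignment and packet access):

template<typename Scalar, typename Index>
struct ToyColMajorMapper {
  Scalar* data; Index stride; Index incr;   // outer (column) stride, inner (row) increment
  ToyColMajorMapper(Scalar* d, Index s, Index i) : data(d), stride(s), incr(i) {}
  Scalar& operator()(Index row, Index col) const { return data[row*incr + col*stride]; }
  // A mapper for the sub-block starting at (i,j), as used by the kernels above:
  ToyColMajorMapper getSubMapper(Index i, Index j) const {
    return ToyColMajorMapper(data + i*incr + j*stride, stride, incr);
  }
};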
// implements col-major += alpha * op(general) * op(triangular)
template <typename Scalar, typename Index, int Mode,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs, int Version>
int RhsStorageOrder, bool ConjugateRhs,
int ResInnerStride, int Version>
struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
LhsStorageOrder,ConjugateLhs,
RhsStorageOrder,ConjugateRhs,ColMajor,Version>
RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,Version>
{
typedef gebp_traits<Scalar,Scalar> Traits;
enum {
......@@ -224,40 +256,58 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
Index _rows, Index _cols, Index _depth,
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride,
Scalar alpha, level3_blocking<Scalar,Scalar>& blocking)
Scalar* res, Index resIncr, Index resStride,
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
};
template <typename Scalar, typename Index, int Mode,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs,
int ResInnerStride, int Version>
EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
LhsStorageOrder,ConjugateLhs,
RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,Version>::run(
Index _rows, Index _cols, Index _depth,
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* _res, Index resIncr, Index resStride,
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{
const Index PacketBytes = packet_traits<Scalar>::size*sizeof(Scalar);
// strip zeros
Index diagSize = (std::min)(_cols,_depth);
Index rows = _rows;
Index depth = IsLower ? _depth : diagSize;
Index cols = IsLower ? diagSize : _cols;
const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper;
typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
LhsMapper lhs(_lhs,lhsStride);
RhsMapper rhs(_rhs,rhsStride);
ResMapper res(_res, resStride, resIncr);
Index kc = blocking.kc(); // cache block size along the K direction
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
std::size_t sizeA = kc*mc;
std::size_t sizeB = kc*cols;
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
std::size_t sizeB = kc*cols+EIGEN_MAX_ALIGN_BYTES/sizeof(Scalar);
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW());
Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer;
internal::constructor_without_unaligned_array_assert a;
Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer(a);
triangularBuffer.setZero();
if((Mode&ZeroDiag)==ZeroDiag)
triangularBuffer.diagonal().setZero();
else
triangularBuffer.diagonal().setOnes();
gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder,false,true> pack_rhs_panel;
gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder> pack_rhs;
gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder,false,true> pack_rhs_panel;
for(Index k2=IsLower ? 0 : depth;
IsLower ? k2<depth : k2>0;
......@@ -279,8 +329,9 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
Index ts = (IsLower && actual_k2>=cols) ? 0 : actual_kc;
Scalar* geb = blockB+ts*ts;
geb = geb + internal::first_aligned<PacketBytes>(geb,PacketBytes/sizeof(Scalar));
pack_rhs(geb, &rhs(actual_k2,IsLower ? 0 : k2), rhsStride, actual_kc, rs);
pack_rhs(geb, rhs.getSubMapper(actual_k2,IsLower ? 0 : k2), actual_kc, rs);
// pack the triangular part of the rhs padding the unrolled blocks with zeros
if(ts>0)
......@@ -293,7 +344,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
Index panelLength = IsLower ? actual_kc-j2-actualPanelWidth : j2;
// general part
pack_rhs_panel(blockB+j2*actual_kc,
&rhs(actual_k2+panelOffset, actual_j2), rhsStride,
rhs.getSubMapper(actual_k2+panelOffset, actual_j2),
panelLength, actualPanelWidth,
actual_kc, panelOffset);
......@@ -307,7 +358,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
}
pack_rhs_panel(blockB+j2*actual_kc,
triangularBuffer.data(), triangularBuffer.outerStride(),
RhsMapper(triangularBuffer.data(), triangularBuffer.outerStride()),
actualPanelWidth, actualPanelWidth,
actual_kc, j2);
}
......@@ -316,7 +367,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
for (Index i2=0; i2<rows; i2+=mc)
{
const Index actual_mc = (std::min)(mc,rows-i2);
pack_lhs(blockA, &lhs(i2, actual_k2), lhsStride, actual_kc, actual_mc);
pack_lhs(blockA, lhs.getSubMapper(i2, actual_k2), actual_kc, actual_mc);
// triangular kernel
if(ts>0)
......@@ -327,50 +378,51 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
Index panelLength = IsLower ? actual_kc-j2 : j2+actualPanelWidth;
Index blockOffset = IsLower ? j2 : 0;
gebp_kernel(res+i2+(actual_k2+j2)*resStride, resStride,
gebp_kernel(res.getSubMapper(i2, actual_k2 + j2),
blockA, blockB+j2*actual_kc,
actual_mc, panelLength, actualPanelWidth,
alpha,
actual_kc, actual_kc, // strides
blockOffset, blockOffset,// offsets
blockW); // workspace
blockOffset, blockOffset);// offsets
}
}
gebp_kernel(res+i2+(IsLower ? 0 : k2)*resStride, resStride,
gebp_kernel(res.getSubMapper(i2, IsLower ? 0 : k2),
blockA, geb, actual_mc, actual_kc, rs,
alpha,
-1, -1, 0, 0, blockW);
-1, -1, 0, 0);
}
}
}
};
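Two related changes above belong together: sizeB reserves EIGEN_MAX_ALIGN_BYTES/sizeof(Scalar) extra scalars, and geb is bumped past the packed triangular part (ts*ts scalars, which need not be a multiple of the packet size) to the next aligned address via first_aligned. A generic sketch of that realignment, assuming a power-of-two alignment:

#include <cstdint>

// Returns p advanced to the next Alignment-byte boundary (Alignment a power of two).
template<int Alignment, typename Scalar>
Scalar* next_aligned(Scalar* p) {
  std::uintptr_t a = reinterpret_cast<std::uintptr_t>(p);
  a = (a + Alignment - 1) & ~std::uintptr_t(Alignment - 1);
  return reinterpret_cast<Scalar*>(a);
}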
/***************************************************************************
* Wrapper to product_triangular_matrix_matrix
***************************************************************************/
template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs>
struct traits<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false> >
: traits<ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>, Lhs, Rhs> >
{};
} // end namespace internal
namespace internal {
template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs>
struct TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
: public ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>, Lhs, Rhs >
struct triangular_product_impl<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
{
EIGEN_PRODUCT_PUBLIC_INTERFACE(TriangularProduct)
template<typename Dest> static void run(Dest& dst, const Lhs &a_lhs, const Rhs &a_rhs, const typename Dest::Scalar& alpha)
{
typedef typename Lhs::Scalar LhsScalar;
typedef typename Rhs::Scalar RhsScalar;
typedef typename Dest::Scalar Scalar;
TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
typedef internal::blas_traits<Lhs> LhsBlasTraits;
typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
typedef typename internal::remove_all<ActualLhsType>::type ActualLhsTypeCleaned;
typedef internal::blas_traits<Rhs> RhsBlasTraits;
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;
template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
{
typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);
typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
* RhsBlasTraits::extractScalarFactor(m_rhs);
LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(a_lhs);
RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(a_rhs);
Scalar actualAlpha = alpha * lhs_alpha * rhs_alpha;
typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,4> BlockingType;
......@@ -381,23 +433,40 @@ struct TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
Index stripedDepth = LhsIsTriangular ? ((!IsLower) ? lhs.cols() : (std::min)(lhs.cols(),lhs.rows()))
: ((IsLower) ? rhs.rows() : (std::min)(rhs.rows(),rhs.cols()));
BlockingType blocking(stripedRows, stripedCols, stripedDepth);
BlockingType blocking(stripedRows, stripedCols, stripedDepth, 1, false);
internal::product_triangular_matrix_matrix<Scalar, Index,
Mode, LhsIsTriangular,
(internal::traits<_ActualLhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
(internal::traits<_ActualRhsType>::Flags&RowMajorBit) ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
(internal::traits<Dest >::Flags&RowMajorBit) ? RowMajor : ColMajor>
(internal::traits<ActualLhsTypeCleaned>::Flags&RowMajorBit) ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
(internal::traits<ActualRhsTypeCleaned>::Flags&RowMajorBit) ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
(internal::traits<Dest >::Flags&RowMajorBit) ? RowMajor : ColMajor, Dest::InnerStrideAtCompileTime>
::run(
stripedRows, stripedCols, stripedDepth, // sizes
&lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
&rhs.coeffRef(0,0), rhs.outerStride(), // rhs info
&dst.coeffRef(0,0), dst.outerStride(), // result info
&dst.coeffRef(0,0), dst.innerStride(), dst.outerStride(), // result info
actualAlpha, blocking
);
// Apply correction if the diagonal is unit and a scalar factor was nested:
if ((Mode&UnitDiag)==UnitDiag)
{
if (LhsIsTriangular && lhs_alpha!=LhsScalar(1))
{
Index diagSize = (std::min)(lhs.rows(),lhs.cols());
dst.topRows(diagSize) -= ((lhs_alpha-LhsScalar(1))*a_rhs).topRows(diagSize);
}
else if ((!LhsIsTriangular) && rhs_alpha!=RhsScalar(1))
{
Index diagSize = (std::min)(rhs.rows(),rhs.cols());
dst.leftCols(diagSize) -= (rhs_alpha-RhsScalar(1))*a_lhs.leftCols(diagSize);
}
}
}
};
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_H
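About the correction added above: writing s for lhs_alpha, the expression (s*L).triangularView<UnitLower>()*B means strictLower(s*L)*B + B, but once blas_traits extracts s the kernel computes s*(strictLower(L)*B + B), which overshoots by (s-1)*B on the rows covered by the diagonal block; the subtraction removes exactly that. A small self-check against the public API (illustrative; diagSize equals 3 here, so topRows covers the whole matrix):

#include <Eigen/Dense>
#include <iostream>
using namespace Eigen;

int main() {
  Matrix3d A = Matrix3d::Random(), B = Matrix3d::Random();
  double s = 2.5;
  Matrix3d intended = (s * A).triangularView<UnitLower>() * B;
  Matrix3d raw = s * (A.triangularView<UnitLower>().toDenseMatrix() * B);
  raw -= (s - 1.0) * B;                          // the correction
  std::cout << (intended - raw).norm() << "\n";  // ~0 with the fix applied
}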
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
* Content : Eigen bindings to BLAS F77
* Triangular matrix * matrix product functionality based on ?TRMM.
********************************************************************************
*/
#ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_BLAS_H
#define EIGEN_TRIANGULAR_MATRIX_MATRIX_BLAS_H
namespace Eigen {
namespace internal {
template <typename Scalar, typename Index,
int Mode, bool LhsIsTriangular,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs,
int ResStorageOrder>
struct product_triangular_matrix_matrix_trmm :
product_triangular_matrix_matrix<Scalar,Index,Mode,
LhsIsTriangular,LhsStorageOrder,ConjugateLhs,
RhsStorageOrder, ConjugateRhs, ResStorageOrder, 1, BuiltIn> {};
// try to go to BLAS specialization
#define EIGEN_BLAS_TRMM_SPECIALIZE(Scalar, LhsIsTriangular) \
template <typename Index, int Mode, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_triangular_matrix_matrix<Scalar,Index, Mode, LhsIsTriangular, \
LhsStorageOrder,ConjugateLhs, RhsStorageOrder,ConjugateRhs,ColMajor,1,Specialized> { \
static inline void run(Index _rows, Index _cols, Index _depth, const Scalar* _lhs, Index lhsStride,\
const Scalar* _rhs, Index rhsStride, Scalar* res, Index resIncr, Index resStride, Scalar alpha, level3_blocking<Scalar,Scalar>& blocking) { \
EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \
eigen_assert(resIncr == 1); \
product_triangular_matrix_matrix_trmm<Scalar,Index,Mode, \
LhsIsTriangular,LhsStorageOrder,ConjugateLhs, \
RhsStorageOrder, ConjugateRhs, ColMajor>::run( \
_rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \
} \
};
EIGEN_BLAS_TRMM_SPECIALIZE(double, true)
EIGEN_BLAS_TRMM_SPECIALIZE(double, false)
EIGEN_BLAS_TRMM_SPECIALIZE(dcomplex, true)
EIGEN_BLAS_TRMM_SPECIALIZE(dcomplex, false)
EIGEN_BLAS_TRMM_SPECIALIZE(float, true)
EIGEN_BLAS_TRMM_SPECIALIZE(float, false)
EIGEN_BLAS_TRMM_SPECIALIZE(scomplex, true)
EIGEN_BLAS_TRMM_SPECIALIZE(scomplex, false)
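Note the gating in these specializations: the BLAS path is generated only for ResInnerStride==1 (the literal 1 in the template arguments), and eigen_assert(resIncr == 1) guards the runtime value, so destinations with a non-unit inner stride keep going through the built-in kernel.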
// implements col-major += alpha * op(triangular) * op(general)
#define EIGEN_BLAS_TRMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
template <typename Index, int Mode, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
LhsStorageOrder,ConjugateLhs,RhsStorageOrder,ConjugateRhs,ColMajor> \
{ \
enum { \
IsLower = (Mode&Lower) == Lower, \
SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
LowUp = IsLower ? Lower : Upper, \
conjA = ((LhsStorageOrder==ColMajor) && ConjugateLhs) ? 1 : 0 \
}; \
\
static void run( \
Index _rows, Index _cols, Index _depth, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resStride, \
EIGTYPE alpha, level3_blocking<EIGTYPE,EIGTYPE>& blocking) \
{ \
Index diagSize = (std::min)(_rows,_depth); \
Index rows = IsLower ? _rows : diagSize; \
Index depth = IsLower ? diagSize : _depth; \
Index cols = _cols; \
\
typedef Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> MatrixLhs; \
typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs; \
\
/* Non-square case - doesn't fit BLAS ?TRMM. Fall back to the default triangular product or call BLAS ?GEMM */ \
if (rows != depth) { \
\
/* FIXME handle mkl_domain_get_max_threads */ \
/*int nthr = mkl_domain_get_max_threads(EIGEN_BLAS_DOMAIN_BLAS);*/ int nthr = 1;\
\
if (((nthr==1) && (((std::max)(rows,depth)-diagSize)/(double)diagSize < 0.5))) { \
/* Most likely no benefit in calling TRMM or GEMM from BLAS */ \
product_triangular_matrix_matrix<EIGTYPE,Index,Mode,true, \
LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, 1, BuiltIn>::run( \
_rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, 1, resStride, alpha, blocking); \
/*std::cout << "TRMM_L: A is not square! Go to Eigen TRMM implementation!\n";*/ \
} else { \
/* Makes sense to call GEMM */ \
Map<const MatrixLhs, 0, OuterStride<> > lhsMap(_lhs,rows,depth,OuterStride<>(lhsStride)); \
MatrixLhs aa_tmp=lhsMap.template triangularView<Mode>(); \
BlasIndex aStride = convert_index<BlasIndex>(aa_tmp.outerStride()); \
gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth, 1, true); \
general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor,1>::run( \
rows, cols, depth, aa_tmp.data(), aStride, _rhs, rhsStride, res, 1, resStride, alpha, gemm_blocking, 0); \
\
/*std::cout << "TRMM_L: A is not square! Go to BLAS GEMM implementation! " << nthr<<" \n";*/ \
} \
return; \
} \
char side = 'L', transa, uplo, diag = 'N'; \
EIGTYPE *b; \
const EIGTYPE *a; \
BlasIndex m, n, lda, ldb; \
\
/* Set m, n */ \
m = convert_index<BlasIndex>(diagSize); \
n = convert_index<BlasIndex>(cols); \
\
/* Set trans */ \
transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \
\
/* Set b, ldb */ \
Map<const MatrixRhs, 0, OuterStride<> > rhs(_rhs,depth,cols,OuterStride<>(rhsStride)); \
MatrixX##EIGPREFIX b_tmp; \
\
if (ConjugateRhs) b_tmp = rhs.conjugate(); else b_tmp = rhs; \
b = b_tmp.data(); \
ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
\
/* Set uplo */ \
uplo = IsLower ? 'L' : 'U'; \
if (LhsStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \
/* Set a, lda */ \
Map<const MatrixLhs, 0, OuterStride<> > lhs(_lhs,rows,depth,OuterStride<>(lhsStride)); \
MatrixLhs a_tmp; \
\
if ((conjA!=0) || (SetDiag==0)) { \
if (conjA) a_tmp = lhs.conjugate(); else a_tmp = lhs; \
if (IsZeroDiag) \
a_tmp.diagonal().setZero(); \
else if (IsUnitDiag) \
a_tmp.diagonal().setOnes();\
a = a_tmp.data(); \
lda = convert_index<BlasIndex>(a_tmp.outerStride()); \
} else { \
a = _lhs; \
lda = convert_index<BlasIndex>(lhsStride); \
} \
/*std::cout << "TRMM_L: A is square! Go to BLAS TRMM implementation! \n";*/ \
/* call ?trmm*/ \
BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \
\
/* Add op(a_triangular) * b into res */ \
Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
res_tmp=res_tmp+b_tmp; \
} \
};
#ifdef EIGEN_USE_MKL
EIGEN_BLAS_TRMM_L(double, double, d, dtrmm)
EIGEN_BLAS_TRMM_L(dcomplex, MKL_Complex16, cd, ztrmm)
EIGEN_BLAS_TRMM_L(float, float, f, strmm)
EIGEN_BLAS_TRMM_L(scomplex, MKL_Complex8, cf, ctrmm)
#else
EIGEN_BLAS_TRMM_L(double, double, d, dtrmm_)
EIGEN_BLAS_TRMM_L(dcomplex, double, cd, ztrmm_)
EIGEN_BLAS_TRMM_L(float, float, f, strmm_)
EIGEN_BLAS_TRMM_L(scomplex, float, cf, ctrmm_)
#endif
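The non-square branch of the macro above chooses between Eigen's built-in kernel and a dense-copy ?GEMM using a fixed threshold. Restated as a standalone predicate (illustrative only; the name is ours):

#include <algorithm>

// True when the rectangular excess beyond the triangle is small enough that
// copying the triangle into a dense matrix for BLAS ?GEMM is unlikely to pay off.
inline bool prefer_builtin_trmm(long rows, long depth, long diagSize) {
  return ((std::max)(rows, depth) - diagSize) / double(diagSize) < 0.5;
}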
// implements col-major += alpha * op(general) * op(triangular)
#define EIGEN_BLAS_TRMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
template <typename Index, int Mode, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
LhsStorageOrder,ConjugateLhs,RhsStorageOrder,ConjugateRhs,ColMajor> \
{ \
enum { \
IsLower = (Mode&Lower) == Lower, \
SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
LowUp = IsLower ? Lower : Upper, \
conjA = ((RhsStorageOrder==ColMajor) && ConjugateRhs) ? 1 : 0 \
}; \
\
static void run( \
Index _rows, Index _cols, Index _depth, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resStride, \
EIGTYPE alpha, level3_blocking<EIGTYPE,EIGTYPE>& blocking) \
{ \
Index diagSize = (std::min)(_cols,_depth); \
Index rows = _rows; \
Index depth = IsLower ? _depth : diagSize; \
Index cols = IsLower ? diagSize : _cols; \
\
typedef Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> MatrixLhs; \
typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs; \
\
/* Non-square case - doesn't fit BLAS ?TRMM. Fall back to the default triangular product or call BLAS ?GEMM */ \
if (cols != depth) { \
\
int nthr = 1 /*mkl_domain_get_max_threads(EIGEN_BLAS_DOMAIN_BLAS)*/; \
\
if ((nthr==1) && (((std::max)(cols,depth)-diagSize)/(double)diagSize < 0.5)) { \
/* Most likely no benefit in calling TRMM or GEMM from BLAS */ \
product_triangular_matrix_matrix<EIGTYPE,Index,Mode,false, \
LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, 1, BuiltIn>::run( \
_rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, 1, resStride, alpha, blocking); \
/*std::cout << "TRMM_R: A is not square! Go to Eigen TRMM implementation!\n";*/ \
} else { \
/* Makes sense to call GEMM */ \
Map<const MatrixRhs, 0, OuterStride<> > rhsMap(_rhs,depth,cols, OuterStride<>(rhsStride)); \
MatrixRhs aa_tmp=rhsMap.template triangularView<Mode>(); \
BlasIndex aStride = convert_index<BlasIndex>(aa_tmp.outerStride()); \
gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth, 1, true); \
general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor,1>::run( \
rows, cols, depth, _lhs, lhsStride, aa_tmp.data(), aStride, res, 1, resStride, alpha, gemm_blocking, 0); \
\
/*std::cout << "TRMM_R: A is not square! Go to BLAS GEMM implementation! " << nthr<<" \n";*/ \
} \
return; \
} \
char side = 'R', transa, uplo, diag = 'N'; \
EIGTYPE *b; \
const EIGTYPE *a; \
BlasIndex m, n, lda, ldb; \
\
/* Set m, n */ \
m = convert_index<BlasIndex>(rows); \
n = convert_index<BlasIndex>(diagSize); \
\
/* Set trans */ \
transa = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 'C' : 'T') : 'N'; \
\
/* Set b, ldb */ \
Map<const MatrixLhs, 0, OuterStride<> > lhs(_lhs,rows,depth,OuterStride<>(lhsStride)); \
MatrixX##EIGPREFIX b_tmp; \
\
if (ConjugateLhs) b_tmp = lhs.conjugate(); else b_tmp = lhs; \
b = b_tmp.data(); \
ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
\
/* Set uplo */ \
uplo = IsLower ? 'L' : 'U'; \
if (RhsStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \
/* Set a, lda */ \
Map<const MatrixRhs, 0, OuterStride<> > rhs(_rhs,depth,cols, OuterStride<>(rhsStride)); \
MatrixRhs a_tmp; \
\
if ((conjA!=0) || (SetDiag==0)) { \
if (conjA) a_tmp = rhs.conjugate(); else a_tmp = rhs; \
if (IsZeroDiag) \
a_tmp.diagonal().setZero(); \
else if (IsUnitDiag) \
a_tmp.diagonal().setOnes();\
a = a_tmp.data(); \
lda = convert_index<BlasIndex>(a_tmp.outerStride()); \
} else { \
a = _rhs; \
lda = convert_index<BlasIndex>(rhsStride); \
} \
/*std::cout << "TRMM_R: A is square! Go to BLAS TRMM implementation! \n";*/ \
/* call ?trmm*/ \
BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \
\
/* Add op(a_triangular) * b into res */ \
Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
res_tmp=res_tmp+b_tmp; \
} \
};
#ifdef EIGEN_USE_MKL
EIGEN_BLAS_TRMM_R(double, double, d, dtrmm)
EIGEN_BLAS_TRMM_R(dcomplex, MKL_Complex16, cd, ztrmm)
EIGEN_BLAS_TRMM_R(float, float, f, strmm)
EIGEN_BLAS_TRMM_R(scomplex, MKL_Complex8, cf, ctrmm)
#else
EIGEN_BLAS_TRMM_R(double, double, d, dtrmm_)
EIGEN_BLAS_TRMM_R(dcomplex, double, cd, ztrmm_)
EIGEN_BLAS_TRMM_R(float, float, f, strmm_)
EIGEN_BLAS_TRMM_R(scomplex, float, cf, ctrmm_)
#endif
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_BLAS_H
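For reference, the non-MKL branch binds directly to the Fortran symbols (trailing-underscore convention). A sketch of the ?TRMM prototype these macros assume, shown for the double case with plain int indices (the actual declaration comes from Eigen's BLAS headers, and BlasIndex may differ):

extern "C" void dtrmm_(const char* side, const char* uplo, const char* transa,
                       const char* diag, const int* m, const int* n,
                       const double* alpha, const double* a, const int* lda,
                       double* b, const int* ldb);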
......@@ -20,14 +20,20 @@ struct triangular_matrix_vector_product;
template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int Version>
struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,ColMajor,Version>
{
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
enum {
IsLower = ((Mode&Lower)==Lower),
HasUnitDiag = (Mode & UnitDiag)==UnitDiag,
HasZeroDiag = (Mode & ZeroDiag)==ZeroDiag
};
static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, ResScalar alpha)
const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const RhsScalar& alpha);
};
template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int Version>
EIGEN_DONT_INLINE void triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,ColMajor,Version>
::run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const RhsScalar& alpha)
{
static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
Index size = (std::min)(_rows,_cols);
......@@ -45,6 +51,9 @@ struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
typedef Map<Matrix<ResScalar,Dynamic,1> > ResMap;
ResMap res(_res,rows);
typedef const_blas_data_mapper<LhsScalar,Index,ColMajor> LhsMapper;
typedef const_blas_data_mapper<RhsScalar,Index,RowMajor> RhsMapper;
for (Index pi=0; pi<size; pi+=PanelWidth)
{
Index actualPanelWidth = (std::min)(PanelWidth, size-pi);
......@@ -62,35 +71,40 @@ struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
if (r>0)
{
Index s = IsLower ? pi+actualPanelWidth : 0;
general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjLhs,RhsScalar,ConjRhs,BuiltIn>::run(
general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,ConjLhs,RhsScalar,RhsMapper,ConjRhs,BuiltIn>::run(
r, actualPanelWidth,
&lhs.coeffRef(s,pi), lhsStride,
&rhs.coeffRef(pi), rhsIncr,
LhsMapper(&lhs.coeffRef(s,pi), lhsStride),
RhsMapper(&rhs.coeffRef(pi), rhsIncr),
&res.coeffRef(s), resIncr, alpha);
}
}
if((!IsLower) && cols>size)
{
general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjLhs,RhsScalar,ConjRhs>::run(
general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,ConjLhs,RhsScalar,RhsMapper,ConjRhs>::run(
rows, cols-size,
&lhs.coeffRef(0,size), lhsStride,
&rhs.coeffRef(size), rhsIncr,
LhsMapper(&lhs.coeffRef(0,size), lhsStride),
RhsMapper(&rhs.coeffRef(size), rhsIncr),
_res, resIncr, alpha);
}
}
};
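For readability, the blocked kernel above is numerically equivalent, in the Lower non-unit-diagonal column-major case, to the following naive reference loop (a sketch of ours, not the implementation):

template<typename Scalar>
void naive_lower_trmv(int n, const Scalar* A, int lda,
                      const Scalar* x, Scalar* y, Scalar alpha) {
  for (int j = 0; j < n; ++j)       // walk columns (ColMajor storage)
    for (int i = j; i < n; ++i)     // only the lower triangle contributes
      y[i] += alpha * A[i + j*lda] * x[j];
}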
template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs,int Version>
struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,RowMajor,Version>
{
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
enum {
IsLower = ((Mode&Lower)==Lower),
HasUnitDiag = (Mode & UnitDiag)==UnitDiag,
HasZeroDiag = (Mode & ZeroDiag)==ZeroDiag
};
static void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, ResScalar alpha)
static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha);
};
template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs,int Version>
EIGEN_DONT_INLINE void triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,RowMajor,Version>
::run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha)
{
static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
Index diagSize = (std::min)(_rows,_cols);
......@@ -108,6 +122,9 @@ struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
typedef Map<Matrix<ResScalar,Dynamic,1>, 0, InnerStride<> > ResMap;
ResMap res(_res,rows,InnerStride<>(resIncr));
typedef const_blas_data_mapper<LhsScalar,Index,RowMajor> LhsMapper;
typedef const_blas_data_mapper<RhsScalar,Index,RowMajor> RhsMapper;
for (Index pi=0; pi<diagSize; pi+=PanelWidth)
{
Index actualPanelWidth = (std::min)(PanelWidth, diagSize-pi);
......@@ -125,105 +142,88 @@ struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
if (r>0)
{
Index s = IsLower ? 0 : pi + actualPanelWidth;
general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjLhs,RhsScalar,ConjRhs,BuiltIn>::run(
general_matrix_vector_product<Index,LhsScalar,LhsMapper,RowMajor,ConjLhs,RhsScalar,RhsMapper,ConjRhs,BuiltIn>::run(
actualPanelWidth, r,
&lhs.coeffRef(pi,s), lhsStride,
&rhs.coeffRef(s), rhsIncr,
LhsMapper(&lhs.coeffRef(pi,s), lhsStride),
RhsMapper(&rhs.coeffRef(s), rhsIncr),
&res.coeffRef(pi), resIncr, alpha);
}
}
if(IsLower && rows>diagSize)
{
general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjLhs,RhsScalar,ConjRhs>::run(
general_matrix_vector_product<Index,LhsScalar,LhsMapper,RowMajor,ConjLhs,RhsScalar,RhsMapper,ConjRhs>::run(
rows-diagSize, cols,
&lhs.coeffRef(diagSize,0), lhsStride,
&rhs.coeffRef(0), rhsIncr,
LhsMapper(&lhs.coeffRef(diagSize,0), lhsStride),
RhsMapper(&rhs.coeffRef(0), rhsIncr),
&res.coeffRef(diagSize), resIncr, alpha);
}
}
};
/***************************************************************************
* Wrapper to product_triangular_vector
***************************************************************************/
template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs>
struct traits<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,true> >
: traits<ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,true>, Lhs, Rhs> >
{};
template<int Mode, bool LhsIsTriangular, typename Lhs, typename Rhs>
struct traits<TriangularProduct<Mode,LhsIsTriangular,Lhs,true,Rhs,false> >
: traits<ProductBase<TriangularProduct<Mode,LhsIsTriangular,Lhs,true,Rhs,false>, Lhs, Rhs> >
{};
template<int StorageOrder>
template<int Mode,int StorageOrder>
struct trmv_selector;
} // end namespace internal
namespace internal {
template<int Mode, typename Lhs, typename Rhs>
struct TriangularProduct<Mode,true,Lhs,false,Rhs,true>
: public ProductBase<TriangularProduct<Mode,true,Lhs,false,Rhs,true>, Lhs, Rhs >
struct triangular_product_impl<Mode,true,Lhs,false,Rhs,true>
{
EIGEN_PRODUCT_PUBLIC_INTERFACE(TriangularProduct)
TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
template<typename Dest> static void run(Dest& dst, const Lhs &lhs, const Rhs &rhs, const typename Dest::Scalar& alpha)
{
eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
eigen_assert(dst.rows()==lhs.rows() && dst.cols()==rhs.cols());
internal::trmv_selector<(int(internal::traits<Lhs>::Flags)&RowMajorBit) ? RowMajor : ColMajor>::run(*this, dst, alpha);
internal::trmv_selector<Mode,(int(internal::traits<Lhs>::Flags)&RowMajorBit) ? RowMajor : ColMajor>::run(lhs, rhs, dst, alpha);
}
};
template<int Mode, typename Lhs, typename Rhs>
struct TriangularProduct<Mode,false,Lhs,true,Rhs,false>
: public ProductBase<TriangularProduct<Mode,false,Lhs,true,Rhs,false>, Lhs, Rhs >
struct triangular_product_impl<Mode,false,Lhs,true,Rhs,false>
{
EIGEN_PRODUCT_PUBLIC_INTERFACE(TriangularProduct)
TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}
template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
template<typename Dest> static void run(Dest& dst, const Lhs &lhs, const Rhs &rhs, const typename Dest::Scalar& alpha)
{
eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
eigen_assert(dst.rows()==lhs.rows() && dst.cols()==rhs.cols());
typedef TriangularProduct<(Mode & (UnitDiag|ZeroDiag)) | ((Mode & Lower) ? Upper : Lower),true,Transpose<const Rhs>,false,Transpose<const Lhs>,true> TriangularProductTranspose;
Transpose<Dest> dstT(dst);
internal::trmv_selector<(int(internal::traits<Rhs>::Flags)&RowMajorBit) ? ColMajor : RowMajor>::run(
TriangularProductTranspose(m_rhs.transpose(),m_lhs.transpose()), dstT, alpha);
internal::trmv_selector<(Mode & (UnitDiag|ZeroDiag)) | ((Mode & Lower) ? Upper : Lower),
(int(internal::traits<Rhs>::Flags)&RowMajorBit) ? ColMajor : RowMajor>
::run(rhs.transpose(),lhs.transpose(), dstT, alpha);
}
};
} // end namespace internal
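The second specialization above lowers vector-times-triangular to the transposed problem with Upper and Lower swapped. Checked against the public API, the identity in play is (sketch; function name is ours):

#include <Eigen/Dense>
using namespace Eigen;

void transpose_lowering_check() {
  Matrix3d T = Matrix3d::Random();
  RowVector3d x = RowVector3d::Random();
  RowVector3d direct  = x * T.triangularView<Lower>();
  RowVector3d lowered = (T.transpose().triangularView<Upper>() * x.transpose()).transpose();
  // direct and lowered agree up to rounding.
}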
namespace internal {
// TODO: find a way to factorize this piece of code with gemv_selector since the logic is exactly the same.
template<> struct trmv_selector<ColMajor>
template<int Mode> struct trmv_selector<Mode,ColMajor>
{
template<int Mode, typename Lhs, typename Rhs, typename Dest>
static void run(const TriangularProduct<Mode,true,Lhs,false,Rhs,true>& prod, Dest& dest, typename TriangularProduct<Mode,true,Lhs,false,Rhs,true>::Scalar alpha)
template<typename Lhs, typename Rhs, typename Dest>
static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
{
typedef TriangularProduct<Mode,true,Lhs,false,Rhs,true> ProductType;
typedef typename ProductType::Index Index;
typedef typename ProductType::LhsScalar LhsScalar;
typedef typename ProductType::RhsScalar RhsScalar;
typedef typename ProductType::Scalar ResScalar;
typedef typename ProductType::RealScalar RealScalar;
typedef typename ProductType::ActualLhsType ActualLhsType;
typedef typename ProductType::ActualRhsType ActualRhsType;
typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
typename internal::add_const_on_value_type<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
* RhsBlasTraits::extractScalarFactor(prod.rhs());
typedef typename Lhs::Scalar LhsScalar;
typedef typename Rhs::Scalar RhsScalar;
typedef typename Dest::Scalar ResScalar;
typedef typename Dest::RealScalar RealScalar;
typedef internal::blas_traits<Lhs> LhsBlasTraits;
typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
typedef internal::blas_traits<Rhs> RhsBlasTraits;
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
typedef Map<Matrix<ResScalar,Dynamic,1>, EIGEN_PLAIN_ENUM_MIN(AlignedMax,internal::packet_traits<ResScalar>::size)> MappedDest;
typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);
typename internal::add_const_on_value_type<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);
LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(lhs);
RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(rhs);
ResScalar actualAlpha = alpha * lhs_alpha * rhs_alpha;
enum {
// FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
......@@ -235,7 +235,7 @@ template<> struct trmv_selector<ColMajor>
gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,MightCannotUseDest> static_dest;
bool alphaIsCompatible = (!ComplexByReal) || (imag(actualAlpha)==RealScalar(0));
bool alphaIsCompatible = (!ComplexByReal) || (numext::imag(actualAlpha)==RealScalar(0));
bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible;
RhsScalar compatibleAlpha = get_factor<ResScalar,RhsScalar>::run(actualAlpha);
......@@ -246,7 +246,7 @@ template<> struct trmv_selector<ColMajor>
if(!evalToDest)
{
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
int size = dest.size();
Index size = dest.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
if(!alphaIsCompatible)
......@@ -275,36 +275,42 @@ template<> struct trmv_selector<ColMajor>
else
dest = MappedDest(actualDestPtr, dest.size());
}
if ( ((Mode&UnitDiag)==UnitDiag) && (lhs_alpha!=LhsScalar(1)) )
{
Index diagSize = (std::min)(lhs.rows(),lhs.cols());
dest.head(diagSize) -= (lhs_alpha-LhsScalar(1))*rhs.head(diagSize);
}
}
};
template<> struct trmv_selector<RowMajor>
template<int Mode> struct trmv_selector<Mode,RowMajor>
{
template<int Mode, typename Lhs, typename Rhs, typename Dest>
static void run(const TriangularProduct<Mode,true,Lhs,false,Rhs,true>& prod, Dest& dest, typename TriangularProduct<Mode,true,Lhs,false,Rhs,true>::Scalar alpha)
template<typename Lhs, typename Rhs, typename Dest>
static void run(const Lhs &lhs, const Rhs &rhs, Dest& dest, const typename Dest::Scalar& alpha)
{
typedef TriangularProduct<Mode,true,Lhs,false,Rhs,true> ProductType;
typedef typename ProductType::LhsScalar LhsScalar;
typedef typename ProductType::RhsScalar RhsScalar;
typedef typename ProductType::Scalar ResScalar;
typedef typename ProductType::Index Index;
typedef typename ProductType::ActualLhsType ActualLhsType;
typedef typename ProductType::ActualRhsType ActualRhsType;
typedef typename ProductType::_ActualRhsType _ActualRhsType;
typedef typename ProductType::LhsBlasTraits LhsBlasTraits;
typedef typename ProductType::RhsBlasTraits RhsBlasTraits;
typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(prod.lhs());
typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(prod.rhs());
ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs())
* RhsBlasTraits::extractScalarFactor(prod.rhs());
typedef typename Lhs::Scalar LhsScalar;
typedef typename Rhs::Scalar RhsScalar;
typedef typename Dest::Scalar ResScalar;
typedef internal::blas_traits<Lhs> LhsBlasTraits;
typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
typedef internal::blas_traits<Rhs> RhsBlasTraits;
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;
typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);
typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);
LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(lhs);
RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(rhs);
ResScalar actualAlpha = alpha * lhs_alpha * rhs_alpha;
enum {
DirectlyUseRhs = _ActualRhsType::InnerStrideAtCompileTime==1
DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1
};
gemv_static_vector_if<RhsScalar,_ActualRhsType::SizeAtCompileTime,_ActualRhsType::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;
gemv_static_vector_if<RhsScalar,ActualRhsTypeCleaned::SizeAtCompileTime,ActualRhsTypeCleaned::MaxSizeAtCompileTime,!DirectlyUseRhs> static_rhs;
ei_declare_aligned_stack_constructed_variable(RhsScalar,actualRhsPtr,actualRhs.size(),
DirectlyUseRhs ? const_cast<RhsScalar*>(actualRhs.data()) : static_rhs.data());
......@@ -312,10 +318,10 @@ template<> struct trmv_selector<RowMajor>
if(!DirectlyUseRhs)
{
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
int size = actualRhs.size();
Index size = actualRhs.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
Map<typename _ActualRhsType::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
Map<typename ActualRhsTypeCleaned::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;
}
internal::triangular_matrix_vector_product
......@@ -328,6 +334,12 @@ template<> struct trmv_selector<RowMajor>
actualRhsPtr,1,
dest.data(),dest.innerStride(),
actualAlpha);
if ( ((Mode&UnitDiag)==UnitDiag) && (lhs_alpha!=LhsScalar(1)) )
{
Index diagSize = (std::min)(lhs.rows(),lhs.cols());
dest.head(diagSize) -= (lhs_alpha-LhsScalar(1))*rhs.head(diagSize);
}
}
};
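This is the vector form of the same unit-diagonal correction as in the matrix-matrix path: with y = (s*L).triangularView<UnitLower>() * x, the kernel computes s*(strictLower(L)*x + x), so the first diagSize entries of dest are adjusted by -(s - 1)*x to restore strictLower(s*L)*x + x.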
......
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
* Content : Eigen bindings to BLAS F77
* Triangular matrix-vector product functionality based on ?TRMV.
********************************************************************************
*/
#ifndef EIGEN_TRIANGULAR_MATRIX_VECTOR_BLAS_H
#define EIGEN_TRIANGULAR_MATRIX_VECTOR_BLAS_H
namespace Eigen {
namespace internal {
/**********************************************************************
* This file implements triangular matrix-vector multiplication using BLAS
**********************************************************************/
// trmv/hemv specialization
template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int StorageOrder>
struct triangular_matrix_vector_product_trmv :
triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,StorageOrder,BuiltIn> {};
#define EIGEN_BLAS_TRMV_SPECIALIZE(Scalar) \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,ColMajor,Specialized> { \
static void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \
triangular_matrix_vector_product_trmv<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,ColMajor>::run( \
_rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
} \
}; \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,RowMajor,Specialized> { \
static void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \
triangular_matrix_vector_product_trmv<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,RowMajor>::run( \
_rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
} \
};
EIGEN_BLAS_TRMV_SPECIALIZE(double)
EIGEN_BLAS_TRMV_SPECIALIZE(float)
EIGEN_BLAS_TRMV_SPECIALIZE(dcomplex)
EIGEN_BLAS_TRMV_SPECIALIZE(scomplex)
// implements col-major: res += alpha * op(triangular) * vector
#define EIGEN_BLAS_TRMV_CM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX, BLASPOSTFIX) \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor> { \
enum { \
IsLower = (Mode&Lower) == Lower, \
SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
LowUp = IsLower ? Lower : Upper \
}; \
static void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
{ \
if (ConjLhs || IsZeroDiag) { \
triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor,BuiltIn>::run( \
_rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
return; \
}\
Index size = (std::min)(_rows,_cols); \
Index rows = IsLower ? _rows : size; \
Index cols = IsLower ? size : _cols; \
\
typedef VectorX##EIGPREFIX VectorRhs; \
EIGTYPE *x, *y;\
\
/* Set x*/ \
Map<const VectorRhs, 0, InnerStride<> > rhs(_rhs,cols,InnerStride<>(rhsIncr)); \
VectorRhs x_tmp; \
if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
x = x_tmp.data(); \
\
/* Square part handling */\
\
char trans, uplo, diag; \
BlasIndex m, n, lda, incx, incy; \
EIGTYPE const *a; \
EIGTYPE beta(1); \
\
/* Set m, n */ \
n = convert_index<BlasIndex>(size); \
lda = convert_index<BlasIndex>(lhsStride); \
incx = 1; \
incy = convert_index<BlasIndex>(resIncr); \
\
/* Set uplo, trans and diag*/ \
trans = 'N'; \
uplo = IsLower ? 'L' : 'U'; \
diag = IsUnitDiag ? 'U' : 'N'; \
\
/* call ?TRMV*/ \
BLASPREFIX##trmv##BLASPOSTFIX(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \
\
/* Add op(a_tr) * rhs into res */ \
BLASPREFIX##axpy##BLASPOSTFIX(&n, (const BLASTYPE*)&numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \
/* Non-square case - doesn't fit BLAS ?TRMV. Fall back to the default triangular product */ \
if (size<(std::max)(rows,cols)) { \
if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
x = x_tmp.data(); \
if (size<rows) { \
y = _res + size*resIncr; \
a = _lhs + size; \
m = convert_index<BlasIndex>(rows-size); \
n = convert_index<BlasIndex>(size); \
} \
else { \
x += size; \
y = _res; \
a = _lhs + size*lda; \
m = convert_index<BlasIndex>(size); \
n = convert_index<BlasIndex>(cols-size); \
} \
BLASPREFIX##gemv##BLASPOSTFIX(&trans, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)y, &incy); \
} \
} \
};
#ifdef EIGEN_USE_MKL
EIGEN_BLAS_TRMV_CM(double, double, d, d,)
EIGEN_BLAS_TRMV_CM(dcomplex, MKL_Complex16, cd, z,)
EIGEN_BLAS_TRMV_CM(float, float, f, s,)
EIGEN_BLAS_TRMV_CM(scomplex, MKL_Complex8, cf, c,)
#else
EIGEN_BLAS_TRMV_CM(double, double, d, d, _)
EIGEN_BLAS_TRMV_CM(dcomplex, double, cd, z, _)
EIGEN_BLAS_TRMV_CM(float, float, f, s, _)
EIGEN_BLAS_TRMV_CM(scomplex, float, cf, c, _)
#endif
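// Usage sketch (illustrative only, not part of this patch): with a BLAS
// backend enabled, a triangular-view times dense-vector product whose scalar
// type matches one of the four instantiations above is routed to ?TRMV.
#if 0
#include <Eigen/Dense>
void trmv_demo() {
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(8, 8);
  Eigen::VectorXd v = Eigen::VectorXd::Random(8);
  // Dispatches to the dtrmv-based column-major specialization.
  Eigen::VectorXd r = A.triangularView<Eigen::Lower>() * v;
}
#endif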
// implements row-major: res += alpha * op(triangular) * vector
#define EIGEN_BLAS_TRMV_RM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX, BLASPOSTFIX) \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor> { \
enum { \
IsLower = (Mode&Lower) == Lower, \
SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
LowUp = IsLower ? Lower : Upper \
}; \
static void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
{ \
if (IsZeroDiag) { \
triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor,BuiltIn>::run( \
_rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
return; \
}\
Index size = (std::min)(_rows,_cols); \
Index rows = IsLower ? _rows : size; \
Index cols = IsLower ? size : _cols; \
\
typedef VectorX##EIGPREFIX VectorRhs; \
EIGTYPE *x, *y;\
\
/* Set x*/ \
Map<const VectorRhs, 0, InnerStride<> > rhs(_rhs,cols,InnerStride<>(rhsIncr)); \
VectorRhs x_tmp; \
if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
x = x_tmp.data(); \
\
/* Square part handling */\
\
char trans, uplo, diag; \
BlasIndex m, n, lda, incx, incy; \
EIGTYPE const *a; \
EIGTYPE beta(1); \
\
/* Set n, lda, incx, incy */ \
n = convert_index<BlasIndex>(size); \
lda = convert_index<BlasIndex>(lhsStride); \
incx = 1; \
incy = convert_index<BlasIndex>(resIncr); \
\
/* Set uplo, trans and diag*/ \
trans = ConjLhs ? 'C' : 'T'; \
uplo = IsLower ? 'U' : 'L'; \
diag = IsUnitDiag ? 'U' : 'N'; \
\
/* call ?TRMV*/ \
BLASPREFIX##trmv##BLASPOSTFIX(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \
\
/* Add alpha * op(tri) * rhs (stored in x by ?TRMV) into res */ \
BLASPREFIX##axpy##BLASPOSTFIX(&n, (const BLASTYPE*)&numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \
/* Non-square case - doesn't fit BLAS ?TRMV; handle the remaining rectangular block with ?GEMV */ \
if (size<(std::max)(rows,cols)) { \
if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
x = x_tmp.data(); \
if (size<rows) { \
y = _res + size*resIncr; \
a = _lhs + size*lda; \
m = convert_index<BlasIndex>(rows-size); \
n = convert_index<BlasIndex>(size); \
} \
else { \
x += size; \
y = _res; \
a = _lhs + size; \
m = convert_index<BlasIndex>(size); \
n = convert_index<BlasIndex>(cols-size); \
} \
BLASPREFIX##gemv##BLASPOSTFIX(&trans, &n, &m, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)y, &incy); \
} \
} \
};
#ifdef EIGEN_USE_MKL
EIGEN_BLAS_TRMV_RM(double, double, d, d,)
EIGEN_BLAS_TRMV_RM(dcomplex, MKL_Complex16, cd, z,)
EIGEN_BLAS_TRMV_RM(float, float, f, s,)
EIGEN_BLAS_TRMV_RM(scomplex, MKL_Complex8, cf, c,)
#else
EIGEN_BLAS_TRMV_RM(double, double, d, d,_)
EIGEN_BLAS_TRMV_RM(dcomplex, double, cd, z,_)
EIGEN_BLAS_TRMV_RM(float, float, f, s,_)
EIGEN_BLAS_TRMV_RM(scomplex, float, cf, c,_)
#endif
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_TRIANGULAR_MATRIX_VECTOR_BLAS_H
......@@ -15,40 +15,51 @@ namespace Eigen {
namespace internal {
// if the rhs is row major, let's transpose the product
template <typename Scalar, typename Index, int Side, int Mode, bool Conjugate, int TriStorageOrder>
struct triangular_solve_matrix<Scalar,Index,Side,Mode,Conjugate,TriStorageOrder,RowMajor>
template <typename Scalar, typename Index, int Side, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride>
struct triangular_solve_matrix<Scalar,Index,Side,Mode,Conjugate,TriStorageOrder,RowMajor,OtherInnerStride>
{
static EIGEN_DONT_INLINE void run(
static void run(
Index size, Index cols,
const Scalar* tri, Index triStride,
Scalar* _other, Index otherStride,
Scalar* _other, Index otherIncr, Index otherStride,
level3_blocking<Scalar,Scalar>& blocking)
{
triangular_solve_matrix<
Scalar, Index, Side==OnTheLeft?OnTheRight:OnTheLeft,
(Mode&UnitDiag) | ((Mode&Upper) ? Lower : Upper),
NumTraits<Scalar>::IsComplex && Conjugate,
TriStorageOrder==RowMajor ? ColMajor : RowMajor, ColMajor>
::run(size, cols, tri, triStride, _other, otherStride, blocking);
TriStorageOrder==RowMajor ? ColMajor : RowMajor, ColMajor, OtherInnerStride>
::run(size, cols, tri, triStride, _other, otherIncr, otherStride, blocking);
}
};
/* Optimized triangular solver with multiple right hand sides and the triangular matrix on the left
*/
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder>
struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor>
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder,int OtherInnerStride>
struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor,OtherInnerStride>
{
static EIGEN_DONT_INLINE void run(
Index size, Index otherSize,
const Scalar* _tri, Index triStride,
Scalar* _other, Index otherStride,
Scalar* _other, Index otherIncr, Index otherStride,
level3_blocking<Scalar,Scalar>& blocking);
};
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride>
EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor,OtherInnerStride>::run(
Index size, Index otherSize,
const Scalar* _tri, Index triStride,
Scalar* _other, Index otherIncr, Index otherStride,
level3_blocking<Scalar,Scalar>& blocking)
{
Index cols = otherSize;
const_blas_data_mapper<Scalar, Index, TriStorageOrder> tri(_tri,triStride);
blas_data_mapper<Scalar, Index, ColMajor> other(_other,otherStride);
typedef const_blas_data_mapper<Scalar, Index, TriStorageOrder> TriMapper;
typedef blas_data_mapper<Scalar, Index, ColMajor, Unaligned, OtherInnerStride> OtherMapper;
TriMapper tri(_tri, triStride);
OtherMapper other(_other, otherStride, otherIncr);
typedef gebp_traits<Scalar,Scalar> Traits;
enum {
SmallPanelWidth = EIGEN_PLAIN_ENUM_MAX(Traits::mr,Traits::nr),
IsLower = (Mode&Lower) == Lower
......@@ -59,22 +70,20 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
std::size_t sizeA = kc*mc;
std::size_t sizeB = kc*cols;
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW());
conj_if<Conjugate> conj;
gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, Conjugate, false> gebp_kernel;
gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, TriStorageOrder> pack_lhs;
gemm_pack_rhs<Scalar, Index, Traits::nr, ColMajor, false, true> pack_rhs;
gebp_kernel<Scalar, Scalar, Index, OtherMapper, Traits::mr, Traits::nr, Conjugate, false> gebp_kernel;
gemm_pack_lhs<Scalar, Index, TriMapper, Traits::mr, Traits::LhsProgress, TriStorageOrder> pack_lhs;
gemm_pack_rhs<Scalar, Index, OtherMapper, Traits::nr, ColMajor, false, true> pack_rhs;
// the goal here is to subdivide the Rhs panels such that we keep some cache
// coherence when accessing the rhs elements
std::ptrdiff_t l1, l2;
manage_caching_sizes(GetAction, &l1, &l2);
Index subcols = cols>0 ? l2/(4 * sizeof(Scalar) * otherStride) : 0;
std::ptrdiff_t l1, l2, l3;
manage_caching_sizes(GetAction, &l1, &l2, &l3);
Index subcols = cols>0 ? l2/(4 * sizeof(Scalar) * std::max<Index>(otherStride,size)) : 0;
subcols = std::max<Index>((subcols/Traits::nr)*Traits::nr, Traits::nr);
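// Worked example (illustrative, assuming Scalar==double and a 256 KB L2 cache):
//   l2 = 262144, sizeof(Scalar) = 8, max(otherStride, size) = 1024
//   => subcols = 262144 / (4 * 8 * 1024) = 8,
// which is then rounded down to a multiple of Traits::nr and clamped to at
// least nr, so that each rhs panel stays resident in L2 during the sweep.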
for(Index k2=IsLower ? 0 : size;
......@@ -108,8 +117,9 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
{
// TODO write a small kernel handling this (can be shared with trsv)
Index i = IsLower ? k2+k1+k : k2-k1-k-1;
Index s = IsLower ? k2+k1 : i+1;
Index rs = actualPanelWidth - k - 1; // remaining size
Index s = TriStorageOrder==RowMajor ? (IsLower ? k2+k1 : i+1)
: IsLower ? i+1 : i-rs;
Scalar a = (Mode & UnitDiag) ? Scalar(1) : Scalar(1)/conj(tri(i,i));
for (Index j=j2; j<j2+actual_cols; ++j)
......@@ -118,20 +128,19 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
{
Scalar b(0);
const Scalar* l = &tri(i,s);
Scalar* r = &other(s,j);
typename OtherMapper::LinearMapper r = other.getLinearMapper(s,j);
for (Index i3=0; i3<k; ++i3)
b += conj(l[i3]) * r[i3];
b += conj(l[i3]) * r(i3);
other(i,j) = (other(i,j) - b)*a;
}
else
{
Index s = IsLower ? i+1 : i-rs;
Scalar b = (other(i,j) *= a);
Scalar* r = &other(s,j);
const Scalar* l = &tri(s,i);
typename OtherMapper::LinearMapper r = other.getLinearMapper(s,j);
typename TriMapper::LinearMapper l = tri.getLinearMapper(s,i);
for (Index i3=0;i3<rs;++i3)
r[i3] -= b * conj(l[i3]);
r(i3) -= b * conj(l(i3));
}
}
}
......@@ -141,17 +150,17 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
Index blockBOffset = IsLower ? k1 : lengthTarget;
// update the respective rows of B from other
pack_rhs(blockB+actual_kc*j2, &other(startBlock,j2), otherStride, actualPanelWidth, actual_cols, actual_kc, blockBOffset);
pack_rhs(blockB+actual_kc*j2, other.getSubMapper(startBlock,j2), actualPanelWidth, actual_cols, actual_kc, blockBOffset);
// GEBP
if (lengthTarget>0)
{
Index startTarget = IsLower ? k2+k1+actualPanelWidth : k2-actual_kc;
pack_lhs(blockA, &tri(startTarget,startBlock), triStride, actualPanelWidth, lengthTarget);
pack_lhs(blockA, tri.getSubMapper(startTarget,startBlock), actualPanelWidth, lengthTarget);
gebp_kernel(&other(startTarget,j2), otherStride, blockA, blockB+actual_kc*j2, lengthTarget, actualPanelWidth, actual_cols, Scalar(-1),
actualPanelWidth, actual_kc, 0, blockBOffset, blockW);
gebp_kernel(other.getSubMapper(startTarget,j2), blockA, blockB+actual_kc*j2, lengthTarget, actualPanelWidth, actual_cols, Scalar(-1),
actualPanelWidth, actual_kc, 0, blockBOffset);
}
}
}
......@@ -165,30 +174,40 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
const Index actual_mc = (std::min)(mc,end-i2);
if (actual_mc>0)
{
pack_lhs(blockA, &tri(i2, IsLower ? k2 : k2-kc), triStride, actual_kc, actual_mc);
pack_lhs(blockA, tri.getSubMapper(i2, IsLower ? k2 : k2-kc), actual_kc, actual_mc);
gebp_kernel(_other+i2, otherStride, blockA, blockB, actual_mc, actual_kc, cols, Scalar(-1), -1, -1, 0, 0, blockW);
gebp_kernel(other.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, Scalar(-1), -1, -1, 0, 0);
}
}
}
}
}
};
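// Usage sketch (illustrative only): this kernel backs in-place triangular
// solves with a matrix of right-hand sides, e.g.
#if 0
#include <Eigen/Dense>
void trsm_demo() {
  Eigen::MatrixXd L = Eigen::MatrixXd::Random(6, 6);
  Eigen::MatrixXd B = Eigen::MatrixXd::Random(6, 3);
  L.triangularView<Eigen::Lower>().solveInPlace(B); // B <- L^{-1} * B
}
#endif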
/* Optimized triangular solver with multiple left hand sides and the trinagular matrix on the right
/* Optimized triangular solver with multiple left hand sides and the triangular matrix on the right
*/
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder>
struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor>
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride>
struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor,OtherInnerStride>
{
static EIGEN_DONT_INLINE void run(
Index size, Index otherSize,
const Scalar* _tri, Index triStride,
Scalar* _other, Index otherStride,
Scalar* _other, Index otherIncr, Index otherStride,
level3_blocking<Scalar,Scalar>& blocking);
};
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride>
EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor,OtherInnerStride>::run(
Index size, Index otherSize,
const Scalar* _tri, Index triStride,
Scalar* _other, Index otherIncr, Index otherStride,
level3_blocking<Scalar,Scalar>& blocking)
{
Index rows = otherSize;
const_blas_data_mapper<Scalar, Index, TriStorageOrder> rhs(_tri,triStride);
blas_data_mapper<Scalar, Index, ColMajor> lhs(_other,otherStride);
typedef typename NumTraits<Scalar>::Real RealScalar;
typedef blas_data_mapper<Scalar, Index, ColMajor, Unaligned, OtherInnerStride> LhsMapper;
typedef const_blas_data_mapper<Scalar, Index, TriStorageOrder> RhsMapper;
LhsMapper lhs(_other, otherStride, otherIncr);
RhsMapper rhs(_tri, triStride);
typedef gebp_traits<Scalar,Scalar> Traits;
enum {
......@@ -202,17 +221,15 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
std::size_t sizeA = kc*mc;
std::size_t sizeB = kc*size;
std::size_t sizeW = kc*Traits::WorkSpaceFactor;
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
ei_declare_aligned_stack_constructed_variable(Scalar, blockW, sizeW, blocking.blockW());
conj_if<Conjugate> conj;
gebp_kernel<Scalar,Scalar, Index, Traits::mr, Traits::nr, false, Conjugate> gebp_kernel;
gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder,false,true> pack_rhs_panel;
gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, ColMajor, false, true> pack_lhs_panel;
gebp_kernel<Scalar, Scalar, Index, LhsMapper, Traits::mr, Traits::nr, false, Conjugate> gebp_kernel;
gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs;
gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr, RhsStorageOrder,false,true> pack_rhs_panel;
gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, ColMajor, false, true> pack_lhs_panel;
for(Index k2=IsLower ? size : 0;
IsLower ? k2>0 : k2<size;
......@@ -225,7 +242,7 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
Index rs = IsLower ? actual_k2 : size - actual_k2 - actual_kc;
Scalar* geb = blockB+actual_kc*actual_kc;
if (rs>0) pack_rhs(geb, &rhs(actual_k2,startPanel), triStride, actual_kc, rs);
if (rs>0) pack_rhs(geb, rhs.getSubMapper(actual_k2,startPanel), actual_kc, rs);
// triangular packing (we only pack the panels off the diagonal,
// neglecting the blocks overlapping the diagonal)
......@@ -239,7 +256,7 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
if (panelLength>0)
pack_rhs_panel(blockB+j2*actual_kc,
&rhs(actual_k2+panelOffset, actual_j2), triStride,
rhs.getSubMapper(actual_k2+panelOffset, actual_j2),
panelLength, actualPanelWidth,
actual_kc, panelOffset);
}
......@@ -267,13 +284,12 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
// GEBP
if(panelLength>0)
{
gebp_kernel(&lhs(i2,absolute_j2), otherStride,
gebp_kernel(lhs.getSubMapper(i2,absolute_j2),
blockA, blockB+j2*actual_kc,
actual_mc, panelLength, actualPanelWidth,
Scalar(-1),
actual_kc, actual_kc, // strides
panelOffset, panelOffset, // offsets
blockW); // workspace
panelOffset, panelOffset); // offsets
}
// unblocked triangular solve
......@@ -281,34 +297,36 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
{
Index j = IsLower ? absolute_j2+actualPanelWidth-k-1 : absolute_j2+k;
Scalar* r = &lhs(i2,j);
typename LhsMapper::LinearMapper r = lhs.getLinearMapper(i2,j);
for (Index k3=0; k3<k; ++k3)
{
Scalar b = conj(rhs(IsLower ? j+1+k3 : absolute_j2+k3,j));
Scalar* a = &lhs(i2,IsLower ? j+1+k3 : absolute_j2+k3);
typename LhsMapper::LinearMapper a = lhs.getLinearMapper(i2,IsLower ? j+1+k3 : absolute_j2+k3);
for (Index i=0; i<actual_mc; ++i)
r[i] -= a[i] * b;
r(i) -= a(i) * b;
}
Scalar b = (Mode & UnitDiag) ? Scalar(1) : Scalar(1)/conj(rhs(j,j));
if((Mode & UnitDiag)==0)
{
Scalar inv_rjj = RealScalar(1)/conj(rhs(j,j));
for (Index i=0; i<actual_mc; ++i)
r[i] *= b;
r(i) *= inv_rjj;
}
}
// pack the just computed part of lhs to A
pack_lhs_panel(blockA, _other+absolute_j2*otherStride+i2, otherStride,
pack_lhs_panel(blockA, lhs.getSubMapper(i2,absolute_j2),
actualPanelWidth, actual_mc,
actual_kc, j2);
}
}
if (rs>0)
gebp_kernel(_other+i2+startPanel*otherStride, otherStride, blockA, geb,
gebp_kernel(lhs.getSubMapper(i2, startPanel), blockA, geb,
actual_mc, actual_kc, rs, Scalar(-1),
-1, -1, 0, 0, blockW);
-1, -1, 0, 0);
}
}
}
};
} // end namespace internal
......
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
* Content : Eigen bindings to BLAS F77
* Triangular matrix * matrix solve functionality based on ?TRSM.
********************************************************************************
*/
#ifndef EIGEN_TRIANGULAR_SOLVER_MATRIX_BLAS_H
#define EIGEN_TRIANGULAR_SOLVER_MATRIX_BLAS_H
namespace Eigen {
namespace internal {
// implements LeftSide op(triangular)^-1 * general
#define EIGEN_BLAS_TRSM_L(EIGTYPE, BLASTYPE, BLASFUNC) \
template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor,1> \
{ \
enum { \
IsLower = (Mode&Lower) == Lower, \
IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \
}; \
static void run( \
Index size, Index otherSize, \
const EIGTYPE* _tri, Index triStride, \
EIGTYPE* _other, Index otherIncr, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \
{ \
EIGEN_ONLY_USED_FOR_DEBUG(otherIncr); \
eigen_assert(otherIncr == 1); \
BlasIndex m = convert_index<BlasIndex>(size), n = convert_index<BlasIndex>(otherSize), lda, ldb; \
char side = 'L', uplo, diag='N', transa; \
/* Set alpha */ \
EIGTYPE alpha(1); \
ldb = convert_index<BlasIndex>(otherStride);\
\
const EIGTYPE *a; \
/* Set trans */ \
transa = (TriStorageOrder==RowMajor) ? ((Conjugate) ? 'C' : 'T') : 'N'; \
/* Set uplo */ \
uplo = IsLower ? 'L' : 'U'; \
if (TriStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \
/* Set a, lda */ \
typedef Matrix<EIGTYPE, Dynamic, Dynamic, TriStorageOrder> MatrixTri; \
Map<const MatrixTri, 0, OuterStride<> > tri(_tri,size,size,OuterStride<>(triStride)); \
MatrixTri a_tmp; \
\
if (conjA) { \
a_tmp = tri.conjugate(); \
a = a_tmp.data(); \
lda = convert_index<BlasIndex>(a_tmp.outerStride()); \
} else { \
a = _tri; \
lda = convert_index<BlasIndex>(triStride); \
} \
if (IsUnitDiag) diag='U'; \
/* call ?trsm*/ \
BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \
} \
};
#ifdef EIGEN_USE_MKL
EIGEN_BLAS_TRSM_L(double, double, dtrsm)
EIGEN_BLAS_TRSM_L(dcomplex, MKL_Complex16, ztrsm)
EIGEN_BLAS_TRSM_L(float, float, strsm)
EIGEN_BLAS_TRSM_L(scomplex, MKL_Complex8, ctrsm)
#else
EIGEN_BLAS_TRSM_L(double, double, dtrsm_)
EIGEN_BLAS_TRSM_L(dcomplex, double, ztrsm_)
EIGEN_BLAS_TRSM_L(float, float, strsm_)
EIGEN_BLAS_TRSM_L(scomplex, float, ctrsm_)
#endif
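// Note on the transa/uplo flip above (explanatory, not part of the patch):
// BLAS assumes column-major storage, so a row-major triangular factor is
// presented to ?trsm as its transpose; a row-major *lower* matrix is the
// transpose of a column-major *upper* one, hence transa becomes 'T' (or 'C'
// when conjugation is requested) and uplo is swapped from 'L' to 'U'.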
// implements RightSide general * op(triangular)^-1
#define EIGEN_BLAS_TRSM_R(EIGTYPE, BLASTYPE, BLASFUNC) \
template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor,1> \
{ \
enum { \
IsLower = (Mode&Lower) == Lower, \
IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \
}; \
static void run( \
Index size, Index otherSize, \
const EIGTYPE* _tri, Index triStride, \
EIGTYPE* _other, Index otherIncr, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \
{ \
EIGEN_ONLY_USED_FOR_DEBUG(otherIncr); \
eigen_assert(otherIncr == 1); \
BlasIndex m = convert_index<BlasIndex>(otherSize), n = convert_index<BlasIndex>(size), lda, ldb; \
char side = 'R', uplo, diag='N', transa; \
/* Set alpha */ \
EIGTYPE alpha(1); \
ldb = convert_index<BlasIndex>(otherStride);\
\
const EIGTYPE *a; \
/* Set trans */ \
transa = (TriStorageOrder==RowMajor) ? ((Conjugate) ? 'C' : 'T') : 'N'; \
/* Set uplo */ \
uplo = IsLower ? 'L' : 'U'; \
if (TriStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \
/* Set a, lda */ \
typedef Matrix<EIGTYPE, Dynamic, Dynamic, TriStorageOrder> MatrixTri; \
Map<const MatrixTri, 0, OuterStride<> > tri(_tri,size,size,OuterStride<>(triStride)); \
MatrixTri a_tmp; \
\
if (conjA) { \
a_tmp = tri.conjugate(); \
a = a_tmp.data(); \
lda = convert_index<BlasIndex>(a_tmp.outerStride()); \
} else { \
a = _tri; \
lda = convert_index<BlasIndex>(triStride); \
} \
if (IsUnitDiag) diag='U'; \
/* call ?trsm*/ \
BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \
} \
};
#ifdef EIGEN_USE_MKL
EIGEN_BLAS_TRSM_R(double, double, dtrsm)
EIGEN_BLAS_TRSM_R(dcomplex, MKL_Complex16, ztrsm)
EIGEN_BLAS_TRSM_R(float, float, strsm)
EIGEN_BLAS_TRSM_R(scomplex, MKL_Complex8, ctrsm)
#else
EIGEN_BLAS_TRSM_R(double, double, dtrsm_)
EIGEN_BLAS_TRSM_R(dcomplex, double, ztrsm_)
EIGEN_BLAS_TRSM_R(float, float, strsm_)
EIGEN_BLAS_TRSM_R(scomplex, float, ctrsm_)
#endif
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_TRIANGULAR_SOLVER_MATRIX_BLAS_H
......@@ -37,6 +37,10 @@ struct triangular_solve_vector<LhsScalar, RhsScalar, Index, OnTheLeft, Mode, Con
{
typedef Map<const Matrix<LhsScalar,Dynamic,Dynamic,RowMajor>, 0, OuterStride<> > LhsMap;
const LhsMap lhs(_lhs,size,size,OuterStride<>(lhsStride));
typedef const_blas_data_mapper<LhsScalar,Index,RowMajor> LhsMapper;
typedef const_blas_data_mapper<RhsScalar,Index,ColMajor> RhsMapper;
typename internal::conditional<
Conjugate,
const CwiseUnaryOp<typename internal::scalar_conjugate_op<LhsScalar>,LhsMap>,
......@@ -58,10 +62,10 @@ struct triangular_solve_vector<LhsScalar, RhsScalar, Index, OnTheLeft, Mode, Con
Index startRow = IsLower ? pi : pi-actualPanelWidth;
Index startCol = IsLower ? 0 : pi;
general_matrix_vector_product<Index,LhsScalar,RowMajor,Conjugate,RhsScalar,false>::run(
general_matrix_vector_product<Index,LhsScalar,LhsMapper,RowMajor,Conjugate,RhsScalar,RhsMapper,false>::run(
actualPanelWidth, r,
&lhs.coeffRef(startRow,startCol), lhsStride,
rhs + startCol, 1,
LhsMapper(&lhs.coeffRef(startRow,startCol), lhsStride),
RhsMapper(rhs + startCol, 1),
rhs + startRow, 1,
RhsScalar(-1));
}
......@@ -91,6 +95,8 @@ struct triangular_solve_vector<LhsScalar, RhsScalar, Index, OnTheLeft, Mode, Con
{
typedef Map<const Matrix<LhsScalar,Dynamic,Dynamic,ColMajor>, 0, OuterStride<> > LhsMap;
const LhsMap lhs(_lhs,size,size,OuterStride<>(lhsStride));
typedef const_blas_data_mapper<LhsScalar,Index,ColMajor> LhsMapper;
typedef const_blas_data_mapper<RhsScalar,Index,ColMajor> RhsMapper;
typename internal::conditional<Conjugate,
const CwiseUnaryOp<typename internal::scalar_conjugate_op<LhsScalar>,LhsMap>,
const LhsMap&
......@@ -122,10 +128,10 @@ struct triangular_solve_vector<LhsScalar, RhsScalar, Index, OnTheLeft, Mode, Con
// let's directly call the low level product function because:
// 1 - it is faster to compile
// 2 - it is slightly faster at runtime
general_matrix_vector_product<Index,LhsScalar,ColMajor,Conjugate,RhsScalar,false>::run(
general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,Conjugate,RhsScalar,RhsMapper,false>::run(
r, actualPanelWidth,
&lhs.coeffRef(endBlock,startBlock), lhsStride,
rhs+startBlock, 1,
LhsMapper(&lhs.coeffRef(endBlock,startBlock), lhsStride),
RhsMapper(rhs+startBlock, 1),
rhs+endBlock, 1, RhsScalar(-1));
}
}
......
......@@ -18,23 +18,25 @@ namespace Eigen {
namespace internal {
// forward declarations
template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjugateLhs=false, bool ConjugateRhs=false>
template<typename LhsScalar, typename RhsScalar, typename Index, typename DataMapper, int mr, int nr, bool ConjugateLhs=false, bool ConjugateRhs=false>
struct gebp_kernel;
template<typename Scalar, typename Index, int nr, int StorageOrder, bool Conjugate = false, bool PanelMode=false>
template<typename Scalar, typename Index, typename DataMapper, int nr, int StorageOrder, bool Conjugate = false, bool PanelMode=false>
struct gemm_pack_rhs;
template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder, bool Conjugate = false, bool PanelMode = false>
template<typename Scalar, typename Index, typename DataMapper, int Pack1, int Pack2, int StorageOrder, bool Conjugate = false, bool PanelMode = false>
struct gemm_pack_lhs;
template<
typename Index,
typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs,
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs,
int ResStorageOrder>
int ResStorageOrder, int ResInnerStride>
struct general_matrix_matrix_product;
template<typename Index, typename LhsScalar, int LhsStorageOrder, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version=Specialized>
template<typename Index,
typename LhsScalar, typename LhsMapper, int LhsStorageOrder, bool ConjugateLhs,
typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version=Specialized>
struct general_matrix_vector_product;
......@@ -42,22 +44,35 @@ template<bool Conjugate> struct conj_if;
template<> struct conj_if<true> {
template<typename T>
inline T operator()(const T& x) { return conj(x); }
inline T operator()(const T& x) const { return numext::conj(x); }
template<typename T>
inline T pconj(const T& x) { return internal::pconj(x); }
inline T pconj(const T& x) const { return internal::pconj(x); }
};
template<> struct conj_if<false> {
template<typename T>
inline const T& operator()(const T& x) { return x; }
inline const T& operator()(const T& x) const { return x; }
template<typename T>
inline const T& pconj(const T& x) { return x; }
inline const T& pconj(const T& x) const { return x; }
};
// Generic implementation for custom complex types.
template<typename LhsScalar, typename RhsScalar, bool ConjLhs, bool ConjRhs>
struct conj_helper
{
typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar>::ReturnType Scalar;
EIGEN_STRONG_INLINE Scalar pmadd(const LhsScalar& x, const RhsScalar& y, const Scalar& c) const
{ return padd(c, pmul(x,y)); }
EIGEN_STRONG_INLINE Scalar pmul(const LhsScalar& x, const RhsScalar& y) const
{ return conj_if<ConjLhs>()(x) * conj_if<ConjRhs>()(y); }
};
template<typename Scalar> struct conj_helper<Scalar,Scalar,false,false>
{
EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const { return internal::pmadd(x,y,c); }
EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const { return internal::pmul(x,y); }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const { return internal::pmadd(x,y,c); }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const { return internal::pmul(x,y); }
};
template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::complex<RealScalar>, false,true>
......@@ -67,7 +82,7 @@ template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::
{ return c + pmul(x,y); }
EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const
{ return Scalar(real(x)*real(y) + imag(x)*imag(y), imag(x)*real(y) - real(x)*imag(y)); }
{ return Scalar(numext::real(x)*numext::real(y) + numext::imag(x)*numext::imag(y), numext::imag(x)*numext::real(y) - numext::real(x)*numext::imag(y)); }
};
template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::complex<RealScalar>, true,false>
......@@ -77,7 +92,7 @@ template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::
{ return c + pmul(x,y); }
EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const
{ return Scalar(real(x)*real(y) + imag(x)*imag(y), real(x)*imag(y) - imag(x)*real(y)); }
{ return Scalar(numext::real(x)*numext::real(y) + numext::imag(x)*numext::imag(y), numext::real(x)*numext::imag(y) - numext::imag(x)*numext::real(y)); }
};
template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::complex<RealScalar>, true,true>
......@@ -87,7 +102,7 @@ template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::
{ return c + pmul(x,y); }
EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const
{ return Scalar(real(x)*real(y) - imag(x)*imag(y), - real(x)*imag(y) - imag(x)*real(y)); }
{ return Scalar(numext::real(x)*numext::real(y) - numext::imag(x)*numext::imag(y), - numext::real(x)*numext::imag(y) - numext::imag(x)*numext::real(y)); }
};
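// Worked check (illustrative): with x = 1+2i and y = 3+4i,
//   (false,true ): x*conj(y) = (1+2i)(3-4i) = 11 + 2i
//   (true ,false): conj(x)*y = (1-2i)(3+4i) = 11 - 2i
//   (true ,true ): conj(x*y) = conj(-5+10i) = -5 - 10i
// matching the expanded real/imaginary formulas in the specializations above.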
template<typename RealScalar,bool Conj> struct conj_helper<std::complex<RealScalar>, RealScalar, Conj,false>
......@@ -109,39 +124,243 @@ template<typename RealScalar,bool Conj> struct conj_helper<RealScalar, std::comp
};
template<typename From,typename To> struct get_factor {
static EIGEN_STRONG_INLINE To run(const From& x) { return x; }
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE To run(const From& x) { return To(x); }
};
template<typename Scalar> struct get_factor<Scalar,typename NumTraits<Scalar>::Real> {
static EIGEN_STRONG_INLINE typename NumTraits<Scalar>::Real run(const Scalar& x) { return real(x); }
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE typename NumTraits<Scalar>::Real run(const Scalar& x) { return numext::real(x); }
};
template<typename Scalar, typename Index>
class BlasVectorMapper {
public:
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasVectorMapper(Scalar *data) : m_data(data) {}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const {
return m_data[i];
}
template <typename Packet, int AlignmentType>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet load(Index i) const {
return ploadt<Packet, AlignmentType>(m_data + i);
}
template <typename Packet>
EIGEN_DEVICE_FUNC bool aligned(Index i) const {
return (UIntPtr(m_data+i)%sizeof(Packet))==0;
}
protected:
Scalar* m_data;
};
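// Example (illustrative): with 32-byte packets (e.g. AVX doubles),
// aligned(i) is true exactly when the address of m_data[i] is a multiple of
// 32, i.e. when an aligned packet load may start at index i.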
template<typename Scalar, typename Index, int AlignmentType, int Incr=1>
class BlasLinearMapper;
template<typename Scalar, typename Index, int AlignmentType>
class BlasLinearMapper<Scalar,Index,AlignmentType,1> {
public:
typedef typename packet_traits<Scalar>::type Packet;
typedef typename packet_traits<Scalar>::half HalfPacket;
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data, Index incr=1)
: m_data(data)
{
EIGEN_ONLY_USED_FOR_DEBUG(incr);
eigen_assert(incr==1);
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void prefetch(int i) const {
internal::prefetch(&operator()(i));
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar& operator()(Index i) const {
return m_data[i];
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const {
return ploadt<Packet, AlignmentType>(m_data + i);
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i) const {
return ploadt<HalfPacket, AlignmentType>(m_data + i);
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, const Packet &p) const {
pstoret<Scalar, Packet, AlignmentType>(m_data + i, p);
}
protected:
Scalar *m_data;
};
// Lightweight helper class to access matrix coefficients.
// Yes, this is somewhat redundant with Map<>, but this version is much lighter,
// which should yield better compilation performance (time and code quality).
template<typename Scalar, typename Index, int StorageOrder>
class blas_data_mapper
template<typename Scalar, typename Index, int StorageOrder, int AlignmentType = Unaligned, int Incr = 1>
class blas_data_mapper;
template<typename Scalar, typename Index, int StorageOrder, int AlignmentType>
class blas_data_mapper<Scalar,Index,StorageOrder,AlignmentType,1>
{
public:
blas_data_mapper(Scalar* data, Index stride) : m_data(data), m_stride(stride) {}
EIGEN_STRONG_INLINE Scalar& operator()(Index i, Index j)
{ return m_data[StorageOrder==RowMajor ? j + i*m_stride : i + j*m_stride]; }
public:
typedef typename packet_traits<Scalar>::type Packet;
typedef typename packet_traits<Scalar>::half HalfPacket;
typedef BlasLinearMapper<Scalar, Index, AlignmentType> LinearMapper;
typedef BlasVectorMapper<Scalar, Index> VectorMapper;
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride, Index incr=1)
: m_data(data), m_stride(stride)
{
EIGEN_ONLY_USED_FOR_DEBUG(incr);
eigen_assert(incr==1);
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType>
getSubMapper(Index i, Index j) const {
return blas_data_mapper<Scalar, Index, StorageOrder, AlignmentType>(&operator()(i, j), m_stride);
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const {
return LinearMapper(&operator()(i, j));
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const {
return VectorMapper(&operator()(i, j));
}
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE Scalar& operator()(Index i, Index j) const {
return m_data[StorageOrder==RowMajor ? j + i*m_stride : i + j*m_stride];
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const {
return ploadt<Packet, AlignmentType>(&operator()(i, j));
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i, Index j) const {
return ploadt<HalfPacket, AlignmentType>(&operator()(i, j));
}
template<typename SubPacket>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const {
pscatter<Scalar, SubPacket>(&operator()(i, j), p, m_stride);
}
template<typename SubPacket>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE SubPacket gatherPacket(Index i, Index j) const {
return pgather<Scalar, SubPacket>(&operator()(i, j), m_stride);
}
EIGEN_DEVICE_FUNC const Index stride() const { return m_stride; }
EIGEN_DEVICE_FUNC const Scalar* data() const { return m_data; }
EIGEN_DEVICE_FUNC Index firstAligned(Index size) const {
if (UIntPtr(m_data)%sizeof(Scalar)) {
return -1;
}
return internal::first_default_aligned(m_data, size);
}
protected:
Scalar* EIGEN_RESTRICT m_data;
Index m_stride;
const Index m_stride;
};
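// Addressing sketch (illustrative): for a column-major matrix with outer
// stride 4, operator()(i,j) resolves to m_data[i + j*4], so element (2,1)
// lives at offset 6; getSubMapper(1,1) re-bases the data pointer at element
// (1,1) and keeps the same stride, hence sub(1,0) aliases element (2,1).
#if 0
inline void mapper_demo() {
  double buf[12] = {};
  blas_data_mapper<double, int, ColMajor> map(buf, 4);
  map(2, 1) = 3.14;                           // writes buf[6]
  double same = map.getSubMapper(1, 1)(1, 0); // reads buf[6] as well
  (void)same;
}
#endif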
// Implementation of non-natural increment (i.e. inner-stride != 1)
// The exposed API is not complete yet compared to the Incr==1 case
// because some features make less sense in this case.
template<typename Scalar, typename Index, int AlignmentType, int Incr>
class BlasLinearMapper
{
public:
typedef typename packet_traits<Scalar>::type Packet;
typedef typename packet_traits<Scalar>::half HalfPacket;
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data,Index incr) : m_data(data), m_incr(incr) {}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void prefetch(int i) const {
internal::prefetch(&operator()(i));
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar& operator()(Index i) const {
return m_data[i*m_incr.value()];
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const {
return pgather<Scalar,Packet>(m_data + i*m_incr.value(), m_incr.value());
}
template<typename PacketType>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, const PacketType &p) const {
pscatter<Scalar, PacketType>(m_data + i*m_incr.value(), p, m_incr.value());
}
protected:
Scalar *m_data;
const internal::variable_if_dynamic<Index,Incr> m_incr;
};
template<typename Scalar, typename Index, int StorageOrder, int AlignmentType,int Incr>
class blas_data_mapper
{
public:
typedef typename packet_traits<Scalar>::type Packet;
typedef typename packet_traits<Scalar>::half HalfPacket;
typedef BlasLinearMapper<Scalar, Index, AlignmentType,Incr> LinearMapper;
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride, Index incr) : m_data(data), m_stride(stride), m_incr(incr) {}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper
getSubMapper(Index i, Index j) const {
return blas_data_mapper(&operator()(i, j), m_stride, m_incr.value());
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const {
return LinearMapper(&operator()(i, j), m_incr.value());
}
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE Scalar& operator()(Index i, Index j) const {
return m_data[StorageOrder==RowMajor ? j*m_incr.value() + i*m_stride : i*m_incr.value() + j*m_stride];
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const {
return pgather<Scalar,Packet>(&operator()(i, j),m_incr.value());
}
template <typename PacketT, int AlignmentT>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i, Index j) const {
return pgather<Scalar,PacketT>(&operator()(i, j),m_incr.value());
}
template<typename SubPacket>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const {
pscatter<Scalar, SubPacket>(&operator()(i, j), p, m_stride);
}
template<typename SubPacket>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE SubPacket gatherPacket(Index i, Index j) const {
return pgather<Scalar, SubPacket>(&operator()(i, j), m_stride);
}
protected:
Scalar* EIGEN_RESTRICT m_data;
const Index m_stride;
const internal::variable_if_dynamic<Index,Incr> m_incr;
};
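// Addressing with a non-unit inner stride (illustrative): for column-major
// storage, operator()(i,j) resolves to m_data[i*incr + j*stride]; e.g. with
// incr = 2 and stride = 8, element (1,1) maps to offset 1*2 + 1*8 = 10.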
// lightweight helper class to access matrix coefficients (const version)
template<typename Scalar, typename Index, int StorageOrder>
class const_blas_data_mapper
{
class const_blas_data_mapper : public blas_data_mapper<const Scalar, Index, StorageOrder> {
public:
const_blas_data_mapper(const Scalar* data, Index stride) : m_data(data), m_stride(stride) {}
EIGEN_STRONG_INLINE const Scalar& operator()(Index i, Index j) const
{ return m_data[StorageOrder==RowMajor ? j + i*m_stride : i + j*m_stride]; }
protected:
const Scalar* EIGEN_RESTRICT m_data;
Index m_stride;
EIGEN_ALWAYS_INLINE const_blas_data_mapper(const Scalar *data, Index stride) : blas_data_mapper<const Scalar, Index, StorageOrder>(data, stride) {}
EIGEN_ALWAYS_INLINE const_blas_data_mapper<Scalar, Index, StorageOrder> getSubMapper(Index i, Index j) const {
return const_blas_data_mapper<Scalar, Index, StorageOrder>(&(this->operator()(i, j)), this->m_stride);
}
};
......@@ -188,17 +407,33 @@ struct blas_traits<CwiseUnaryOp<scalar_conjugate_op<Scalar>, NestedXpr> >
};
// pop scalar multiple
template<typename Scalar, typename NestedXpr>
struct blas_traits<CwiseUnaryOp<scalar_multiple_op<Scalar>, NestedXpr> >
template<typename Scalar, typename NestedXpr, typename Plain>
struct blas_traits<CwiseBinaryOp<scalar_product_op<Scalar>, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain>, NestedXpr> >
: blas_traits<NestedXpr>
{
typedef blas_traits<NestedXpr> Base;
typedef CwiseUnaryOp<scalar_multiple_op<Scalar>, NestedXpr> XprType;
typedef CwiseBinaryOp<scalar_product_op<Scalar>, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain>, NestedXpr> XprType;
typedef typename Base::ExtractType ExtractType;
static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
static inline ExtractType extract(const XprType& x) { return Base::extract(x.rhs()); }
static inline Scalar extractScalarFactor(const XprType& x)
{ return x.lhs().functor().m_other * Base::extractScalarFactor(x.rhs()); }
};
template<typename Scalar, typename NestedXpr, typename Plain>
struct blas_traits<CwiseBinaryOp<scalar_product_op<Scalar>, NestedXpr, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain> > >
: blas_traits<NestedXpr>
{
typedef blas_traits<NestedXpr> Base;
typedef CwiseBinaryOp<scalar_product_op<Scalar>, NestedXpr, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain> > XprType;
typedef typename Base::ExtractType ExtractType;
static inline ExtractType extract(const XprType& x) { return Base::extract(x.lhs()); }
static inline Scalar extractScalarFactor(const XprType& x)
{ return x.functor().m_other * Base::extractScalarFactor(x.nestedExpression()); }
{ return Base::extractScalarFactor(x.lhs()) * x.rhs().functor().m_other; }
};
template<typename Scalar, typename Plain1, typename Plain2>
struct blas_traits<CwiseBinaryOp<scalar_product_op<Scalar>, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain1>,
const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain2> > >
: blas_traits<CwiseNullaryOp<scalar_constant_op<Scalar>,Plain1> >
{};
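// Extraction sketch (illustrative): given an expression such as
//   s1 * (m * s2)   (scalars s1, s2; plain matrix m),
// the specializations above are applied recursively: extract() peels off both
// constant factors and returns the plain matrix m, while extractScalarFactor()
// returns s1*s2, letting the product kernels fold all factors into alpha.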
// pop opposite
template<typename Scalar, typename NestedXpr>
......@@ -230,7 +465,7 @@ struct blas_traits<Transpose<NestedXpr> >
enum {
IsTransposed = Base::IsTransposed ? 0 : 1
};
static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
static inline ExtractType extract(const XprType& x) { return ExtractType(Base::extract(x.nestedExpression())); }
static inline Scalar extractScalarFactor(const XprType& x) { return Base::extractScalarFactor(x.nestedExpression()); }
};
......
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2007-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
......@@ -13,18 +13,31 @@
namespace Eigen {
/** This value means that a quantity is not known at compile-time, and that instead the value is
/** This value means that a positive quantity (e.g., a size) is not known at compile-time, and that instead the value is
* stored in some runtime variable.
*
* Changing the value of Dynamic breaks the ABI, as Dynamic is often used as a template parameter for Matrix.
*/
const int Dynamic = -1;
/** This value means that a signed quantity (e.g., a signed index) is not known at compile-time, and that instead its value
* has to be specified at runtime.
*/
const int DynamicIndex = 0xffffff;
/** This value means +Infinity; it is currently used only as the p parameter to MatrixBase::lpNorm<int>().
* The value Infinity there means the L-infinity norm.
*/
const int Infinity = -1;
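// Example (illustrative): v.lpNorm<Eigen::Infinity>() computes max_i |v(i)|,
// whereas v.lpNorm<2>() is the usual Euclidean norm.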
/** This value means that the cost to evaluate an expression coefficient is either very expensive or
* cannot be known at compile time.
*
* This value has to be positive to (1) simplify cost computation, and (2) allow distinguishing between a very expensive and an extremely expensive expression.
* It must therefore be large enough to guarantee that unrolling won't happen and that sub-expressions will be evaluated, but not so large that it risks overflow.
*/
const int HugeCost = 10000;
/** \defgroup flags Flags
* \ingroup Core_Module
*
......@@ -44,18 +57,18 @@ const int Infinity = -1;
* If this bit is not set, the storage order is column-major.
* For an expression, this determines the storage order of
* the matrix created by evaluation of that expression.
* \sa \ref TopicStorageOrders */
* \sa \blank \ref TopicStorageOrders */
const unsigned int RowMajorBit = 0x1;
/** \ingroup flags
*
* means the expression should be evaluated by the calling expression */
const unsigned int EvalBeforeNestingBit = 0x2;
/** \ingroup flags
*
* \deprecated
* means the expression should be evaluated before any assignment */
const unsigned int EvalBeforeAssigningBit = 0x4;
EIGEN_DEPRECATED
const unsigned int EvalBeforeAssigningBit = 0x4; // FIXME deprecated
/** \ingroup flags
*
......@@ -136,17 +149,46 @@ const unsigned int LvalueBit = 0x20;
*/
const unsigned int DirectAccessBit = 0x40;
/** \ingroup flags
/** \deprecated \ingroup flags
*
* means the first coefficient packet is guaranteed to be aligned */
const unsigned int AlignedBit = 0x80;
* means the first coefficient packet is guaranteed to be aligned.
* An expression cannot have the AlignedBit without the PacketAccessBit flag.
* In other words, this means we are allowed to perform an aligned packet access to the first element regardless
* of the expression kind:
* \code
* expression.packet<Aligned>(0);
* \endcode
*/
EIGEN_DEPRECATED const unsigned int AlignedBit = 0x80;
const unsigned int NestByRefBit = 0x100;
/** \ingroup flags
*
* for an expression, this means that the storage order
* can be either row-major or column-major.
* The precise choice will be decided at evaluation time or when
* combined with other expressions.
* \sa \blank \ref RowMajorBit, \ref TopicStorageOrders */
const unsigned int NoPreferredStorageOrderBit = 0x200;
/** \ingroup flags
*
* Means that the underlying coefficients can be accessed through pointers to the sparse (un)compressed storage format,
* that is, the expression provides:
* \code
inline const Scalar* valuePtr() const;
inline const Index* innerIndexPtr() const;
inline const Index* outerIndexPtr() const;
inline const Index* innerNonZeroPtr() const;
\endcode
*/
const unsigned int CompressedAccessBit = 0x400;
// list of flags that are inherited by default
const unsigned int HereditaryBits = RowMajorBit
| EvalBeforeNestingBit
| EvalBeforeAssigningBit;
| EvalBeforeNestingBit;
/** \defgroup enums Enumerations
* \ingroup Core_Module
......@@ -155,9 +197,9 @@ const unsigned int HereditaryBits = RowMajorBit
*/
/** \ingroup enums
* Enum containing possible values for the \p Mode parameter of
* MatrixBase::selfadjointView() and MatrixBase::triangularView(). */
enum {
* Enum containing possible values for the \c Mode or \c UpLo parameter of
* MatrixBase::selfadjointView() and MatrixBase::triangularView(), and selfadjoint solvers. */
enum UpLoType {
/** View matrix as a lower triangular matrix. */
Lower=0x1,
/** View matrix as an upper triangular matrix. */
......@@ -181,12 +223,31 @@ enum {
};
/** \ingroup enums
* Enum for indicating whether an object is aligned or not. */
enum {
/** Object is not correctly aligned for vectorization. */
Unaligned=0,
/** Object is aligned for vectorization. */
Aligned=1
* Enum for indicating whether a buffer is aligned or not. */
enum AlignmentType {
Unaligned=0, /**< Data pointer has no specific alignment. */
Aligned8=8, /**< Data pointer is aligned on an 8-byte boundary. */
Aligned16=16, /**< Data pointer is aligned on a 16-byte boundary. */
Aligned32=32, /**< Data pointer is aligned on a 32-byte boundary. */
Aligned64=64, /**< Data pointer is aligned on a 64-byte boundary. */
Aligned128=128, /**< Data pointer is aligned on a 128-byte boundary. */
AlignedMask=255,
Aligned=16, /**< \deprecated Synonym for Aligned16. */
#if EIGEN_MAX_ALIGN_BYTES==128
AlignedMax = Aligned128
#elif EIGEN_MAX_ALIGN_BYTES==64
AlignedMax = Aligned64
#elif EIGEN_MAX_ALIGN_BYTES==32
AlignedMax = Aligned32
#elif EIGEN_MAX_ALIGN_BYTES==16
AlignedMax = Aligned16
#elif EIGEN_MAX_ALIGN_BYTES==8
AlignedMax = Aligned8
#elif EIGEN_MAX_ALIGN_BYTES==0
AlignedMax = Unaligned
#else
#error Invalid value for EIGEN_MAX_ALIGN_BYTES
#endif
};
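// Example (illustrative): when compiling for AVX, EIGEN_MAX_ALIGN_BYTES is
// typically 32, so AlignedMax resolves to Aligned32 and fixed-size
// vectorizable objects are over-aligned on 32-byte boundaries.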
/** \ingroup enums
......@@ -212,7 +273,7 @@ enum DirectionType {
/** \internal \ingroup enums
* Enum to specify how to traverse the entries of a matrix. */
enum {
enum TraversalType {
/** \internal Default traversal, no vectorization, no index-based access */
DefaultTraversal,
/** \internal No vectorization, use index-based access to have only one for loop instead of 2 nested loops */
......@@ -227,12 +288,14 @@ enum {
* scalar loops to handle the unaligned boundaries */
SliceVectorizedTraversal,
/** \internal Special case to properly handle incompatible scalar types or other defective cases*/
InvalidTraversal
InvalidTraversal,
/** \internal Evaluate all entries at once */
AllAtOnceTraversal
};
/** \internal \ingroup enums
* Enum to specify whether to unroll loops when traversing over the entries of a matrix. */
enum {
enum UnrollingType {
/** \internal Do not unroll loops. */
NoUnrolling,
/** \internal Unroll only the inner loop, but not the outer loop. */
......@@ -244,7 +307,7 @@ enum {
/** \internal \ingroup enums
* Enum to specify whether to use the default (built-in) implementation or the specialization. */
enum {
enum SpecializedType {
Specialized,
BuiltIn
};
......@@ -252,20 +315,20 @@ enum {
/** \ingroup enums
* Enum containing possible values for the \p _Options template parameter of
* Matrix, Array and BandMatrix. */
enum {
enum StorageOptions {
/** Storage order is column major (see \ref TopicStorageOrders). */
ColMajor = 0,
/** Storage order is row major (see \ref TopicStorageOrders). */
RowMajor = 0x1, // it is only a coincidence that this is equal to RowMajorBit -- don't rely on that
/** \internal Align the matrix itself if it is vectorizable fixed-size */
/** Align the matrix itself if it is vectorizable fixed-size */
AutoAlign = 0,
/** \internal Don't require alignment for the matrix itself (the array of coefficients, if dynamically allocated, may still be requested to be aligned) */ // FIXME --- clarify the situation
/** Don't require alignment for the matrix itself (the array of coefficients, if dynamically allocated, may still be requested to be aligned) */ // FIXME --- clarify the situation
DontAlign = 0x2
};
/** \ingroup enums
* Enum for specifying whether to apply or solve on the left or right. */
enum {
enum SideType {
/** Apply transformation on the left. */
OnTheLeft = 1,
/** Apply transformation on the right. */
......@@ -290,7 +353,7 @@ enum Default_t { Default };
/** \internal \ingroup enums
* Used in AmbiVector. */
enum {
enum AmbiVectorMode {
IsDense = 0,
IsSparse
};
......@@ -399,10 +462,16 @@ namespace Architecture
Generic = 0x0,
SSE = 0x1,
AltiVec = 0x2,
VSX = 0x3,
NEON = 0x4,
#if defined EIGEN_VECTORIZE_SSE
Target = SSE
#elif defined EIGEN_VECTORIZE_ALTIVEC
Target = AltiVec
#elif defined EIGEN_VECTORIZE_VSX
Target = VSX
#elif defined EIGEN_VECTORIZE_NEON
Target = NEON
#else
Target = Generic
#endif
......@@ -410,8 +479,9 @@ namespace Architecture
}
/** \internal \ingroup enums
* Enum used as template parameter in GeneralProduct. */
enum { CoeffBasedProductMode, LazyCoeffBasedProductMode, OuterProduct, InnerProduct, GemvProduct, GemmProduct };
* Enum used as template parameter in Product and product evaluators. */
enum ProductImplType
{ DefaultProduct=0, LazyProduct, AliasFreeProduct, CoeffBasedProductMode, LazyCoeffBasedProductMode, OuterProduct, InnerProduct, GemvProduct, GemmProduct };
/** \internal \ingroup enums
* Enum used in experimental parallel implementation. */
......@@ -420,12 +490,58 @@ enum Action {GetAction, SetAction};
/** The type used to identify a dense storage. */
struct Dense {};
/** The type used to identify a general sparse storage. */
struct Sparse {};
/** The type used to identify a general solver (factored) storage. */
struct SolverStorage {};
/** The type used to identify a permutation storage. */
struct PermutationStorage {};
/** The type used to identify a permutation storage. */
struct TranspositionsStorage {};
/** The type used to identify a matrix expression */
struct MatrixXpr {};
/** The type used to identify an array expression */
struct ArrayXpr {};
// An evaluator must define its shape. By default, it can be one of the following:
struct DenseShape { static std::string debugName() { return "DenseShape"; } };
struct SolverShape { static std::string debugName() { return "SolverShape"; } };
struct HomogeneousShape { static std::string debugName() { return "HomogeneousShape"; } };
struct DiagonalShape { static std::string debugName() { return "DiagonalShape"; } };
struct BandShape { static std::string debugName() { return "BandShape"; } };
struct TriangularShape { static std::string debugName() { return "TriangularShape"; } };
struct SelfAdjointShape { static std::string debugName() { return "SelfAdjointShape"; } };
struct PermutationShape { static std::string debugName() { return "PermutationShape"; } };
struct TranspositionsShape { static std::string debugName() { return "TranspositionsShape"; } };
struct SparseShape { static std::string debugName() { return "SparseShape"; } };
namespace internal {
// random access iterators based on coeff*() accessors.
struct IndexBased {};
// evaluator based on iterators to access coefficients.
struct IteratorBased {};
/** \internal
* Constants for comparison functors
*/
enum ComparisonName {
cmp_EQ = 0,
cmp_LT = 1,
cmp_LE = 2,
cmp_UNORD = 3,
cmp_NEQ = 4,
cmp_GT = 5,
cmp_GE = 6
};
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_CONSTANTS_H
......@@ -10,24 +10,31 @@
// 4244 - 'argument' : conversion from 'type1' to 'type2', possible loss of data
// 4273 - QtAlignedMalloc, inconsistent DLL linkage
// 4324 - structure was padded due to declspec(align())
// 4503 - decorated name length exceeded, name was truncated
// 4512 - assignment operator could not be generated
// 4522 - 'class' : multiple assignment operators specified
// 4700 - uninitialized local variable 'xyz' used
// 4714 - function marked as __forceinline not inlined
// 4717 - 'function' : recursive on all control paths, function will cause runtime stack overflow
// 4800 - 'type' : forcing value to bool 'true' or 'false' (performance warning)
#ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
#pragma warning( push )
#endif
#pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4512 4522 4700 4717 )
#pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4714 4717 4800)
#elif defined __INTEL_COMPILER
// 2196 - routine is both "inline" and "noinline" ("noinline" assumed)
// ICC 12 generates this warning even without any inline keyword, when defining class methods 'inline',
// i.e. inside of the class body, or on a typedef that may be a reference type.
// 279 - controlling expression is constant
// ICC 12 generates this warning on assert(constant_expression_depending_on_template_params) and frankly this is a legitimate use case.
// 1684 - conversion from pointer to same-sized integral type (potential portability problem)
// 2259 - non-pointer conversion from "Eigen::Index={ptrdiff_t={long}}" to "int" may lose significant bits
#ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
#pragma warning push
#endif
#pragma warning disable 2196 279
#pragma warning disable 2196 279 1684 2259
#elif defined __clang__
// -Wconstant-logical-operand - warning: use of logical && with constant operand; switch to bitwise & or remove constant
// this is really a stupid warning as it warns on compile-time expressions involving enums
......@@ -35,6 +42,53 @@
#pragma clang diagnostic push
#endif
#pragma clang diagnostic ignored "-Wconstant-logical-operand"
#elif defined __GNUC__
#if (!defined(EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS)) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6))
#pragma GCC diagnostic push
#endif
// g++ warns about local variables shadowing member functions, which is too strict
#pragma GCC diagnostic ignored "-Wshadow"
#if __GNUC__ == 4 && __GNUC_MINOR__ < 8
// Until g++-4.7 there are warnings when comparing unsigned int vs 0, even in templated functions:
#pragma GCC diagnostic ignored "-Wtype-limits"
#endif
#if __GNUC__>=6
#pragma GCC diagnostic ignored "-Wignored-attributes"
#endif
#if __GNUC__==7
// See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89325
#pragma GCC diagnostic ignored "-Wattributes"
#endif
#endif
#if defined __NVCC__
// Disable the "statement is unreachable" message
#pragma diag_suppress code_is_unreachable
// Disable the "dynamic initialization in unreachable code" message
#pragma diag_suppress initialization_not_reachable
// Disable the "invalid error number" message that we get with older versions of nvcc
#pragma diag_suppress 1222
// Disable the "calling a __host__ function from a __host__ __device__ function is not allowed" messages (yes, there are many of them and they seem to change with every version of the compiler)
#pragma diag_suppress 2527
#pragma diag_suppress 2529
#pragma diag_suppress 2651
#pragma diag_suppress 2653
#pragma diag_suppress 2668
#pragma diag_suppress 2669
#pragma diag_suppress 2670
#pragma diag_suppress 2671
#pragma diag_suppress 2735
#pragma diag_suppress 2737
#endif
#else
// warnings already disabled:
# ifndef EIGEN_WARNINGS_DISABLED_2
# define EIGEN_WARNINGS_DISABLED_2
# elif defined(EIGEN_INTERNAL_DEBUGGING)
# error "Do not include \"DisableStupidWarnings.h\" recursively more than twice!"
# endif
#endif // not EIGEN_WARNINGS_DISABLED
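When EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS is defined, the pushes above are skipped, so the suppressions are never popped by the matching ReenableStupidWarnings.h. A minimal usage sketch (user code, not part of this diff):

// Defining the macro before the first Eigen include skips the
// #pragma warning(push)/diagnostic push, so the suppressions stay
// in effect for the rest of the translation unit.
#define EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
#include <Eigen/Dense>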
......@@ -36,6 +36,10 @@ template<typename Derived> struct accessors_level
};
};
template<typename T> struct evaluator_traits;
template<typename T> struct evaluator;
} // end namespace internal
template<typename T> struct NumTraits;
......@@ -43,26 +47,22 @@ template<typename T> struct NumTraits;
template<typename Derived> struct EigenBase;
template<typename Derived> class DenseBase;
template<typename Derived> class PlainObjectBase;
template<typename Derived,
int Level = internal::accessors_level<Derived>::value >
class DenseCoeffsBase;
template<typename Derived, int Level> class DenseCoeffsBase;
template<typename _Scalar, int _Rows, int _Cols,
int _Options = AutoAlign |
#if defined(__GNUC__) && __GNUC__==3 && __GNUC_MINOR__==4
#if EIGEN_GNUC_AT(3,4)
// workaround a bug in at least gcc 3.4.6
// the innermost ?: ternary operator is misparsed. We write it slightly
// differently and this makes gcc 3.4.6 happy, but it's ugly.
// The error would only show up when EIGEN_DEFAULT_TO_ROW_MAJOR is defined
// (when EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION is RowMajor)
( (_Rows==1 && _Cols!=1) ? RowMajor
( (_Rows==1 && _Cols!=1) ? Eigen::RowMajor
: !(_Cols==1 && _Rows!=1) ? EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION
: ColMajor ),
: Eigen::ColMajor ),
#else
( (_Rows==1 && _Cols!=1) ? RowMajor
: (_Cols==1 && _Rows!=1) ? ColMajor
( (_Rows==1 && _Cols!=1) ? Eigen::RowMajor
: (_Cols==1 && _Rows!=1) ? Eigen::ColMajor
: EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION ),
#endif
int _MaxRows = _Rows,
......@@ -78,8 +78,7 @@ template<typename ExpressionType> class NestByValue;
template<typename ExpressionType> class ForceAlignedAccess;
template<typename ExpressionType> class SwapWrapper;
template<typename XprType, int BlockRows=Dynamic, int BlockCols=Dynamic, bool InnerPanel = false,
bool HasDirectAccess = internal::has_direct_access<XprType>::ret> class Block;
template<typename XprType, int BlockRows=Dynamic, int BlockCols=Dynamic, bool InnerPanel = false> class Block;
template<typename MatrixType, int Size=Dynamic> class VectorBlock;
template<typename MatrixType> class Transpose;
......@@ -88,10 +87,11 @@ template<typename NullaryOp, typename MatrixType> class CwiseNullaryOp;
template<typename UnaryOp, typename MatrixType> class CwiseUnaryOp;
template<typename ViewOp, typename MatrixType> class CwiseUnaryView;
template<typename BinaryOp, typename Lhs, typename Rhs> class CwiseBinaryOp;
template<typename BinOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp;
template<typename Derived, typename Lhs, typename Rhs> class ProductBase;
template<typename Lhs, typename Rhs, int Mode> class GeneralProduct;
template<typename Lhs, typename Rhs, int NestingFlags> class CoeffBasedProduct;
template<typename TernaryOp, typename Arg1, typename Arg2, typename Arg3> class CwiseTernaryOp;
template<typename Decomposition, typename Rhstype> class Solve;
template<typename XprType> class Inverse;
template<typename Lhs, typename Rhs, int Option = DefaultProduct> class Product;
template<typename Derived> class DiagonalBase;
template<typename _DiagonalVectorType> class DiagonalWrapper;
......@@ -109,7 +109,12 @@ template<typename Derived,
int Level = internal::accessors_level<Derived>::has_write_access ? WriteAccessors : ReadOnlyAccessors
> class MapBase;
template<int InnerStrideAtCompileTime, int OuterStrideAtCompileTime> class Stride;
template<int Value = Dynamic> class InnerStride;
template<int Value = Dynamic> class OuterStride;
template<typename MatrixType, int MapOptions=Unaligned, typename StrideType = Stride<0,0> > class Map;
template<typename Derived> class RefBase;
template<typename PlainObjectType, int Options = 0,
typename StrideType = typename internal::conditional<PlainObjectType::IsVectorAtCompileTime,InnerStride<1>,OuterStride<> >::type > class Ref;
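With the defaults above, Ref binds to any vector expression with unit inner stride without copying. A short example using the public API:

#include <Eigen/Dense>

// Accepts VectorXf, Map<VectorXf>, and blocks/segments with inner stride 1.
float first_plus_last(const Eigen::Ref<const Eigen::VectorXf>& v) {
  return v(0) + v(v.size() - 1);
}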
template<typename Derived> class TriangularBase;
template<typename MatrixType, unsigned int Mode> class TriangularView;
......@@ -120,10 +125,10 @@ template<typename MatrixType> struct CommaInitializer;
template<typename Derived> class ReturnByValue;
template<typename ExpressionType> class ArrayWrapper;
template<typename ExpressionType> class MatrixWrapper;
template<typename Derived> class SolverBase;
template<typename XprType> class InnerIterator;
namespace internal {
template<typename DecompositionType, typename Rhs> struct solve_retval_base;
template<typename DecompositionType, typename Rhs> struct solve_retval;
template<typename DecompositionType> struct kernel_retval_base;
template<typename DecompositionType> struct kernel_retval;
template<typename DecompositionType> struct image_retval_base;
......@@ -136,6 +141,21 @@ template<typename _Scalar, int Rows=Dynamic, int Cols=Dynamic, int Supers=Dynami
namespace internal {
template<typename Lhs, typename Rhs> struct product_type;
template<bool> struct EnableIf;
/** \internal
* \class product_evaluator
* Products need their own evaluator with more template arguments, allowing for
* easier partial template specialization.
*/
template< typename T,
int ProductTag = internal::product_type<typename T::Lhs,typename T::Rhs>::ret,
typename LhsShape = typename evaluator_traits<typename T::Lhs>::Shape,
typename RhsShape = typename evaluator_traits<typename T::Rhs>::Shape,
typename LhsScalar = typename traits<typename T::Lhs>::Scalar,
typename RhsScalar = typename traits<typename T::Rhs>::Scalar
> struct product_evaluator;
}
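A self-contained analogy of why the extra (defaulted) template parameters help; product_eval and the shape tags below are hypothetical stand-ins, not Eigen's internals:

struct DenseShape {};
struct SparseShape {};

// Primary template: generic/dense path.
template<typename Xpr,
         typename LhsShape = DenseShape,
         typename RhsShape = DenseShape>
struct product_eval { /* generic path */ };

// Partial specialization: dedicated path when the lhs is sparse.
template<typename Xpr, typename RhsShape>
struct product_eval<Xpr, SparseShape, RhsShape> { /* sparse-lhs path */ };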
template<typename Lhs, typename Rhs,
......@@ -151,10 +171,11 @@ namespace internal {
// with optional conjugation of the arguments.
template<typename LhsScalar, typename RhsScalar, bool ConjLhs=false, bool ConjRhs=false> struct conj_helper;
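A simplified sketch of the assumed semantics of the two conjugation flags; conj_helper_sketch is illustrative, not the library implementation (Eigen's conj_helper also provides a fused pmadd and packet overloads):

#include <complex>

template<bool ConjLhs, bool ConjRhs>
struct conj_helper_sketch {
  // Conceptually: fold conjugation of either operand into the product,
  // e.g. conj_helper_sketch<true,false>::pmul(x, y) == conj(x) * y.
  template<typename S>
  static S pmul(const S& x, const S& y) {
    using std::conj;
    return (ConjLhs ? conj(x) : x) * (ConjRhs ? conj(y) : y);
  }
};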
template<typename Scalar> struct scalar_sum_op;
template<typename Scalar> struct scalar_difference_op;
template<typename LhsScalar,typename RhsScalar> struct scalar_conj_product_op;
template<typename Scalar> struct scalar_quotient_op;
template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_sum_op;
template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_difference_op;
template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_conj_product_op;
template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_min_op;
template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_max_op;
template<typename Scalar> struct scalar_opposite_op;
template<typename Scalar> struct scalar_conjugate_op;
template<typename Scalar> struct scalar_real_op;
......@@ -162,6 +183,7 @@ template<typename Scalar> struct scalar_imag_op;
template<typename Scalar> struct scalar_abs_op;
template<typename Scalar> struct scalar_abs2_op;
template<typename Scalar> struct scalar_sqrt_op;
template<typename Scalar> struct scalar_rsqrt_op;
template<typename Scalar> struct scalar_exp_op;
template<typename Scalar> struct scalar_log_op;
template<typename Scalar> struct scalar_cos_op;
......@@ -169,22 +191,28 @@ template<typename Scalar> struct scalar_sin_op;
template<typename Scalar> struct scalar_acos_op;
template<typename Scalar> struct scalar_asin_op;
template<typename Scalar> struct scalar_tan_op;
template<typename Scalar> struct scalar_pow_op;
template<typename Scalar> struct scalar_inverse_op;
template<typename Scalar> struct scalar_square_op;
template<typename Scalar> struct scalar_cube_op;
template<typename Scalar, typename NewType> struct scalar_cast_op;
template<typename Scalar> struct scalar_multiple_op;
template<typename Scalar> struct scalar_quotient1_op;
template<typename Scalar> struct scalar_min_op;
template<typename Scalar> struct scalar_max_op;
template<typename Scalar> struct scalar_random_op;
template<typename Scalar> struct scalar_add_op;
template<typename Scalar> struct scalar_constant_op;
template<typename Scalar> struct scalar_identity_op;
template<typename Scalar,bool iscpx> struct scalar_sign_op;
template<typename Scalar,typename ScalarExponent> struct scalar_pow_op;
template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_hypot_op;
template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_product_op;
template<typename LhsScalar,typename RhsScalar> struct scalar_multiple2_op;
template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_quotient_op;
// SpecialFunctions module
template<typename Scalar> struct scalar_lgamma_op;
template<typename Scalar> struct scalar_digamma_op;
template<typename Scalar> struct scalar_erf_op;
template<typename Scalar> struct scalar_erfc_op;
template<typename Scalar> struct scalar_igamma_op;
template<typename Scalar> struct scalar_igammac_op;
template<typename Scalar> struct scalar_zeta_op;
template<typename Scalar> struct scalar_betainc_op;
} // end namespace internal
......@@ -193,18 +221,18 @@ struct IOFormat;
// Array module
template<typename _Scalar, int _Rows, int _Cols,
int _Options = AutoAlign |
#if defined(__GNUC__) && __GNUC__==3 && __GNUC_MINOR__==4
#if EIGEN_GNUC_AT(3,4)
// workaround a bug in at least gcc 3.4.6
// the innermost ?: ternary operator is misparsed. We write it slightly
// differently and this makes gcc 3.4.6 happy, but it's ugly.
// The error would only show up when EIGEN_DEFAULT_TO_ROW_MAJOR is defined
// (when EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION is RowMajor)
( (_Rows==1 && _Cols!=1) ? RowMajor
( (_Rows==1 && _Cols!=1) ? Eigen::RowMajor
: !(_Cols==1 && _Rows!=1) ? EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION
: ColMajor ),
: Eigen::ColMajor ),
#else
( (_Rows==1 && _Cols!=1) ? RowMajor
: (_Cols==1 && _Rows!=1) ? ColMajor
( (_Rows==1 && _Cols!=1) ? Eigen::RowMajor
: (_Cols==1 && _Rows!=1) ? Eigen::ColMajor
: EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION ),
#endif
int _MaxRows = _Rows, int _MaxCols = _Cols> class Array;
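The defaulted _Options expression above (identical for Matrix earlier) simply matches the storage order to the vector orientation, which can be checked at compile time:

#include <Eigen/Core>

// IsRowMajor is a public enum member of DenseBase.
static_assert(Eigen::Matrix<float,1,4>::IsRowMajor, "row vectors default to RowMajor");
static_assert(!Eigen::Matrix<float,4,1>::IsRowMajor, "column vectors default to ColMajor");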
......@@ -222,7 +250,9 @@ template<typename MatrixType> struct inverse_impl;
template<typename MatrixType> class HouseholderQR;
template<typename MatrixType> class ColPivHouseholderQR;
template<typename MatrixType> class FullPivHouseholderQR;
template<typename MatrixType> class CompleteOrthogonalDecomposition;
template<typename MatrixType, int QRPreconditioner = ColPivHouseholderQRPreconditioner> class JacobiSVD;
template<typename MatrixType> class BDCSVD;
template<typename MatrixType, int UpLo = Lower> class LLT;
template<typename MatrixType, int UpLo = Lower> class LDLT;
template<typename VectorsType, typename CoeffsType, int Side=OnTheLeft> class HouseholderSequence;
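CompleteOrthogonalDecomposition and BDCSVD, newly forward-declared here, are part of the public solver API; for example:

#include <Eigen/Dense>

// BDCSVD: divide-and-conquer SVD, faster than JacobiSVD for large matrices.
Eigen::MatrixXd A = Eigen::MatrixXd::Random(200, 60);
Eigen::BDCSVD<Eigen::MatrixXd> svd(A, Eigen::ComputeThinU | Eigen::ComputeThinV);
Eigen::VectorXd x = svd.solve(Eigen::VectorXd::Random(200));  // least-squares solve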
......@@ -235,42 +265,24 @@ template<typename Derived> class QuaternionBase;
template<typename Scalar> class Rotation2D;
template<typename Scalar> class AngleAxis;
template<typename Scalar,int Dim> class Translation;
#ifdef EIGEN2_SUPPORT
template<typename Derived, int _Dim> class eigen2_RotationBase;
template<typename Lhs, typename Rhs> class eigen2_Cross;
template<typename Scalar> class eigen2_Quaternion;
template<typename Scalar> class eigen2_Rotation2D;
template<typename Scalar> class eigen2_AngleAxis;
template<typename Scalar,int Dim> class eigen2_Transform;
template <typename _Scalar, int _AmbientDim> class eigen2_ParametrizedLine;
template <typename _Scalar, int _AmbientDim> class eigen2_Hyperplane;
template<typename Scalar,int Dim> class eigen2_Translation;
template<typename Scalar,int Dim> class eigen2_Scaling;
#endif
#if EIGEN2_SUPPORT_STAGE < STAGE20_RESOLVE_API_CONFLICTS
template<typename Scalar> class Quaternion;
template<typename Scalar,int Dim> class Transform;
template <typename _Scalar, int _AmbientDim> class ParametrizedLine;
template <typename _Scalar, int _AmbientDim> class Hyperplane;
template<typename Scalar,int Dim> class Scaling;
#endif
#if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS
template<typename Scalar,int Dim> class AlignedBox;
template<typename Scalar, int Options = AutoAlign> class Quaternion;
template<typename Scalar,int Dim,int Mode,int _Options=AutoAlign> class Transform;
template <typename _Scalar, int _AmbientDim, int Options=AutoAlign> class ParametrizedLine;
template <typename _Scalar, int _AmbientDim, int Options=AutoAlign> class Hyperplane;
template<typename Scalar> class UniformScaling;
template<typename MatrixType,int Direction> class Homogeneous;
#endif
// Sparse module:
template<typename Derived> class SparseMatrixBase;
// MatrixFunctions module
template<typename Derived> struct MatrixExponentialReturnValue;
template<typename Derived> class MatrixFunctionReturnValue;
template<typename Derived> class MatrixSquareRootReturnValue;
template<typename Derived> class MatrixLogarithmReturnValue;
template<typename Derived> class MatrixPowerReturnValue;
template<typename Derived> class MatrixComplexPowerReturnValue;
namespace internal {
template <typename Scalar>
......@@ -281,18 +293,6 @@ struct stem_function
};
}
#ifdef EIGEN2_SUPPORT
template<typename ExpressionType> class Cwise;
template<typename MatrixType> class Minor;
template<typename MatrixType> class LU;
template<typename MatrixType> class QR;
template<typename MatrixType> class SVD;
namespace internal {
template<typename MatrixType, unsigned int Mode> struct eigen2_part_return_type;
}
#endif
} // end namespace Eigen
#endif // EIGEN_FORWARDDECLARATIONS_H