Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
pydensecrf
Commits
13b115ab
Commit
13b115ab
authored
Mar 08, 2021
by
Simon Tulling
Browse files
Fix Windows MSVC install by updating Eigen Library
parent
4d5343c3
Changes
154
Hide whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
2148 additions
and
551 deletions
+2148
-551
pydensecrf/densecrf/include/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
...include/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
+258
-147
pydensecrf/densecrf/include/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h
...de/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h
+295
-0
pydensecrf/densecrf/include/Eigen/src/Core/products/SelfadjointMatrixVector.h
...include/Eigen/src/Core/products/SelfadjointMatrixVector.h
+67
-81
pydensecrf/densecrf/include/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h
...de/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h
+118
-0
pydensecrf/densecrf/include/Eigen/src/Core/products/SelfadjointProduct.h
...ecrf/include/Eigen/src/Core/products/SelfadjointProduct.h
+27
-19
pydensecrf/densecrf/include/Eigen/src/Core/products/SelfadjointRank2Update.h
.../include/Eigen/src/Core/products/SelfadjointRank2Update.h
+13
-13
pydensecrf/densecrf/include/Eigen/src/Core/products/TriangularMatrixMatrix.h
.../include/Eigen/src/Core/products/TriangularMatrixMatrix.h
+149
-80
pydensecrf/densecrf/include/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h
...ude/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h
+317
-0
pydensecrf/densecrf/include/Eigen/src/Core/products/TriangularMatrixVector.h
.../include/Eigen/src/Core/products/TriangularMatrixVector.h
+114
-102
pydensecrf/densecrf/include/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h
...ude/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h
+255
-0
pydensecrf/densecrf/include/Eigen/src/Core/products/TriangularSolverMatrix.h
.../include/Eigen/src/Core/products/TriangularSolverMatrix.h
+78
-60
pydensecrf/densecrf/include/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h
...ude/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h
+167
-0
pydensecrf/densecrf/include/Eigen/src/Core/products/TriangularSolverVector.h
.../include/Eigen/src/Core/products/TriangularSolverVector.h
+15
-9
pydensecrf/densecrf/include/Eigen/src/Core/util/BlasUtil.h
pydensecrf/densecrf/include/Eigen/src/Core/util/BlasUtil.h
+275
-40
No files found.
Too many changes to show.
To preserve performance only
154 of 154+
files are displayed.
Plain diff
Email patch
pydensecrf/densecrf/include/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
View file @
13b115ab
...
...
@@ -15,7 +15,7 @@ namespace Eigen {
namespace
internal
{
// pack a selfadjoint block diagonal for use with the gebp_kernel
template
<
typename
Scalar
,
typename
Index
,
int
Pack1
,
int
Pack2
,
int
StorageOrder
>
template
<
typename
Scalar
,
typename
Index
,
int
Pack1
,
int
Pack2
_dummy
,
int
StorageOrder
>
struct
symm_pack_lhs
{
template
<
int
BlockRows
>
inline
...
...
@@ -30,9 +30,9 @@ struct symm_pack_lhs
for
(
Index
k
=
i
;
k
<
i
+
BlockRows
;
k
++
)
{
for
(
Index
w
=
0
;
w
<
h
;
w
++
)
blockA
[
count
++
]
=
conj
(
lhs
(
k
,
i
+
w
));
// transposed
blockA
[
count
++
]
=
numext
::
conj
(
lhs
(
k
,
i
+
w
));
// transposed
blockA
[
count
++
]
=
real
(
lhs
(
k
,
k
));
// real (diagonal)
blockA
[
count
++
]
=
numext
::
real
(
lhs
(
k
,
k
));
// real (diagonal)
for
(
Index
w
=
h
+
1
;
w
<
BlockRows
;
w
++
)
blockA
[
count
++
]
=
lhs
(
i
+
w
,
k
);
// normal
...
...
@@ -41,34 +41,41 @@ struct symm_pack_lhs
// transposed copy
for
(
Index
k
=
i
+
BlockRows
;
k
<
cols
;
k
++
)
for
(
Index
w
=
0
;
w
<
BlockRows
;
w
++
)
blockA
[
count
++
]
=
conj
(
lhs
(
k
,
i
+
w
));
// transposed
blockA
[
count
++
]
=
numext
::
conj
(
lhs
(
k
,
i
+
w
));
// transposed
}
void
operator
()(
Scalar
*
blockA
,
const
Scalar
*
_lhs
,
Index
lhsStride
,
Index
cols
,
Index
rows
)
{
enum
{
PacketSize
=
packet_traits
<
Scalar
>::
size
};
const_blas_data_mapper
<
Scalar
,
Index
,
StorageOrder
>
lhs
(
_lhs
,
lhsStride
);
Index
count
=
0
;
Index
peeled_mc
=
(
rows
/
Pack1
)
*
Pack1
;
for
(
Index
i
=
0
;
i
<
peeled_mc
;
i
+=
Pack1
)
{
pack
<
Pack1
>
(
blockA
,
lhs
,
cols
,
i
,
count
);
}
if
(
rows
-
peeled_mc
>=
Pack2
)
{
pack
<
Pack2
>
(
blockA
,
lhs
,
cols
,
peeled_mc
,
count
);
peeled_mc
+=
Pack2
;
}
//Index peeled_mc3 = (rows/Pack1)*Pack1;
const
Index
peeled_mc3
=
Pack1
>=
3
*
PacketSize
?
(
rows
/
(
3
*
PacketSize
))
*
(
3
*
PacketSize
)
:
0
;
const
Index
peeled_mc2
=
Pack1
>=
2
*
PacketSize
?
peeled_mc3
+
((
rows
-
peeled_mc3
)
/
(
2
*
PacketSize
))
*
(
2
*
PacketSize
)
:
0
;
const
Index
peeled_mc1
=
Pack1
>=
1
*
PacketSize
?
(
rows
/
(
1
*
PacketSize
))
*
(
1
*
PacketSize
)
:
0
;
if
(
Pack1
>=
3
*
PacketSize
)
for
(
Index
i
=
0
;
i
<
peeled_mc3
;
i
+=
3
*
PacketSize
)
pack
<
3
*
PacketSize
>
(
blockA
,
lhs
,
cols
,
i
,
count
);
if
(
Pack1
>=
2
*
PacketSize
)
for
(
Index
i
=
peeled_mc3
;
i
<
peeled_mc2
;
i
+=
2
*
PacketSize
)
pack
<
2
*
PacketSize
>
(
blockA
,
lhs
,
cols
,
i
,
count
);
if
(
Pack1
>=
1
*
PacketSize
)
for
(
Index
i
=
peeled_mc2
;
i
<
peeled_mc1
;
i
+=
1
*
PacketSize
)
pack
<
1
*
PacketSize
>
(
blockA
,
lhs
,
cols
,
i
,
count
);
// do the same with mr==1
for
(
Index
i
=
peeled_mc
;
i
<
rows
;
i
++
)
for
(
Index
i
=
peeled_mc
1
;
i
<
rows
;
i
++
)
{
for
(
Index
k
=
0
;
k
<
i
;
k
++
)
blockA
[
count
++
]
=
lhs
(
i
,
k
);
// normal
blockA
[
count
++
]
=
lhs
(
i
,
k
);
// normal
blockA
[
count
++
]
=
real
(
lhs
(
i
,
i
));
// real (diagonal)
blockA
[
count
++
]
=
numext
::
real
(
lhs
(
i
,
i
));
// real (diagonal)
for
(
Index
k
=
i
+
1
;
k
<
cols
;
k
++
)
blockA
[
count
++
]
=
conj
(
lhs
(
k
,
i
));
// transposed
blockA
[
count
++
]
=
numext
::
conj
(
lhs
(
k
,
i
));
// transposed
}
}
};
...
...
@@ -82,7 +89,8 @@ struct symm_pack_rhs
Index
end_k
=
k2
+
rows
;
Index
count
=
0
;
const_blas_data_mapper
<
Scalar
,
Index
,
StorageOrder
>
rhs
(
_rhs
,
rhsStride
);
Index
packet_cols
=
(
cols
/
nr
)
*
nr
;
Index
packet_cols8
=
nr
>=
8
?
(
cols
/
8
)
*
8
:
0
;
Index
packet_cols4
=
nr
>=
4
?
(
cols
/
4
)
*
4
:
0
;
// first part: normal case
for
(
Index
j2
=
0
;
j2
<
k2
;
j2
+=
nr
)
...
...
@@ -91,91 +99,163 @@ struct symm_pack_rhs
{
blockB
[
count
+
0
]
=
rhs
(
k
,
j2
+
0
);
blockB
[
count
+
1
]
=
rhs
(
k
,
j2
+
1
);
if
(
nr
=
=
4
)
if
(
nr
>
=
4
)
{
blockB
[
count
+
2
]
=
rhs
(
k
,
j2
+
2
);
blockB
[
count
+
3
]
=
rhs
(
k
,
j2
+
3
);
}
if
(
nr
>=
8
)
{
blockB
[
count
+
4
]
=
rhs
(
k
,
j2
+
4
);
blockB
[
count
+
5
]
=
rhs
(
k
,
j2
+
5
);
blockB
[
count
+
6
]
=
rhs
(
k
,
j2
+
6
);
blockB
[
count
+
7
]
=
rhs
(
k
,
j2
+
7
);
}
count
+=
nr
;
}
}
// second part: diagonal block
for
(
Index
j2
=
k2
;
j2
<
(
std
::
min
)(
k2
+
rows
,
packet_cols
);
j2
+=
nr
)
Index
end8
=
nr
>=
8
?
(
std
::
min
)(
k2
+
rows
,
packet_cols8
)
:
k2
;
if
(
nr
>=
8
)
{
// again we can split vertically in three different parts (transpose, symmetric, normal)
// transpose
for
(
Index
k
=
k2
;
k
<
j2
;
k
++
)
for
(
Index
j2
=
k2
;
j2
<
end8
;
j2
+=
8
)
{
blockB
[
count
+
0
]
=
conj
(
rhs
(
j2
+
0
,
k
));
blockB
[
count
+
1
]
=
conj
(
rhs
(
j2
+
1
,
k
));
if
(
nr
==
4
)
// again we can split vertically in three different parts (transpose, symmetric, normal)
// transpose
for
(
Index
k
=
k2
;
k
<
j2
;
k
++
)
{
blockB
[
count
+
2
]
=
conj
(
rhs
(
j2
+
2
,
k
));
blockB
[
count
+
3
]
=
conj
(
rhs
(
j2
+
3
,
k
));
blockB
[
count
+
0
]
=
numext
::
conj
(
rhs
(
j2
+
0
,
k
));
blockB
[
count
+
1
]
=
numext
::
conj
(
rhs
(
j2
+
1
,
k
));
blockB
[
count
+
2
]
=
numext
::
conj
(
rhs
(
j2
+
2
,
k
));
blockB
[
count
+
3
]
=
numext
::
conj
(
rhs
(
j2
+
3
,
k
));
blockB
[
count
+
4
]
=
numext
::
conj
(
rhs
(
j2
+
4
,
k
));
blockB
[
count
+
5
]
=
numext
::
conj
(
rhs
(
j2
+
5
,
k
));
blockB
[
count
+
6
]
=
numext
::
conj
(
rhs
(
j2
+
6
,
k
));
blockB
[
count
+
7
]
=
numext
::
conj
(
rhs
(
j2
+
7
,
k
));
count
+=
8
;
}
count
+=
nr
;
}
// symmetric
Index
h
=
0
;
for
(
Index
k
=
j2
;
k
<
j2
+
nr
;
k
++
)
{
// normal
for
(
Index
w
=
0
;
w
<
h
;
++
w
)
blockB
[
count
+
w
]
=
rhs
(
k
,
j2
+
w
);
// symmetric
Index
h
=
0
;
for
(
Index
k
=
j2
;
k
<
j2
+
8
;
k
++
)
{
// normal
for
(
Index
w
=
0
;
w
<
h
;
++
w
)
blockB
[
count
+
w
]
=
rhs
(
k
,
j2
+
w
);
blockB
[
count
+
h
]
=
real
(
rhs
(
k
,
k
));
blockB
[
count
+
h
]
=
numext
::
real
(
rhs
(
k
,
k
));
// transpose
for
(
Index
w
=
h
+
1
;
w
<
nr
;
++
w
)
blockB
[
count
+
w
]
=
conj
(
rhs
(
j2
+
w
,
k
));
count
+=
nr
;
++
h
;
// transpose
for
(
Index
w
=
h
+
1
;
w
<
8
;
++
w
)
blockB
[
count
+
w
]
=
numext
::
conj
(
rhs
(
j2
+
w
,
k
));
count
+=
8
;
++
h
;
}
// normal
for
(
Index
k
=
j2
+
8
;
k
<
end_k
;
k
++
)
{
blockB
[
count
+
0
]
=
rhs
(
k
,
j2
+
0
);
blockB
[
count
+
1
]
=
rhs
(
k
,
j2
+
1
);
blockB
[
count
+
2
]
=
rhs
(
k
,
j2
+
2
);
blockB
[
count
+
3
]
=
rhs
(
k
,
j2
+
3
);
blockB
[
count
+
4
]
=
rhs
(
k
,
j2
+
4
);
blockB
[
count
+
5
]
=
rhs
(
k
,
j2
+
5
);
blockB
[
count
+
6
]
=
rhs
(
k
,
j2
+
6
);
blockB
[
count
+
7
]
=
rhs
(
k
,
j2
+
7
);
count
+=
8
;
}
}
// normal
for
(
Index
k
=
j2
+
nr
;
k
<
end_k
;
k
++
)
}
if
(
nr
>=
4
)
{
for
(
Index
j2
=
end8
;
j2
<
(
std
::
min
)(
k2
+
rows
,
packet_cols4
);
j2
+=
4
)
{
blockB
[
count
+
0
]
=
rhs
(
k
,
j2
+
0
);
blockB
[
count
+
1
]
=
rhs
(
k
,
j2
+
1
);
if
(
nr
==
4
)
// again we can split vertically in three different parts (transpose, symmetric, normal)
// transpose
for
(
Index
k
=
k2
;
k
<
j2
;
k
++
)
{
blockB
[
count
+
0
]
=
numext
::
conj
(
rhs
(
j2
+
0
,
k
));
blockB
[
count
+
1
]
=
numext
::
conj
(
rhs
(
j2
+
1
,
k
));
blockB
[
count
+
2
]
=
numext
::
conj
(
rhs
(
j2
+
2
,
k
));
blockB
[
count
+
3
]
=
numext
::
conj
(
rhs
(
j2
+
3
,
k
));
count
+=
4
;
}
// symmetric
Index
h
=
0
;
for
(
Index
k
=
j2
;
k
<
j2
+
4
;
k
++
)
{
// normal
for
(
Index
w
=
0
;
w
<
h
;
++
w
)
blockB
[
count
+
w
]
=
rhs
(
k
,
j2
+
w
);
blockB
[
count
+
h
]
=
numext
::
real
(
rhs
(
k
,
k
));
// transpose
for
(
Index
w
=
h
+
1
;
w
<
4
;
++
w
)
blockB
[
count
+
w
]
=
numext
::
conj
(
rhs
(
j2
+
w
,
k
));
count
+=
4
;
++
h
;
}
// normal
for
(
Index
k
=
j2
+
4
;
k
<
end_k
;
k
++
)
{
blockB
[
count
+
0
]
=
rhs
(
k
,
j2
+
0
);
blockB
[
count
+
1
]
=
rhs
(
k
,
j2
+
1
);
blockB
[
count
+
2
]
=
rhs
(
k
,
j2
+
2
);
blockB
[
count
+
3
]
=
rhs
(
k
,
j2
+
3
);
count
+=
4
;
}
count
+=
nr
;
}
}
// third part: transposed
for
(
Index
j2
=
k2
+
rows
;
j2
<
packet_cols
;
j2
+=
nr
)
if
(
nr
>=
8
)
{
for
(
Index
k
=
k2
;
k
<
end_k
;
k
++
)
for
(
Index
j2
=
k2
+
rows
;
j2
<
packet_cols8
;
j2
+=
8
)
{
blockB
[
count
+
0
]
=
conj
(
rhs
(
j2
+
0
,
k
));
blockB
[
count
+
1
]
=
conj
(
rhs
(
j2
+
1
,
k
));
if
(
nr
==
4
)
for
(
Index
k
=
k2
;
k
<
end_k
;
k
++
)
{
blockB
[
count
+
2
]
=
conj
(
rhs
(
j2
+
2
,
k
));
blockB
[
count
+
3
]
=
conj
(
rhs
(
j2
+
3
,
k
));
blockB
[
count
+
0
]
=
numext
::
conj
(
rhs
(
j2
+
0
,
k
));
blockB
[
count
+
1
]
=
numext
::
conj
(
rhs
(
j2
+
1
,
k
));
blockB
[
count
+
2
]
=
numext
::
conj
(
rhs
(
j2
+
2
,
k
));
blockB
[
count
+
3
]
=
numext
::
conj
(
rhs
(
j2
+
3
,
k
));
blockB
[
count
+
4
]
=
numext
::
conj
(
rhs
(
j2
+
4
,
k
));
blockB
[
count
+
5
]
=
numext
::
conj
(
rhs
(
j2
+
5
,
k
));
blockB
[
count
+
6
]
=
numext
::
conj
(
rhs
(
j2
+
6
,
k
));
blockB
[
count
+
7
]
=
numext
::
conj
(
rhs
(
j2
+
7
,
k
));
count
+=
8
;
}
}
}
if
(
nr
>=
4
)
{
for
(
Index
j2
=
(
std
::
max
)(
packet_cols8
,
k2
+
rows
);
j2
<
packet_cols4
;
j2
+=
4
)
{
for
(
Index
k
=
k2
;
k
<
end_k
;
k
++
)
{
blockB
[
count
+
0
]
=
numext
::
conj
(
rhs
(
j2
+
0
,
k
));
blockB
[
count
+
1
]
=
numext
::
conj
(
rhs
(
j2
+
1
,
k
));
blockB
[
count
+
2
]
=
numext
::
conj
(
rhs
(
j2
+
2
,
k
));
blockB
[
count
+
3
]
=
numext
::
conj
(
rhs
(
j2
+
3
,
k
));
count
+=
4
;
}
count
+=
nr
;
}
}
// copy the remaining columns one at a time (=> the same with nr==1)
for
(
Index
j2
=
packet_cols
;
j2
<
cols
;
++
j2
)
for
(
Index
j2
=
packet_cols
4
;
j2
<
cols
;
++
j2
)
{
// transpose
Index
half
=
(
std
::
min
)(
end_k
,
j2
);
for
(
Index
k
=
k2
;
k
<
half
;
k
++
)
{
blockB
[
count
]
=
conj
(
rhs
(
j2
,
k
));
blockB
[
count
]
=
numext
::
conj
(
rhs
(
j2
,
k
));
count
+=
1
;
}
if
(
half
==
j2
&&
half
<
k2
+
rows
)
{
blockB
[
count
]
=
real
(
rhs
(
j2
,
j2
));
blockB
[
count
]
=
numext
::
real
(
rhs
(
j2
,
j2
));
count
+=
1
;
}
else
...
...
@@ -197,69 +277,85 @@ struct symm_pack_rhs
template
<
typename
Scalar
,
typename
Index
,
int
LhsStorageOrder
,
bool
LhsSelfAdjoint
,
bool
ConjugateLhs
,
int
RhsStorageOrder
,
bool
RhsSelfAdjoint
,
bool
ConjugateRhs
,
int
ResStorageOrder
>
int
ResStorageOrder
,
int
ResInnerStride
>
struct
product_selfadjoint_matrix
;
template
<
typename
Scalar
,
typename
Index
,
int
LhsStorageOrder
,
bool
LhsSelfAdjoint
,
bool
ConjugateLhs
,
int
RhsStorageOrder
,
bool
RhsSelfAdjoint
,
bool
ConjugateRhs
>
struct
product_selfadjoint_matrix
<
Scalar
,
Index
,
LhsStorageOrder
,
LhsSelfAdjoint
,
ConjugateLhs
,
RhsStorageOrder
,
RhsSelfAdjoint
,
ConjugateRhs
,
RowMajor
>
int
RhsStorageOrder
,
bool
RhsSelfAdjoint
,
bool
ConjugateRhs
,
int
ResInnerStride
>
struct
product_selfadjoint_matrix
<
Scalar
,
Index
,
LhsStorageOrder
,
LhsSelfAdjoint
,
ConjugateLhs
,
RhsStorageOrder
,
RhsSelfAdjoint
,
ConjugateRhs
,
RowMajor
,
ResInnerStride
>
{
static
EIGEN_STRONG_INLINE
void
run
(
Index
rows
,
Index
cols
,
const
Scalar
*
lhs
,
Index
lhsStride
,
const
Scalar
*
rhs
,
Index
rhsStride
,
Scalar
*
res
,
Index
resStride
,
Scalar
alpha
)
Scalar
*
res
,
Index
resIncr
,
Index
resStride
,
const
Scalar
&
alpha
,
level3_blocking
<
Scalar
,
Scalar
>&
blocking
)
{
product_selfadjoint_matrix
<
Scalar
,
Index
,
EIGEN_LOGICAL_XOR
(
RhsSelfAdjoint
,
RhsStorageOrder
==
RowMajor
)
?
ColMajor
:
RowMajor
,
RhsSelfAdjoint
,
NumTraits
<
Scalar
>::
IsComplex
&&
EIGEN_LOGICAL_XOR
(
RhsSelfAdjoint
,
ConjugateRhs
),
EIGEN_LOGICAL_XOR
(
LhsSelfAdjoint
,
LhsStorageOrder
==
RowMajor
)
?
ColMajor
:
RowMajor
,
LhsSelfAdjoint
,
NumTraits
<
Scalar
>::
IsComplex
&&
EIGEN_LOGICAL_XOR
(
LhsSelfAdjoint
,
ConjugateLhs
),
ColMajor
>
::
run
(
cols
,
rows
,
rhs
,
rhsStride
,
lhs
,
lhsStride
,
res
,
resStride
,
alpha
);
ColMajor
,
ResInnerStride
>
::
run
(
cols
,
rows
,
rhs
,
rhsStride
,
lhs
,
lhsStride
,
res
,
resIncr
,
resStride
,
alpha
,
blocking
);
}
};
template
<
typename
Scalar
,
typename
Index
,
int
LhsStorageOrder
,
bool
ConjugateLhs
,
int
RhsStorageOrder
,
bool
ConjugateRhs
>
struct
product_selfadjoint_matrix
<
Scalar
,
Index
,
LhsStorageOrder
,
true
,
ConjugateLhs
,
RhsStorageOrder
,
false
,
ConjugateRhs
,
ColMajor
>
int
RhsStorageOrder
,
bool
ConjugateRhs
,
int
ResInnerStride
>
struct
product_selfadjoint_matrix
<
Scalar
,
Index
,
LhsStorageOrder
,
true
,
ConjugateLhs
,
RhsStorageOrder
,
false
,
ConjugateRhs
,
ColMajor
,
ResInnerStride
>
{
static
EIGEN_DONT_INLINE
void
run
(
Index
rows
,
Index
cols
,
const
Scalar
*
_lhs
,
Index
lhsStride
,
const
Scalar
*
_rhs
,
Index
rhsStride
,
Scalar
*
res
,
Index
resStride
,
Scalar
alpha
)
Scalar
*
res
,
Index
resIncr
,
Index
resStride
,
const
Scalar
&
alpha
,
level3_blocking
<
Scalar
,
Scalar
>&
blocking
);
};
template
<
typename
Scalar
,
typename
Index
,
int
LhsStorageOrder
,
bool
ConjugateLhs
,
int
RhsStorageOrder
,
bool
ConjugateRhs
,
int
ResInnerStride
>
EIGEN_DONT_INLINE
void
product_selfadjoint_matrix
<
Scalar
,
Index
,
LhsStorageOrder
,
true
,
ConjugateLhs
,
RhsStorageOrder
,
false
,
ConjugateRhs
,
ColMajor
,
ResInnerStride
>::
run
(
Index
rows
,
Index
cols
,
const
Scalar
*
_lhs
,
Index
lhsStride
,
const
Scalar
*
_rhs
,
Index
rhsStride
,
Scalar
*
_res
,
Index
resIncr
,
Index
resStride
,
const
Scalar
&
alpha
,
level3_blocking
<
Scalar
,
Scalar
>&
blocking
)
{
Index
size
=
rows
;
const_blas_data_mapper
<
Scalar
,
Index
,
LhsStorageOrder
>
lhs
(
_lhs
,
lhsStride
);
const_blas_data_mapper
<
Scalar
,
Index
,
RhsStorageOrder
>
rhs
(
_rhs
,
rhsStride
);
typedef
gebp_traits
<
Scalar
,
Scalar
>
Traits
;
Index
kc
=
size
;
// cache block size along the K direction
Index
mc
=
rows
;
// cache block size along the M direction
Index
nc
=
cols
;
// cache block size along the N direction
computeProductBlockingSizes
<
Scalar
,
Scalar
>
(
kc
,
mc
,
nc
);
// kc must smaller than mc
typedef
const_blas_data_mapper
<
Scalar
,
Index
,
LhsStorageOrder
>
LhsMapper
;
typedef
const_blas_data_mapper
<
Scalar
,
Index
,
(
LhsStorageOrder
==
RowMajor
)
?
ColMajor
:
RowMajor
>
LhsTransposeMapper
;
typedef
const_blas_data_mapper
<
Scalar
,
Index
,
RhsStorageOrder
>
RhsMapper
;
typedef
blas_data_mapper
<
typename
Traits
::
ResScalar
,
Index
,
ColMajor
,
Unaligned
,
ResInnerStride
>
ResMapper
;
LhsMapper
lhs
(
_lhs
,
lhsStride
);
LhsTransposeMapper
lhs_transpose
(
_lhs
,
lhsStride
);
RhsMapper
rhs
(
_rhs
,
rhsStride
);
ResMapper
res
(
_res
,
resStride
,
resIncr
);
Index
kc
=
blocking
.
kc
();
// cache block size along the K direction
Index
mc
=
(
std
::
min
)(
rows
,
blocking
.
mc
());
// cache block size along the M direction
// kc must be smaller than mc
kc
=
(
std
::
min
)(
kc
,
mc
);
std
::
size_t
sizeA
=
kc
*
mc
;
std
::
size_t
sizeB
=
kc
*
cols
;
ei_declare_aligned_stack_constructed_variable
(
Scalar
,
blockA
,
sizeA
,
blocking
.
blockA
());
ei_declare_aligned_stack_constructed_variable
(
Scalar
,
blockB
,
sizeB
,
blocking
.
blockB
());
std
::
size_t
sizeW
=
kc
*
Traits
::
WorkSpaceFactor
;
std
::
size_t
sizeB
=
sizeW
+
kc
*
cols
;
ei_declare_aligned_stack_constructed_variable
(
Scalar
,
blockA
,
kc
*
mc
,
0
);
ei_declare_aligned_stack_constructed_variable
(
Scalar
,
allocatedBlockB
,
sizeB
,
0
);
Scalar
*
blockB
=
allocatedBlockB
+
sizeW
;
gebp_kernel
<
Scalar
,
Scalar
,
Index
,
Traits
::
mr
,
Traits
::
nr
,
ConjugateLhs
,
ConjugateRhs
>
gebp_kernel
;
gebp_kernel
<
Scalar
,
Scalar
,
Index
,
ResMapper
,
Traits
::
mr
,
Traits
::
nr
,
ConjugateLhs
,
ConjugateRhs
>
gebp_kernel
;
symm_pack_lhs
<
Scalar
,
Index
,
Traits
::
mr
,
Traits
::
LhsProgress
,
LhsStorageOrder
>
pack_lhs
;
gemm_pack_rhs
<
Scalar
,
Index
,
Traits
::
nr
,
RhsStorageOrder
>
pack_rhs
;
gemm_pack_lhs
<
Scalar
,
Index
,
Traits
::
mr
,
Traits
::
LhsProgress
,
LhsStorageOrder
==
RowMajor
?
ColMajor
:
RowMajor
,
true
>
pack_lhs_transposed
;
gemm_pack_rhs
<
Scalar
,
Index
,
RhsMapper
,
Traits
::
nr
,
RhsStorageOrder
>
pack_rhs
;
gemm_pack_lhs
<
Scalar
,
Index
,
LhsTransposeMapper
,
Traits
::
mr
,
Traits
::
LhsProgress
,
LhsStorageOrder
==
RowMajor
?
ColMajor
:
RowMajor
,
true
>
pack_lhs_transposed
;
for
(
Index
k2
=
0
;
k2
<
size
;
k2
+=
kc
)
{
...
...
@@ -268,7 +364,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs
// we have selected one row panel of rhs and one column panel of lhs
// pack rhs's panel into a sequential chunk of memory
// and expand each coeff to a constant packet for further reuse
pack_rhs
(
blockB
,
&
rhs
(
k2
,
0
),
rhsStride
,
actual_kc
,
cols
);
pack_rhs
(
blockB
,
rhs
.
getSubMapper
(
k2
,
0
)
,
actual_kc
,
cols
);
// the select lhs's panel has to be split in three different parts:
// 1 - the transposed panel above the diagonal block => transposed packed copy
...
...
@@ -278,9 +374,9 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs
{
const
Index
actual_mc
=
(
std
::
min
)(
i2
+
mc
,
k2
)
-
i2
;
// transposed packed copy
pack_lhs_transposed
(
blockA
,
&
lhs
(
k
2
,
i
2
),
lhsStride
,
actual_kc
,
actual_mc
);
pack_lhs_transposed
(
blockA
,
lhs
_transpose
.
getSubMapper
(
i
2
,
k
2
),
actual_kc
,
actual_mc
);
gebp_kernel
(
res
+
i2
,
resStride
,
blockA
,
blockB
,
actual_mc
,
actual_kc
,
cols
,
alpha
);
gebp_kernel
(
res
.
getSubMapper
(
i2
,
0
)
,
blockA
,
blockB
,
actual_mc
,
actual_kc
,
cols
,
alpha
);
}
// the block diagonal
{
...
...
@@ -288,53 +384,65 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs
// symmetric packed copy
pack_lhs
(
blockA
,
&
lhs
(
k2
,
k2
),
lhsStride
,
actual_kc
,
actual_mc
);
gebp_kernel
(
res
+
k2
,
resStride
,
blockA
,
blockB
,
actual_mc
,
actual_kc
,
cols
,
alpha
);
gebp_kernel
(
res
.
getSubMapper
(
k2
,
0
)
,
blockA
,
blockB
,
actual_mc
,
actual_kc
,
cols
,
alpha
);
}
for
(
Index
i2
=
k2
+
kc
;
i2
<
size
;
i2
+=
mc
)
{
const
Index
actual_mc
=
(
std
::
min
)(
i2
+
mc
,
size
)
-
i2
;
gemm_pack_lhs
<
Scalar
,
Index
,
Traits
::
mr
,
Traits
::
LhsProgress
,
LhsStorageOrder
,
false
>
()
(
blockA
,
&
lhs
(
i2
,
k2
),
lhsStride
,
actual_kc
,
actual_mc
);
gemm_pack_lhs
<
Scalar
,
Index
,
LhsMapper
,
Traits
::
mr
,
Traits
::
LhsProgress
,
LhsStorageOrder
,
false
>
()
(
blockA
,
lhs
.
getSubMapper
(
i2
,
k2
)
,
actual_kc
,
actual_mc
);
gebp_kernel
(
res
+
i2
,
resStride
,
blockA
,
blockB
,
actual_mc
,
actual_kc
,
cols
,
alpha
);
gebp_kernel
(
res
.
getSubMapper
(
i2
,
0
)
,
blockA
,
blockB
,
actual_mc
,
actual_kc
,
cols
,
alpha
);
}
}
}
};
// matrix * selfadjoint product
template
<
typename
Scalar
,
typename
Index
,
int
LhsStorageOrder
,
bool
ConjugateLhs
,
int
RhsStorageOrder
,
bool
ConjugateRhs
>
struct
product_selfadjoint_matrix
<
Scalar
,
Index
,
LhsStorageOrder
,
false
,
ConjugateLhs
,
RhsStorageOrder
,
true
,
ConjugateRhs
,
ColMajor
>
int
RhsStorageOrder
,
bool
ConjugateRhs
,
int
ResInnerStride
>
struct
product_selfadjoint_matrix
<
Scalar
,
Index
,
LhsStorageOrder
,
false
,
ConjugateLhs
,
RhsStorageOrder
,
true
,
ConjugateRhs
,
ColMajor
,
ResInnerStride
>
{
static
EIGEN_DONT_INLINE
void
run
(
Index
rows
,
Index
cols
,
const
Scalar
*
_lhs
,
Index
lhsStride
,
const
Scalar
*
_rhs
,
Index
rhsStride
,
Scalar
*
res
,
Index
resStride
,
Scalar
alpha
)
Scalar
*
res
,
Index
resIncr
,
Index
resStride
,
const
Scalar
&
alpha
,
level3_blocking
<
Scalar
,
Scalar
>&
blocking
);
};
template
<
typename
Scalar
,
typename
Index
,
int
LhsStorageOrder
,
bool
ConjugateLhs
,
int
RhsStorageOrder
,
bool
ConjugateRhs
,
int
ResInnerStride
>
EIGEN_DONT_INLINE
void
product_selfadjoint_matrix
<
Scalar
,
Index
,
LhsStorageOrder
,
false
,
ConjugateLhs
,
RhsStorageOrder
,
true
,
ConjugateRhs
,
ColMajor
,
ResInnerStride
>::
run
(
Index
rows
,
Index
cols
,
const
Scalar
*
_lhs
,
Index
lhsStride
,
const
Scalar
*
_rhs
,
Index
rhsStride
,
Scalar
*
_res
,
Index
resIncr
,
Index
resStride
,
const
Scalar
&
alpha
,
level3_blocking
<
Scalar
,
Scalar
>&
blocking
)
{
Index
size
=
cols
;
const_blas_data_mapper
<
Scalar
,
Index
,
LhsStorageOrder
>
lhs
(
_lhs
,
lhsStride
);
typedef
gebp_traits
<
Scalar
,
Scalar
>
Traits
;
Index
kc
=
size
;
// cache block size along the K direction
Index
mc
=
rows
;
// cache block size along the M direction
Index
nc
=
cols
;
// cache block size along the N direction
computeProductBlockingSizes
<
Scalar
,
Scalar
>
(
kc
,
mc
,
nc
);
std
::
size_t
sizeW
=
kc
*
Traits
::
WorkSpaceFactor
;
std
::
size_t
sizeB
=
sizeW
+
kc
*
cols
;
ei_declare_aligned_stack_constructed_variable
(
Scalar
,
blockA
,
kc
*
mc
,
0
);
ei_declare_aligned_stack_constructed_variable
(
Scalar
,
allocatedBlockB
,
sizeB
,
0
);
Scalar
*
blockB
=
allocatedBlockB
+
sizeW
;
gebp_kernel
<
Scalar
,
Scalar
,
Index
,
Traits
::
mr
,
Traits
::
nr
,
ConjugateLhs
,
ConjugateRhs
>
gebp_kernel
;
gemm_pack_lhs
<
Scalar
,
Index
,
Traits
::
mr
,
Traits
::
LhsProgress
,
LhsStorageOrder
>
pack_lhs
;
typedef
const_blas_data_mapper
<
Scalar
,
Index
,
LhsStorageOrder
>
LhsMapper
;
typedef
blas_data_mapper
<
typename
Traits
::
ResScalar
,
Index
,
ColMajor
,
Unaligned
,
ResInnerStride
>
ResMapper
;
LhsMapper
lhs
(
_lhs
,
lhsStride
);
ResMapper
res
(
_res
,
resStride
,
resIncr
);
Index
kc
=
blocking
.
kc
();
// cache block size along the K direction
Index
mc
=
(
std
::
min
)(
rows
,
blocking
.
mc
());
// cache block size along the M direction
std
::
size_t
sizeA
=
kc
*
mc
;
std
::
size_t
sizeB
=
kc
*
cols
;
ei_declare_aligned_stack_constructed_variable
(
Scalar
,
blockA
,
sizeA
,
blocking
.
blockA
());
ei_declare_aligned_stack_constructed_variable
(
Scalar
,
blockB
,
sizeB
,
blocking
.
blockB
());
gebp_kernel
<
Scalar
,
Scalar
,
Index
,
ResMapper
,
Traits
::
mr
,
Traits
::
nr
,
ConjugateLhs
,
ConjugateRhs
>
gebp_kernel
;
gemm_pack_lhs
<
Scalar
,
Index
,
LhsMapper
,
Traits
::
mr
,
Traits
::
LhsProgress
,
LhsStorageOrder
>
pack_lhs
;
symm_pack_rhs
<
Scalar
,
Index
,
Traits
::
nr
,
RhsStorageOrder
>
pack_rhs
;
for
(
Index
k2
=
0
;
k2
<
size
;
k2
+=
kc
)
...
...
@@ -347,13 +455,12 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLh
for
(
Index
i2
=
0
;
i2
<
rows
;
i2
+=
mc
)
{
const
Index
actual_mc
=
(
std
::
min
)(
i2
+
mc
,
rows
)
-
i2
;
pack_lhs
(
blockA
,
&
lhs
(
i2
,
k2
),
lhsStride
,
actual_kc
,
actual_mc
);
pack_lhs
(
blockA
,
lhs
.
getSubMapper
(
i2
,
k2
)
,
actual_kc
,
actual_mc
);
gebp_kernel
(
res
+
i2
,
resStride
,
blockA
,
blockB
,
actual_mc
,
actual_kc
,
cols
,
alpha
);
gebp_kernel
(
res
.
getSubMapper
(
i2
,
0
)
,
blockA
,
blockB
,
actual_mc
,
actual_kc
,
cols
,
alpha
);
}
}
}
};
}
// end namespace internal
...
...
@@ -362,55 +469,59 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLh
***************************************************************************/
namespace
internal
{
template
<
typename
Lhs
,
int
LhsMode
,
typename
Rhs
,
int
RhsMode
>
struct
traits
<
SelfadjointProductMatrix
<
Lhs
,
LhsMode
,
false
,
Rhs
,
RhsMode
,
false
>
>
:
traits
<
ProductBase
<
SelfadjointProductMatrix
<
Lhs
,
LhsMode
,
false
,
Rhs
,
RhsMode
,
false
>
,
Lhs
,
Rhs
>
>
{};
}
template
<
typename
Lhs
,
int
LhsMode
,
typename
Rhs
,
int
RhsMode
>
struct
SelfadjointProductMatrix
<
Lhs
,
LhsMode
,
false
,
Rhs
,
RhsMode
,
false
>
:
public
ProductBase
<
SelfadjointProductMatrix
<
Lhs
,
LhsMode
,
false
,
Rhs
,
RhsMode
,
false
>
,
Lhs
,
Rhs
>
struct
selfadjoint_product_impl
<
Lhs
,
LhsMode
,
false
,
Rhs
,
RhsMode
,
false
>
{
EIGEN_PRODUCT_PUBLIC_INTERFACE
(
SelfadjointProductMatrix
)
SelfadjointProductMatrix
(
const
Lhs
&
lhs
,
const
Rhs
&
rhs
)
:
Base
(
lhs
,
rhs
)
{}
typedef
typename
Product
<
Lhs
,
Rhs
>::
Scalar
Scalar
;
typedef
internal
::
blas_traits
<
Lhs
>
LhsBlasTraits
;
typedef
typename
LhsBlasTraits
::
DirectLinearAccessType
ActualLhsType
;
typedef
internal
::
blas_traits
<
Rhs
>
RhsBlasTraits
;
typedef
typename
RhsBlasTraits
::
DirectLinearAccessType
ActualRhsType
;
enum
{
LhsIsUpper
=
(
LhsMode
&
(
Upper
|
Lower
))
==
Upper
,
LhsIsSelfAdjoint
=
(
LhsMode
&
SelfAdjoint
)
==
SelfAdjoint
,
RhsIsUpper
=
(
RhsMode
&
(
Upper
|
Lower
))
==
Upper
,
RhsIsSelfAdjoint
=
(
RhsMode
&
SelfAdjoint
)
==
SelfAdjoint
};
template
<
typename
Dest
>
void
scaleAndAddTo
(
Dest
&
dst
,
Scalar
alpha
)
const
template
<
typename
Dest
>
static
void
run
(
Dest
&
dst
,
const
Lhs
&
a_lhs
,
const
Rhs
&
a_rhs
,
const
Scalar
&
alpha
)
{
eigen_assert
(
dst
.
rows
()
==
m
_lhs
.
rows
()
&&
dst
.
cols
()
==
m
_rhs
.
cols
());
eigen_assert
(
dst
.
rows
()
==
a
_lhs
.
rows
()
&&
dst
.
cols
()
==
a
_rhs
.
cols
());
typename
internal
::
add_const_on_value_type
<
ActualLhsType
>::
type
lhs
=
LhsBlasTraits
::
extract
(
m
_lhs
);
typename
internal
::
add_const_on_value_type
<
ActualRhsType
>::
type
rhs
=
RhsBlasTraits
::
extract
(
m
_rhs
);
typename
internal
::
add_const_on_value_type
<
ActualLhsType
>::
type
lhs
=
LhsBlasTraits
::
extract
(
a
_lhs
);
typename
internal
::
add_const_on_value_type
<
ActualRhsType
>::
type
rhs
=
RhsBlasTraits
::
extract
(
a
_rhs
);
Scalar
actualAlpha
=
alpha
*
LhsBlasTraits
::
extractScalarFactor
(
m_lhs
)
*
RhsBlasTraits
::
extractScalarFactor
(
m_rhs
);
Scalar
actualAlpha
=
alpha
*
LhsBlasTraits
::
extractScalarFactor
(
a_lhs
)
*
RhsBlasTraits
::
extractScalarFactor
(
a_rhs
);
typedef
internal
::
gemm_blocking_space
<
(
Dest
::
Flags
&
RowMajorBit
)
?
RowMajor
:
ColMajor
,
Scalar
,
Scalar
,
Lhs
::
MaxRowsAtCompileTime
,
Rhs
::
MaxColsAtCompileTime
,
Lhs
::
MaxColsAtCompileTime
,
1
>
BlockingType
;
BlockingType
blocking
(
lhs
.
rows
(),
rhs
.
cols
(),
lhs
.
cols
(),
1
,
false
);
internal
::
product_selfadjoint_matrix
<
Scalar
,
Index
,
EIGEN_LOGICAL_XOR
(
LhsIsUpper
,
internal
::
traits
<
Lhs
>::
Flags
&
RowMajorBit
)
?
RowMajor
:
ColMajor
,
LhsIsSelfAdjoint
,
EIGEN_LOGICAL_XOR
(
LhsIsUpper
,
internal
::
traits
<
Lhs
>::
Flags
&
RowMajorBit
)
?
RowMajor
:
ColMajor
,
LhsIsSelfAdjoint
,
NumTraits
<
Scalar
>::
IsComplex
&&
EIGEN_LOGICAL_XOR
(
LhsIsUpper
,
bool
(
LhsBlasTraits
::
NeedToConjugate
)),
EIGEN_LOGICAL_XOR
(
RhsIsUpper
,
internal
::
traits
<
Rhs
>::
Flags
&
RowMajorBit
)
?
RowMajor
:
ColMajor
,
RhsIsSelfAdjoint
,
EIGEN_LOGICAL_XOR
(
RhsIsUpper
,
internal
::
traits
<
Rhs
>::
Flags
&
RowMajorBit
)
?
RowMajor
:
ColMajor
,
RhsIsSelfAdjoint
,
NumTraits
<
Scalar
>::
IsComplex
&&
EIGEN_LOGICAL_XOR
(
RhsIsUpper
,
bool
(
RhsBlasTraits
::
NeedToConjugate
)),
internal
::
traits
<
Dest
>::
Flags
&
RowMajorBit
?
RowMajor
:
ColMajor
>
internal
::
traits
<
Dest
>::
Flags
&
RowMajorBit
?
RowMajor
:
ColMajor
,
Dest
::
InnerStrideAtCompileTime
>
::
run
(
lhs
.
rows
(),
rhs
.
cols
(),
// sizes
&
lhs
.
coeffRef
(
0
,
0
),
lhs
.
outerStride
(),
// lhs info
&
rhs
.
coeffRef
(
0
,
0
),
rhs
.
outerStride
(),
// rhs info
&
dst
.
coeffRef
(
0
,
0
),
dst
.
outerStride
(),
// result info
actualAlpha
// alpha
&
lhs
.
coeffRef
(
0
,
0
),
lhs
.
outerStride
(),
// lhs info
&
rhs
.
coeffRef
(
0
,
0
),
rhs
.
outerStride
(),
// rhs info
&
dst
.
coeffRef
(
0
,
0
),
dst
.
innerStride
(),
dst
.
outerStride
(),
// result info
actualAlpha
,
blocking
// alpha
);
}
};
}
// end namespace internal
}
// end namespace Eigen
#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_H
pydensecrf/densecrf/include/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h
0 → 100644
View file @
13b115ab
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
********************************************************************************
* Content : Eigen bindings to BLAS F77
* Self adjoint matrix * matrix product functionality based on ?SYMM/?HEMM.
********************************************************************************
*/
#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_BLAS_H
#define EIGEN_SELFADJOINT_MATRIX_MATRIX_BLAS_H
namespace
Eigen
{
namespace
internal
{
/* Optimized selfadjoint matrix * matrix (?SYMM/?HEMM) product */
#define EIGEN_BLAS_SYMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
template <typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor,1> \
{\
\
static void run( \
Index rows, Index cols, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resIncr, Index resStride, \
EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>&
/*blocking*/
) \
{ \
EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \
eigen_assert(resIncr == 1); \
char side='L', uplo='L'; \
BlasIndex m, n, lda, ldb, ldc; \
const EIGTYPE *a, *b; \
EIGTYPE beta(1); \
MatrixX##EIGPREFIX b_tmp; \
\
/* Set transpose options */
\
/* Set m, n, k */
\
m = convert_index<BlasIndex>(rows); \
n = convert_index<BlasIndex>(cols); \
\
/* Set lda, ldb, ldc */
\
lda = convert_index<BlasIndex>(lhsStride); \
ldb = convert_index<BlasIndex>(rhsStride); \
ldc = convert_index<BlasIndex>(resStride); \
\
/* Set a, b, c */
\
if (LhsStorageOrder==RowMajor) uplo='U'; \
a = _lhs; \
\
if (RhsStorageOrder==RowMajor) { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \
b_tmp = rhs.adjoint(); \
b = b_tmp.data(); \
ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
} else b = _rhs; \
\
BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
\
} \
};
#define EIGEN_BLAS_HEMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
template <typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor,1> \
{\
static void run( \
Index rows, Index cols, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resIncr, Index resStride, \
EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>&
/*blocking*/
) \
{ \
EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \
eigen_assert(resIncr == 1); \
char side='L', uplo='L'; \
BlasIndex m, n, lda, ldb, ldc; \
const EIGTYPE *a, *b; \
EIGTYPE beta(1); \
MatrixX##EIGPREFIX b_tmp; \
Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> a_tmp; \
\
/* Set transpose options */
\
/* Set m, n, k */
\
m = convert_index<BlasIndex>(rows); \
n = convert_index<BlasIndex>(cols); \
\
/* Set lda, ldb, ldc */
\
lda = convert_index<BlasIndex>(lhsStride); \
ldb = convert_index<BlasIndex>(rhsStride); \
ldc = convert_index<BlasIndex>(resStride); \
\
/* Set a, b, c */
\
if (((LhsStorageOrder==ColMajor) && ConjugateLhs) || ((LhsStorageOrder==RowMajor) && (!ConjugateLhs))) { \
Map<const Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder>, 0, OuterStride<> > lhs(_lhs,m,m,OuterStride<>(lhsStride)); \
a_tmp = lhs.conjugate(); \
a = a_tmp.data(); \
lda = convert_index<BlasIndex>(a_tmp.outerStride()); \
} else a = _lhs; \
if (LhsStorageOrder==RowMajor) uplo='U'; \
\
if (RhsStorageOrder==ColMajor && (!ConjugateRhs)) { \
b = _rhs; } \
else { \
if (RhsStorageOrder==ColMajor && ConjugateRhs) { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,m,n,OuterStride<>(rhsStride)); \
b_tmp = rhs.conjugate(); \
} else \
if (ConjugateRhs) { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \
b_tmp = rhs.adjoint(); \
} else { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > rhs(_rhs,n,m,OuterStride<>(rhsStride)); \
b_tmp = rhs.transpose(); \
} \
b = b_tmp.data(); \
ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
} \
\
BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
\
} \
};
#ifdef EIGEN_USE_MKL
EIGEN_BLAS_SYMM_L
(
double
,
double
,
d
,
dsymm
)
EIGEN_BLAS_SYMM_L
(
float
,
float
,
f
,
ssymm
)
EIGEN_BLAS_HEMM_L
(
dcomplex
,
MKL_Complex16
,
cd
,
zhemm
)
EIGEN_BLAS_HEMM_L
(
scomplex
,
MKL_Complex8
,
cf
,
chemm
)
#else
EIGEN_BLAS_SYMM_L
(
double
,
double
,
d
,
dsymm_
)
EIGEN_BLAS_SYMM_L
(
float
,
float
,
f
,
ssymm_
)
EIGEN_BLAS_HEMM_L
(
dcomplex
,
double
,
cd
,
zhemm_
)
EIGEN_BLAS_HEMM_L
(
scomplex
,
float
,
cf
,
chemm_
)
#endif
/* Optimized matrix * selfadjoint matrix (?SYMM/?HEMM) product */
#define EIGEN_BLAS_SYMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
template <typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor,1> \
{\
\
static void run( \
Index rows, Index cols, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resIncr, Index resStride, \
EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>&
/*blocking*/
) \
{ \
EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \
eigen_assert(resIncr == 1); \
char side='R', uplo='L'; \
BlasIndex m, n, lda, ldb, ldc; \
const EIGTYPE *a, *b; \
EIGTYPE beta(1); \
MatrixX##EIGPREFIX b_tmp; \
\
/* Set m, n, k */
\
m = convert_index<BlasIndex>(rows); \
n = convert_index<BlasIndex>(cols); \
\
/* Set lda, ldb, ldc */
\
lda = convert_index<BlasIndex>(rhsStride); \
ldb = convert_index<BlasIndex>(lhsStride); \
ldc = convert_index<BlasIndex>(resStride); \
\
/* Set a, b, c */
\
if (RhsStorageOrder==RowMajor) uplo='U'; \
a = _rhs; \
\
if (LhsStorageOrder==RowMajor) { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(rhsStride)); \
b_tmp = lhs.adjoint(); \
b = b_tmp.data(); \
ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
} else b = _lhs; \
\
BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
\
} \
};
#define EIGEN_BLAS_HEMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
template <typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor,1> \
{\
static void run( \
Index rows, Index cols, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resIncr, Index resStride, \
EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>&
/*blocking*/
) \
{ \
EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \
eigen_assert(resIncr == 1); \
char side='R', uplo='L'; \
BlasIndex m, n, lda, ldb, ldc; \
const EIGTYPE *a, *b; \
EIGTYPE beta(1); \
MatrixX##EIGPREFIX b_tmp; \
Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> a_tmp; \
\
/* Set m, n, k */
\
m = convert_index<BlasIndex>(rows); \
n = convert_index<BlasIndex>(cols); \
\
/* Set lda, ldb, ldc */
\
lda = convert_index<BlasIndex>(rhsStride); \
ldb = convert_index<BlasIndex>(lhsStride); \
ldc = convert_index<BlasIndex>(resStride); \
\
/* Set a, b, c */
\
if (((RhsStorageOrder==ColMajor) && ConjugateRhs) || ((RhsStorageOrder==RowMajor) && (!ConjugateRhs))) { \
Map<const Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder>, 0, OuterStride<> > rhs(_rhs,n,n,OuterStride<>(rhsStride)); \
a_tmp = rhs.conjugate(); \
a = a_tmp.data(); \
lda = convert_index<BlasIndex>(a_tmp.outerStride()); \
} else a = _rhs; \
if (RhsStorageOrder==RowMajor) uplo='U'; \
\
if (LhsStorageOrder==ColMajor && (!ConjugateLhs)) { \
b = _lhs; } \
else { \
if (LhsStorageOrder==ColMajor && ConjugateLhs) { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,m,n,OuterStride<>(lhsStride)); \
b_tmp = lhs.conjugate(); \
} else \
if (ConjugateLhs) { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(lhsStride)); \
b_tmp = lhs.adjoint(); \
} else { \
Map<const MatrixX##EIGPREFIX, 0, OuterStride<> > lhs(_lhs,n,m,OuterStride<>(lhsStride)); \
b_tmp = lhs.transpose(); \
} \
b = b_tmp.data(); \
ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
} \
\
BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
} \
};
#ifdef EIGEN_USE_MKL
EIGEN_BLAS_SYMM_R
(
double
,
double
,
d
,
dsymm
)
EIGEN_BLAS_SYMM_R
(
float
,
float
,
f
,
ssymm
)
EIGEN_BLAS_HEMM_R
(
dcomplex
,
MKL_Complex16
,
cd
,
zhemm
)
EIGEN_BLAS_HEMM_R
(
scomplex
,
MKL_Complex8
,
cf
,
chemm
)
#else
EIGEN_BLAS_SYMM_R
(
double
,
double
,
d
,
dsymm_
)
EIGEN_BLAS_SYMM_R
(
float
,
float
,
f
,
ssymm_
)
EIGEN_BLAS_HEMM_R
(
dcomplex
,
double
,
cd
,
zhemm_
)
EIGEN_BLAS_HEMM_R
(
scomplex
,
float
,
cf
,
chemm_
)
#endif
}
// end namespace internal
}
// end namespace Eigen
#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_BLAS_H
pydensecrf/densecrf/include/Eigen/src/Core/products/SelfadjointMatrixVector.h
View file @
13b115ab
...
...
@@ -30,7 +30,16 @@ struct selfadjoint_matrix_vector_product
static
EIGEN_DONT_INLINE
void
run
(
Index
size
,
const
Scalar
*
lhs
,
Index
lhsStride
,
const
Scalar
*
_rhs
,
Index
rhsIncr
,
const
Scalar
*
rhs
,
Scalar
*
res
,
Scalar
alpha
);
};
template
<
typename
Scalar
,
typename
Index
,
int
StorageOrder
,
int
UpLo
,
bool
ConjugateLhs
,
bool
ConjugateRhs
,
int
Version
>
EIGEN_DONT_INLINE
void
selfadjoint_matrix_vector_product
<
Scalar
,
Index
,
StorageOrder
,
UpLo
,
ConjugateLhs
,
ConjugateRhs
,
Version
>::
run
(
Index
size
,
const
Scalar
*
lhs
,
Index
lhsStride
,
const
Scalar
*
rhs
,
Scalar
*
res
,
Scalar
alpha
)
{
...
...
@@ -46,23 +55,13 @@ static EIGEN_DONT_INLINE void run(
conj_helper
<
Scalar
,
Scalar
,
NumTraits
<
Scalar
>::
IsComplex
&&
EIGEN_LOGICAL_XOR
(
ConjugateLhs
,
IsRowMajor
),
ConjugateRhs
>
cj0
;
conj_helper
<
Scalar
,
Scalar
,
NumTraits
<
Scalar
>::
IsComplex
&&
EIGEN_LOGICAL_XOR
(
ConjugateLhs
,
!
IsRowMajor
),
ConjugateRhs
>
cj1
;
conj_helper
<
Scalar
,
Scalar
,
NumTraits
<
Scalar
>::
IsComplex
,
ConjugateRhs
>
cjd
;
conj_helper
<
Real
Scalar
,
Scalar
,
false
,
ConjugateRhs
>
cjd
;
conj_helper
<
Packet
,
Packet
,
NumTraits
<
Scalar
>::
IsComplex
&&
EIGEN_LOGICAL_XOR
(
ConjugateLhs
,
IsRowMajor
),
ConjugateRhs
>
pcj0
;
conj_helper
<
Packet
,
Packet
,
NumTraits
<
Scalar
>::
IsComplex
&&
EIGEN_LOGICAL_XOR
(
ConjugateLhs
,
!
IsRowMajor
),
ConjugateRhs
>
pcj1
;
Scalar
cjAlpha
=
ConjugateRhs
?
conj
(
alpha
)
:
alpha
;
Scalar
cjAlpha
=
ConjugateRhs
?
numext
::
conj
(
alpha
)
:
alpha
;
// FIXME this copy is now handled outside product_selfadjoint_vector, so it could probably be removed.
// if the rhs is not sequentially stored in memory we copy it to a temporary buffer,
// this is because we need to extract packets
ei_declare_aligned_stack_constructed_variable
(
Scalar
,
rhs
,
size
,
rhsIncr
==
1
?
const_cast
<
Scalar
*>
(
_rhs
)
:
0
);
if
(
rhsIncr
!=
1
)
{
const
Scalar
*
it
=
_rhs
;
for
(
Index
i
=
0
;
i
<
size
;
++
i
,
it
+=
rhsIncr
)
rhs
[
i
]
=
*
it
;
}
Index
bound
=
(
std
::
max
)(
Index
(
0
),
size
-
8
)
&
0xfffffffe
;
if
(
FirstTriangular
)
...
...
@@ -71,8 +70,8 @@ static EIGEN_DONT_INLINE void run(
for
(
Index
j
=
FirstTriangular
?
bound
:
0
;
j
<
(
FirstTriangular
?
size
:
bound
);
j
+=
2
)
{
register
const
Scalar
*
EIGEN_RESTRICT
A0
=
lhs
+
j
*
lhsStride
;
register
const
Scalar
*
EIGEN_RESTRICT
A1
=
lhs
+
(
j
+
1
)
*
lhsStride
;
const
Scalar
*
EIGEN_RESTRICT
A0
=
lhs
+
j
*
lhsStride
;
const
Scalar
*
EIGEN_RESTRICT
A1
=
lhs
+
(
j
+
1
)
*
lhsStride
;
Scalar
t0
=
cjAlpha
*
rhs
[
j
];
Packet
ptmp0
=
pset1
<
Packet
>
(
t0
);
...
...
@@ -84,14 +83,13 @@ static EIGEN_DONT_INLINE void run(
Scalar
t3
(
0
);
Packet
ptmp3
=
pset1
<
Packet
>
(
t3
);
size_t
starti
=
FirstTriangular
?
0
:
j
+
2
;
size_t
endi
=
FirstTriangular
?
j
:
size
;
size_t
alignedStart
=
(
starti
)
+
internal
::
first_aligned
(
&
res
[
starti
],
endi
-
starti
);
size_t
alignedEnd
=
alignedStart
+
((
endi
-
alignedStart
)
/
(
PacketSize
))
*
(
PacketSize
);
Index
starti
=
FirstTriangular
?
0
:
j
+
2
;
Index
endi
=
FirstTriangular
?
j
:
size
;
Index
alignedStart
=
(
starti
)
+
internal
::
first_
default_
aligned
(
&
res
[
starti
],
endi
-
starti
);
Index
alignedEnd
=
alignedStart
+
((
endi
-
alignedStart
)
/
(
PacketSize
))
*
(
PacketSize
);
// TODO make sure this product is a real * complex and that the rhs is properly conjugated if needed
res
[
j
]
+=
cjd
.
pmul
(
internal
::
real
(
A0
[
j
]),
t0
);
res
[
j
+
1
]
+=
cjd
.
pmul
(
internal
::
real
(
A1
[
j
+
1
]),
t1
);
res
[
j
]
+=
cjd
.
pmul
(
numext
::
real
(
A0
[
j
]),
t0
);
res
[
j
+
1
]
+=
cjd
.
pmul
(
numext
::
real
(
A1
[
j
+
1
]),
t1
);
if
(
FirstTriangular
)
{
res
[
j
]
+=
cj0
.
pmul
(
A1
[
j
],
t1
);
...
...
@@ -103,11 +101,11 @@ static EIGEN_DONT_INLINE void run(
t2
+=
cj1
.
pmul
(
A0
[
j
+
1
],
rhs
[
j
+
1
]);
}
for
(
size_t
i
=
starti
;
i
<
alignedStart
;
++
i
)
for
(
Index
i
=
starti
;
i
<
alignedStart
;
++
i
)
{
res
[
i
]
+=
t0
*
A0
[
i
]
+
t1
*
A1
[
i
];
t2
+=
c
onj
(
A0
[
i
]
)
*
rhs
[
i
];
t3
+=
c
onj
(
A1
[
i
]
)
*
rhs
[
i
];
res
[
i
]
+=
cj0
.
pmul
(
A0
[
i
],
t0
)
+
cj0
.
pmul
(
A1
[
i
]
,
t1
)
;
t2
+=
c
j1
.
pmul
(
A0
[
i
]
,
rhs
[
i
]
)
;
t3
+=
c
j1
.
pmul
(
A1
[
i
]
,
rhs
[
i
]
)
;
}
// Yes this an optimization for gcc 4.3 and 4.4 (=> huge speed up)
// gcc 4.2 does this optimization automatically.
...
...
@@ -115,7 +113,7 @@ static EIGEN_DONT_INLINE void run(
const
Scalar
*
EIGEN_RESTRICT
a1It
=
A1
+
alignedStart
;
const
Scalar
*
EIGEN_RESTRICT
rhsIt
=
rhs
+
alignedStart
;
Scalar
*
EIGEN_RESTRICT
resIt
=
res
+
alignedStart
;
for
(
size_t
i
=
alignedStart
;
i
<
alignedEnd
;
i
+=
PacketSize
)
for
(
Index
i
=
alignedStart
;
i
<
alignedEnd
;
i
+=
PacketSize
)
{
Packet
A0i
=
ploadu
<
Packet
>
(
a0It
);
a0It
+=
PacketSize
;
Packet
A1i
=
ploadu
<
Packet
>
(
a1It
);
a1It
+=
PacketSize
;
...
...
@@ -127,7 +125,7 @@ static EIGEN_DONT_INLINE void run(
ptmp3
=
pcj1
.
pmadd
(
A1i
,
Bi
,
ptmp3
);
pstore
(
resIt
,
Xi
);
resIt
+=
PacketSize
;
}
for
(
size_t
i
=
alignedEnd
;
i
<
endi
;
i
++
)
for
(
Index
i
=
alignedEnd
;
i
<
endi
;
i
++
)
{
res
[
i
]
+=
cj0
.
pmul
(
A0
[
i
],
t0
)
+
cj0
.
pmul
(
A1
[
i
],
t1
);
t2
+=
cj1
.
pmul
(
A0
[
i
],
rhs
[
i
]);
...
...
@@ -139,12 +137,11 @@ static EIGEN_DONT_INLINE void run(
}
for
(
Index
j
=
FirstTriangular
?
0
:
bound
;
j
<
(
FirstTriangular
?
bound
:
size
);
j
++
)
{
register
const
Scalar
*
EIGEN_RESTRICT
A0
=
lhs
+
j
*
lhsStride
;
const
Scalar
*
EIGEN_RESTRICT
A0
=
lhs
+
j
*
lhsStride
;
Scalar
t1
=
cjAlpha
*
rhs
[
j
];
Scalar
t2
(
0
);
// TODO make sure this product is a real * complex and that the rhs is properly conjugated if needed
res
[
j
]
+=
cjd
.
pmul
(
internal
::
real
(
A0
[
j
]),
t1
);
res
[
j
]
+=
cjd
.
pmul
(
numext
::
real
(
A0
[
j
]),
t1
);
for
(
Index
i
=
FirstTriangular
?
0
:
j
+
1
;
i
<
(
FirstTriangular
?
j
:
size
);
i
++
)
{
res
[
i
]
+=
cj0
.
pmul
(
A0
[
i
],
t1
);
...
...
@@ -153,7 +150,6 @@ static EIGEN_DONT_INLINE void run(
res
[
j
]
+=
alpha
*
t2
;
}
}
};
}
// end namespace internal
...
...
@@ -162,45 +158,44 @@ static EIGEN_DONT_INLINE void run(
***************************************************************************/
namespace
internal
{
template
<
typename
Lhs
,
int
LhsMode
,
typename
Rhs
>
struct
traits
<
SelfadjointProductMatrix
<
Lhs
,
LhsMode
,
false
,
Rhs
,
0
,
true
>
>
:
traits
<
ProductBase
<
SelfadjointProductMatrix
<
Lhs
,
LhsMode
,
false
,
Rhs
,
0
,
true
>
,
Lhs
,
Rhs
>
>
{};
}
template
<
typename
Lhs
,
int
LhsMode
,
typename
Rhs
>
struct
SelfadjointProductMatrix
<
Lhs
,
LhsMode
,
false
,
Rhs
,
0
,
true
>
:
public
ProductBase
<
SelfadjointProductMatrix
<
Lhs
,
LhsMode
,
false
,
Rhs
,
0
,
true
>
,
Lhs
,
Rhs
>
struct
selfadjoint_product_impl
<
Lhs
,
LhsMode
,
false
,
Rhs
,
0
,
true
>
{
EIGEN_PRODUCT_PUBLIC_INTERFACE
(
SelfadjointProductMatrix
)
enum
{
LhsUpLo
=
LhsMode
&
(
Upper
|
Lower
)
};
SelfadjointProductMatrix
(
const
Lhs
&
lhs
,
const
Rhs
&
rhs
)
:
Base
(
lhs
,
rhs
)
{}
template
<
typename
Dest
>
void
scaleAndAddTo
(
Dest
&
dest
,
Scalar
alpha
)
const
typedef
typename
Product
<
Lhs
,
Rhs
>::
Scalar
Scalar
;
typedef
internal
::
blas_traits
<
Lhs
>
LhsBlasTraits
;
typedef
typename
LhsBlasTraits
::
DirectLinearAccessType
ActualLhsType
;
typedef
typename
internal
::
remove_all
<
ActualLhsType
>::
type
ActualLhsTypeCleaned
;
typedef
internal
::
blas_traits
<
Rhs
>
RhsBlasTraits
;
typedef
typename
RhsBlasTraits
::
DirectLinearAccessType
ActualRhsType
;
typedef
typename
internal
::
remove_all
<
ActualRhsType
>::
type
ActualRhsTypeCleaned
;
enum
{
LhsUpLo
=
LhsMode
&
(
Upper
|
Lower
)
};
template
<
typename
Dest
>
static
void
run
(
Dest
&
dest
,
const
Lhs
&
a_lhs
,
const
Rhs
&
a_rhs
,
const
Scalar
&
alpha
)
{
typedef
typename
Dest
::
Scalar
ResScalar
;
typedef
typename
Base
::
RhsScalar
RhsScalar
;
typedef
Map
<
Matrix
<
ResScalar
,
Dynamic
,
1
>
,
Aligned
>
MappedDest
;
typedef
typename
Rhs
::
Scalar
RhsScalar
;
typedef
Map
<
Matrix
<
ResScalar
,
Dynamic
,
1
>
,
EIGEN_PLAIN_ENUM_MIN
(
AlignedMax
,
internal
::
packet_traits
<
ResScalar
>::
size
)
>
MappedDest
;
eigen_assert
(
dest
.
rows
()
==
m
_lhs
.
rows
()
&&
dest
.
cols
()
==
m
_rhs
.
cols
());
eigen_assert
(
dest
.
rows
()
==
a
_lhs
.
rows
()
&&
dest
.
cols
()
==
a
_rhs
.
cols
());
typename
internal
::
add_const_on_value_type
<
ActualLhsType
>::
type
lhs
=
LhsBlasTraits
::
extract
(
m
_lhs
);
typename
internal
::
add_const_on_value_type
<
ActualRhsType
>::
type
rhs
=
RhsBlasTraits
::
extract
(
m
_rhs
);
typename
internal
::
add_const_on_value_type
<
ActualLhsType
>::
type
lhs
=
LhsBlasTraits
::
extract
(
a
_lhs
);
typename
internal
::
add_const_on_value_type
<
ActualRhsType
>::
type
rhs
=
RhsBlasTraits
::
extract
(
a
_rhs
);
Scalar
actualAlpha
=
alpha
*
LhsBlasTraits
::
extractScalarFactor
(
m
_lhs
)
*
RhsBlasTraits
::
extractScalarFactor
(
m
_rhs
);
Scalar
actualAlpha
=
alpha
*
LhsBlasTraits
::
extractScalarFactor
(
a
_lhs
)
*
RhsBlasTraits
::
extractScalarFactor
(
a
_rhs
);
enum
{
EvalToDest
=
(
Dest
::
InnerStrideAtCompileTime
==
1
),
UseRhs
=
(
_
ActualRhsType
::
InnerStrideAtCompileTime
==
1
)
UseRhs
=
(
ActualRhsType
Cleaned
::
InnerStrideAtCompileTime
==
1
)
};
internal
::
gemv_static_vector_if
<
ResScalar
,
Dest
::
SizeAtCompileTime
,
Dest
::
MaxSizeAtCompileTime
,
!
EvalToDest
>
static_dest
;
internal
::
gemv_static_vector_if
<
RhsScalar
,
_
ActualRhsType
::
SizeAtCompileTime
,
_
ActualRhsType
::
MaxSizeAtCompileTime
,
!
UseRhs
>
static_rhs
;
internal
::
gemv_static_vector_if
<
RhsScalar
,
ActualRhsType
Cleaned
::
SizeAtCompileTime
,
ActualRhsType
Cleaned
::
MaxSizeAtCompileTime
,
!
UseRhs
>
static_rhs
;
ei_declare_aligned_stack_constructed_variable
(
ResScalar
,
actualDestPtr
,
dest
.
size
(),
EvalToDest
?
dest
.
data
()
:
static_dest
.
data
());
...
...
@@ -211,7 +206,7 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
if
(
!
EvalToDest
)
{
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
int
size
=
dest
.
size
();
Index
size
=
dest
.
size
();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
MappedDest
(
actualDestPtr
,
dest
.
size
())
=
dest
;
...
...
@@ -220,18 +215,19 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
if
(
!
UseRhs
)
{
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
int
size
=
rhs
.
size
();
Index
size
=
rhs
.
size
();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
Map
<
typename
_
ActualRhsType
::
PlainObject
>
(
actualRhsPtr
,
rhs
.
size
())
=
rhs
;
Map
<
typename
ActualRhsType
Cleaned
::
PlainObject
>
(
actualRhsPtr
,
rhs
.
size
())
=
rhs
;
}
internal
::
selfadjoint_matrix_vector_product
<
Scalar
,
Index
,
(
internal
::
traits
<
_ActualLhsType
>::
Flags
&
RowMajorBit
)
?
RowMajor
:
ColMajor
,
int
(
LhsUpLo
),
bool
(
LhsBlasTraits
::
NeedToConjugate
),
bool
(
RhsBlasTraits
::
NeedToConjugate
)
>::
run
internal
::
selfadjoint_matrix_vector_product
<
Scalar
,
Index
,
(
internal
::
traits
<
ActualLhsTypeCleaned
>::
Flags
&
RowMajorBit
)
?
RowMajor
:
ColMajor
,
int
(
LhsUpLo
),
bool
(
LhsBlasTraits
::
NeedToConjugate
),
bool
(
RhsBlasTraits
::
NeedToConjugate
)
>::
run
(
lhs
.
rows
(),
// size
&
lhs
.
coeffRef
(
0
,
0
),
lhs
.
outerStride
(),
// lhs info
actualRhsPtr
,
1
,
// rhs info
actualRhsPtr
,
// rhs info
actualDestPtr
,
// result info
actualAlpha
// scale factor
);
...
...
@@ -241,34 +237,24 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>
}
};
namespace
internal
{
template
<
typename
Lhs
,
typename
Rhs
,
int
RhsMode
>
struct
traits
<
SelfadjointProductMatrix
<
Lhs
,
0
,
true
,
Rhs
,
RhsMode
,
false
>
>
:
traits
<
ProductBase
<
SelfadjointProductMatrix
<
Lhs
,
0
,
true
,
Rhs
,
RhsMode
,
false
>
,
Lhs
,
Rhs
>
>
{};
}
template
<
typename
Lhs
,
typename
Rhs
,
int
RhsMode
>
struct
SelfadjointProductMatrix
<
Lhs
,
0
,
true
,
Rhs
,
RhsMode
,
false
>
:
public
ProductBase
<
SelfadjointProductMatrix
<
Lhs
,
0
,
true
,
Rhs
,
RhsMode
,
false
>
,
Lhs
,
Rhs
>
struct
selfadjoint_product_impl
<
Lhs
,
0
,
true
,
Rhs
,
RhsMode
,
false
>
{
EIGEN_PRODUCT_PUBLIC_INTERFACE
(
SelfadjointProductMatrix
)
typedef
typename
Product
<
Lhs
,
Rhs
>::
Scalar
Scalar
;
enum
{
RhsUpLo
=
RhsMode
&
(
Upper
|
Lower
)
};
enum
{
RhsUpLo
=
RhsMode
&
(
Upper
|
Lower
)
};
SelfadjointProductMatrix
(
const
Lhs
&
lhs
,
const
Rhs
&
rhs
)
:
Base
(
lhs
,
rhs
)
{}
template
<
typename
Dest
>
void
scaleAndAddTo
(
Dest
&
dest
,
Scalar
alpha
)
const
template
<
typename
Dest
>
static
void
run
(
Dest
&
dest
,
const
Lhs
&
a_lhs
,
const
Rhs
&
a_rhs
,
const
Scalar
&
alpha
)
{
// let's simply transpose the product
Transpose
<
Dest
>
destT
(
dest
);
S
elfadjoint
P
roduct
Matrix
<
Transpose
<
const
Rhs
>
,
int
(
RhsUpLo
)
==
Upper
?
Lower
:
Upper
,
false
,
Transpose
<
const
Lhs
>
,
0
,
true
>
(
m
_rhs
.
transpose
(),
m
_lhs
.
transpose
()
).
scaleAndAddTo
(
destT
,
alpha
);
s
elfadjoint
_p
roduct
_impl
<
Transpose
<
const
Rhs
>
,
int
(
RhsUpLo
)
==
Upper
?
Lower
:
Upper
,
false
,
Transpose
<
const
Lhs
>
,
0
,
true
>
::
run
(
destT
,
a
_rhs
.
transpose
(),
a
_lhs
.
transpose
(),
alpha
);
}
};
}
// end namespace internal
}
// end namespace Eigen
#endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_H
pydensecrf/densecrf/include/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h
0 → 100644
View file @
13b115ab
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
* Content : Eigen bindings to BLAS F77
* Selfadjoint matrix-vector product functionality based on ?SYMV/HEMV.
********************************************************************************
*/
#ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_BLAS_H
#define EIGEN_SELFADJOINT_MATRIX_VECTOR_BLAS_H
namespace
Eigen
{
namespace
internal
{
/**********************************************************************
* This file implements selfadjoint matrix-vector multiplication using BLAS
**********************************************************************/
// symv/hemv specialization
template
<
typename
Scalar
,
typename
Index
,
int
StorageOrder
,
int
UpLo
,
bool
ConjugateLhs
,
bool
ConjugateRhs
>
struct
selfadjoint_matrix_vector_product_symv
:
selfadjoint_matrix_vector_product
<
Scalar
,
Index
,
StorageOrder
,
UpLo
,
ConjugateLhs
,
ConjugateRhs
,
BuiltIn
>
{};
#define EIGEN_BLAS_SYMV_SPECIALIZE(Scalar) \
template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \
struct selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Specialized> { \
static void run( \
Index size, const Scalar* lhs, Index lhsStride, \
const Scalar* _rhs, Scalar* res, Scalar alpha) { \
enum {\
IsColMajor = StorageOrder==ColMajor \
}; \
if (IsColMajor == ConjugateLhs) {\
selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,BuiltIn>::run( \
size, lhs, lhsStride, _rhs, res, alpha); \
} else {\
selfadjoint_matrix_vector_product_symv<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs>::run( \
size, lhs, lhsStride, _rhs, res, alpha); \
}\
} \
}; \
EIGEN_BLAS_SYMV_SPECIALIZE
(
double
)
EIGEN_BLAS_SYMV_SPECIALIZE
(
float
)
EIGEN_BLAS_SYMV_SPECIALIZE
(
dcomplex
)
EIGEN_BLAS_SYMV_SPECIALIZE
(
scomplex
)
#define EIGEN_BLAS_SYMV_SPECIALIZATION(EIGTYPE,BLASTYPE,BLASFUNC) \
template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \
struct selfadjoint_matrix_vector_product_symv<EIGTYPE,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs> \
{ \
typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> SYMVVector;\
\
static void run( \
Index size, const EIGTYPE* lhs, Index lhsStride, \
const EIGTYPE* _rhs, EIGTYPE* res, EIGTYPE alpha) \
{ \
enum {\
IsRowMajor = StorageOrder==RowMajor ? 1 : 0, \
IsLower = UpLo == Lower ? 1 : 0 \
}; \
BlasIndex n=convert_index<BlasIndex>(size), lda=convert_index<BlasIndex>(lhsStride), incx=1, incy=1; \
EIGTYPE beta(1); \
const EIGTYPE *x_ptr; \
char uplo=(IsRowMajor) ? (IsLower ? 'U' : 'L') : (IsLower ? 'L' : 'U'); \
SYMVVector x_tmp; \
if (ConjugateRhs) { \
Map<const SYMVVector, 0 > map_x(_rhs,size,1); \
x_tmp=map_x.conjugate(); \
x_ptr=x_tmp.data(); \
} else x_ptr=_rhs; \
BLASFUNC(&uplo, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &incy); \
}\
};
#ifdef EIGEN_USE_MKL
EIGEN_BLAS_SYMV_SPECIALIZATION
(
double
,
double
,
dsymv
)
EIGEN_BLAS_SYMV_SPECIALIZATION
(
float
,
float
,
ssymv
)
EIGEN_BLAS_SYMV_SPECIALIZATION
(
dcomplex
,
MKL_Complex16
,
zhemv
)
EIGEN_BLAS_SYMV_SPECIALIZATION
(
scomplex
,
MKL_Complex8
,
chemv
)
#else
EIGEN_BLAS_SYMV_SPECIALIZATION
(
double
,
double
,
dsymv_
)
EIGEN_BLAS_SYMV_SPECIALIZATION
(
float
,
float
,
ssymv_
)
EIGEN_BLAS_SYMV_SPECIALIZATION
(
dcomplex
,
double
,
zhemv_
)
EIGEN_BLAS_SYMV_SPECIALIZATION
(
scomplex
,
float
,
chemv_
)
#endif
}
// end namespace internal
}
// end namespace Eigen
#endif // EIGEN_SELFADJOINT_MATRIX_VECTOR_BLAS_H
pydensecrf/densecrf/include/Eigen/src/Core/products/SelfadjointProduct.h
View file @
13b115ab
...
...
@@ -18,21 +18,19 @@
namespace
Eigen
{
template
<
typename
Scalar
,
typename
Index
,
int
StorageOrder
,
int
UpLo
,
bool
ConjLhs
,
bool
ConjRhs
>
struct
selfadjoint_rank1_update
;
template
<
typename
Scalar
,
typename
Index
,
int
UpLo
,
bool
ConjLhs
,
bool
ConjRhs
>
struct
selfadjoint_rank1_update
<
Scalar
,
Index
,
ColMajor
,
UpLo
,
ConjLhs
,
ConjRhs
>
{
static
void
run
(
Index
size
,
Scalar
*
mat
,
Index
stride
,
const
Scalar
*
vec
,
Scalar
alpha
)
static
void
run
(
Index
size
,
Scalar
*
mat
,
Index
stride
,
const
Scalar
*
vec
X
,
const
Scalar
*
vecY
,
const
Scalar
&
alpha
)
{
internal
::
conj_if
<
ConjRhs
>
cj
;
typedef
Map
<
const
Matrix
<
Scalar
,
Dynamic
,
1
>
>
OtherMap
;
typedef
typename
internal
::
conditional
<
ConjLhs
,
typename
OtherMap
::
ConjugateReturnType
,
const
OtherMap
&>::
type
Conj
R
hsType
;
typedef
typename
internal
::
conditional
<
ConjLhs
,
typename
OtherMap
::
ConjugateReturnType
,
const
OtherMap
&>::
type
Conj
L
hsType
;
for
(
Index
i
=
0
;
i
<
size
;
++
i
)
{
Map
<
Matrix
<
Scalar
,
Dynamic
,
1
>
>
(
mat
+
stride
*
i
+
(
UpLo
==
Lower
?
i
:
0
),
(
UpLo
==
Lower
?
size
-
i
:
(
i
+
1
)))
+=
(
alpha
*
cj
(
vec
[
i
]))
*
Conj
R
hsType
(
OtherMap
(
vec
+
(
UpLo
==
Lower
?
i
:
0
),
UpLo
==
Lower
?
size
-
i
:
(
i
+
1
)));
+=
(
alpha
*
cj
(
vec
Y
[
i
]))
*
Conj
L
hsType
(
OtherMap
(
vec
X
+
(
UpLo
==
Lower
?
i
:
0
),
UpLo
==
Lower
?
size
-
i
:
(
i
+
1
)));
}
}
};
...
...
@@ -40,9 +38,9 @@ struct selfadjoint_rank1_update<Scalar,Index,ColMajor,UpLo,ConjLhs,ConjRhs>
template
<
typename
Scalar
,
typename
Index
,
int
UpLo
,
bool
ConjLhs
,
bool
ConjRhs
>
struct
selfadjoint_rank1_update
<
Scalar
,
Index
,
RowMajor
,
UpLo
,
ConjLhs
,
ConjRhs
>
{
static
void
run
(
Index
size
,
Scalar
*
mat
,
Index
stride
,
const
Scalar
*
vec
,
Scalar
alpha
)
static
void
run
(
Index
size
,
Scalar
*
mat
,
Index
stride
,
const
Scalar
*
vec
X
,
const
Scalar
*
vecY
,
const
Scalar
&
alpha
)
{
selfadjoint_rank1_update
<
Scalar
,
Index
,
ColMajor
,
UpLo
==
Lower
?
Upper
:
Lower
,
ConjRhs
,
ConjLhs
>::
run
(
size
,
mat
,
stride
,
vec
,
alpha
);
selfadjoint_rank1_update
<
Scalar
,
Index
,
ColMajor
,
UpLo
==
Lower
?
Upper
:
Lower
,
ConjRhs
,
ConjLhs
>::
run
(
size
,
mat
,
stride
,
vec
Y
,
vecX
,
alpha
);
}
};
...
...
@@ -52,10 +50,9 @@ struct selfadjoint_product_selector;
template
<
typename
MatrixType
,
typename
OtherType
,
int
UpLo
>
struct
selfadjoint_product_selector
<
MatrixType
,
OtherType
,
UpLo
,
true
>
{
static
void
run
(
MatrixType
&
mat
,
const
OtherType
&
other
,
typename
MatrixType
::
Scalar
alpha
)
static
void
run
(
MatrixType
&
mat
,
const
OtherType
&
other
,
const
typename
MatrixType
::
Scalar
&
alpha
)
{
typedef
typename
MatrixType
::
Scalar
Scalar
;
typedef
typename
MatrixType
::
Index
Index
;
typedef
internal
::
blas_traits
<
OtherType
>
OtherBlasTraits
;
typedef
typename
OtherBlasTraits
::
DirectLinearAccessType
ActualOtherType
;
typedef
typename
internal
::
remove_all
<
ActualOtherType
>::
type
_ActualOtherType
;
...
...
@@ -78,17 +75,16 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,true>
selfadjoint_rank1_update
<
Scalar
,
Index
,
StorageOrder
,
UpLo
,
OtherBlasTraits
::
NeedToConjugate
&&
NumTraits
<
Scalar
>::
IsComplex
,
(
!
OtherBlasTraits
::
NeedToConjugate
)
&&
NumTraits
<
Scalar
>::
IsComplex
>
::
run
(
other
.
size
(),
mat
.
data
(),
mat
.
outerStride
(),
actualOtherPtr
,
actualAlpha
);
::
run
(
other
.
size
(),
mat
.
data
(),
mat
.
outerStride
(),
actualOtherPtr
,
actualOtherPtr
,
actualAlpha
);
}
};
template
<
typename
MatrixType
,
typename
OtherType
,
int
UpLo
>
struct
selfadjoint_product_selector
<
MatrixType
,
OtherType
,
UpLo
,
false
>
{
static
void
run
(
MatrixType
&
mat
,
const
OtherType
&
other
,
typename
MatrixType
::
Scalar
alpha
)
static
void
run
(
MatrixType
&
mat
,
const
OtherType
&
other
,
const
typename
MatrixType
::
Scalar
&
alpha
)
{
typedef
typename
MatrixType
::
Scalar
Scalar
;
typedef
typename
MatrixType
::
Index
Index
;
typedef
internal
::
blas_traits
<
OtherType
>
OtherBlasTraits
;
typedef
typename
OtherBlasTraits
::
DirectLinearAccessType
ActualOtherType
;
typedef
typename
internal
::
remove_all
<
ActualOtherType
>::
type
_ActualOtherType
;
...
...
@@ -96,15 +92,27 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false>
Scalar
actualAlpha
=
alpha
*
OtherBlasTraits
::
extractScalarFactor
(
other
.
derived
());
enum
{
IsRowMajor
=
(
internal
::
traits
<
MatrixType
>::
Flags
&
RowMajorBit
)
?
1
:
0
};
enum
{
IsRowMajor
=
(
internal
::
traits
<
MatrixType
>::
Flags
&
RowMajorBit
)
?
1
:
0
,
OtherIsRowMajor
=
_ActualOtherType
::
Flags
&
RowMajorBit
?
1
:
0
};
Index
size
=
mat
.
cols
();
Index
depth
=
actualOther
.
cols
();
typedef
internal
::
gemm_blocking_space
<
IsRowMajor
?
RowMajor
:
ColMajor
,
Scalar
,
Scalar
,
MatrixType
::
MaxColsAtCompileTime
,
MatrixType
::
MaxColsAtCompileTime
,
_ActualOtherType
::
MaxColsAtCompileTime
>
BlockingType
;
BlockingType
blocking
(
size
,
size
,
depth
,
1
,
false
);
internal
::
general_matrix_matrix_triangular_product
<
Index
,
Scalar
,
_ActualOtherType
::
Flags
&
RowMajor
Bit
?
RowMajor
:
ColMajor
,
OtherBlasTraits
::
NeedToConjugate
&&
NumTraits
<
Scalar
>::
IsComplex
,
Scalar
,
_ActualOtherType
::
Flags
&
RowMajor
Bit
?
ColMajor
:
RowMajor
,
(
!
OtherBlasTraits
::
NeedToConjugate
)
&&
NumTraits
<
Scalar
>::
IsComplex
,
MatrixType
::
Flags
&
RowMajor
Bit
?
RowMajor
:
ColMajor
,
UpLo
>
::
run
(
mat
.
cols
(),
actualOther
.
cols
()
,
Scalar
,
OtherIs
RowMajor
?
RowMajor
:
ColMajor
,
OtherBlasTraits
::
NeedToConjugate
&&
NumTraits
<
Scalar
>::
IsComplex
,
Scalar
,
OtherIs
RowMajor
?
ColMajor
:
RowMajor
,
(
!
OtherBlasTraits
::
NeedToConjugate
)
&&
NumTraits
<
Scalar
>::
IsComplex
,
Is
RowMajor
?
RowMajor
:
ColMajor
,
MatrixType
::
InnerStrideAtCompileTime
,
UpLo
>
::
run
(
size
,
depth
,
&
actualOther
.
coeffRef
(
0
,
0
),
actualOther
.
outerStride
(),
&
actualOther
.
coeffRef
(
0
,
0
),
actualOther
.
outerStride
(),
mat
.
data
(),
mat
.
outerStride
(),
actualAlpha
);
mat
.
data
(),
mat
.
innerStride
(),
mat
.
outerStride
(),
actualAlpha
,
blocking
);
}
};
...
...
@@ -113,7 +121,7 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false>
template
<
typename
MatrixType
,
unsigned
int
UpLo
>
template
<
typename
DerivedU
>
SelfAdjointView
<
MatrixType
,
UpLo
>&
SelfAdjointView
<
MatrixType
,
UpLo
>
::
rankUpdate
(
const
MatrixBase
<
DerivedU
>&
u
,
Scalar
alpha
)
::
rankUpdate
(
const
MatrixBase
<
DerivedU
>&
u
,
const
Scalar
&
alpha
)
{
selfadjoint_product_selector
<
MatrixType
,
DerivedU
,
UpLo
>::
run
(
_expression
().
const_cast_derived
(),
u
.
derived
(),
alpha
);
...
...
pydensecrf/densecrf/include/Eigen/src/Core/products/SelfadjointRank2Update.h
View file @
13b115ab
...
...
@@ -24,14 +24,14 @@ struct selfadjoint_rank2_update_selector;
template
<
typename
Scalar
,
typename
Index
,
typename
UType
,
typename
VType
>
struct
selfadjoint_rank2_update_selector
<
Scalar
,
Index
,
UType
,
VType
,
Lower
>
{
static
void
run
(
Scalar
*
mat
,
Index
stride
,
const
UType
&
u
,
const
VType
&
v
,
Scalar
alpha
)
static
void
run
(
Scalar
*
mat
,
Index
stride
,
const
UType
&
u
,
const
VType
&
v
,
const
Scalar
&
alpha
)
{
const
Index
size
=
u
.
size
();
for
(
Index
i
=
0
;
i
<
size
;
++
i
)
{
Map
<
Matrix
<
Scalar
,
Dynamic
,
1
>
>
(
mat
+
stride
*
i
+
i
,
size
-
i
)
+=
(
conj
(
alpha
)
*
conj
(
u
.
coeff
(
i
)))
*
v
.
tail
(
size
-
i
)
+
(
alpha
*
conj
(
v
.
coeff
(
i
)))
*
u
.
tail
(
size
-
i
);
(
numext
::
conj
(
alpha
)
*
numext
::
conj
(
u
.
coeff
(
i
)))
*
v
.
tail
(
size
-
i
)
+
(
alpha
*
numext
::
conj
(
v
.
coeff
(
i
)))
*
u
.
tail
(
size
-
i
);
}
}
};
...
...
@@ -39,13 +39,13 @@ struct selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Lower>
template
<
typename
Scalar
,
typename
Index
,
typename
UType
,
typename
VType
>
struct
selfadjoint_rank2_update_selector
<
Scalar
,
Index
,
UType
,
VType
,
Upper
>
{
static
void
run
(
Scalar
*
mat
,
Index
stride
,
const
UType
&
u
,
const
VType
&
v
,
Scalar
alpha
)
static
void
run
(
Scalar
*
mat
,
Index
stride
,
const
UType
&
u
,
const
VType
&
v
,
const
Scalar
&
alpha
)
{
const
Index
size
=
u
.
size
();
for
(
Index
i
=
0
;
i
<
size
;
++
i
)
Map
<
Matrix
<
Scalar
,
Dynamic
,
1
>
>
(
mat
+
stride
*
i
,
i
+
1
)
+=
(
conj
(
alpha
)
*
conj
(
u
.
coeff
(
i
)))
*
v
.
head
(
i
+
1
)
+
(
alpha
*
conj
(
v
.
coeff
(
i
)))
*
u
.
head
(
i
+
1
);
(
numext
::
conj
(
alpha
)
*
numext
::
conj
(
u
.
coeff
(
i
)))
*
v
.
head
(
i
+
1
)
+
(
alpha
*
numext
::
conj
(
v
.
coeff
(
i
)))
*
u
.
head
(
i
+
1
);
}
};
...
...
@@ -58,7 +58,7 @@ template<bool Cond, typename T> struct conj_expr_if
template
<
typename
MatrixType
,
unsigned
int
UpLo
>
template
<
typename
DerivedU
,
typename
DerivedV
>
SelfAdjointView
<
MatrixType
,
UpLo
>&
SelfAdjointView
<
MatrixType
,
UpLo
>
::
rankUpdate
(
const
MatrixBase
<
DerivedU
>&
u
,
const
MatrixBase
<
DerivedV
>&
v
,
Scalar
alpha
)
::
rankUpdate
(
const
MatrixBase
<
DerivedU
>&
u
,
const
MatrixBase
<
DerivedV
>&
v
,
const
Scalar
&
alpha
)
{
typedef
internal
::
blas_traits
<
DerivedU
>
UBlasTraits
;
typedef
typename
UBlasTraits
::
DirectLinearAccessType
ActualUType
;
...
...
@@ -75,15 +75,15 @@ SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
enum
{
IsRowMajor
=
(
internal
::
traits
<
MatrixType
>::
Flags
&
RowMajorBit
)
?
1
:
0
};
Scalar
actualAlpha
=
alpha
*
UBlasTraits
::
extractScalarFactor
(
u
.
derived
())
*
internal
::
conj
(
VBlasTraits
::
extractScalarFactor
(
v
.
derived
()));
*
numext
::
conj
(
VBlasTraits
::
extractScalarFactor
(
v
.
derived
()));
if
(
IsRowMajor
)
actualAlpha
=
internal
::
conj
(
actualAlpha
);
actualAlpha
=
numext
::
conj
(
actualAlpha
);
internal
::
selfadjoint_rank2_update_selector
<
Scalar
,
Index
,
typename
internal
::
remove_all
<
typename
internal
::
conj_expr_if
<
IsRowMajor
^
U
BlasTraits
::
NeedToConjugate
,
_Actual
U
Type
>::
type
>::
type
,
typename
internal
::
remove_all
<
typename
internal
::
conj_expr_if
<
IsRowMajor
^
VBlasTraits
::
NeedToConjugate
,
_ActualVType
>::
type
>::
t
ype
,
typedef
typename
internal
::
remove_all
<
typename
internal
::
conj_expr_if
<
IsRowMajor
^
UBlasTraits
::
NeedToConjugate
,
_ActualUType
>::
type
>::
type
UType
;
typedef
typename
internal
::
remove_all
<
typename
internal
::
conj_expr_if
<
IsRowMajor
^
V
BlasTraits
::
NeedToConjugate
,
_Actual
V
Type
>::
type
>::
type
VType
;
internal
::
selfadjoint_rank2_update_selector
<
Scalar
,
Index
,
UType
,
VT
ype
,
(
IsRowMajor
?
int
(
UpLo
==
Upper
?
Lower
:
Upper
)
:
UpLo
)
>
::
run
(
_expression
().
const_cast_derived
().
data
(),
_expression
().
outerStride
(),
actualU
,
actualV
,
actualAlpha
);
::
run
(
_expression
().
const_cast_derived
().
data
(),
_expression
().
outerStride
(),
UType
(
actualU
),
VType
(
actualV
)
,
actualAlpha
);
return
*
this
;
}
...
...
pydensecrf/densecrf/include/Eigen/src/Core/products/TriangularMatrixMatrix.h
View file @
13b115ab
...
...
@@ -45,23 +45,25 @@ template <typename Scalar, typename Index,
int
Mode
,
bool
LhsIsTriangular
,
int
LhsStorageOrder
,
bool
ConjugateLhs
,
int
RhsStorageOrder
,
bool
ConjugateRhs
,
int
ResStorageOrder
,
int
Version
=
Specialized
>
int
ResStorageOrder
,
int
ResInnerStride
,
int
Version
=
Specialized
>
struct
product_triangular_matrix_matrix
;
template
<
typename
Scalar
,
typename
Index
,
int
Mode
,
bool
LhsIsTriangular
,
int
LhsStorageOrder
,
bool
ConjugateLhs
,
int
RhsStorageOrder
,
bool
ConjugateRhs
,
int
Version
>
int
RhsStorageOrder
,
bool
ConjugateRhs
,
int
ResInnerStride
,
int
Version
>
struct
product_triangular_matrix_matrix
<
Scalar
,
Index
,
Mode
,
LhsIsTriangular
,
LhsStorageOrder
,
ConjugateLhs
,
RhsStorageOrder
,
ConjugateRhs
,
RowMajor
,
Version
>
RhsStorageOrder
,
ConjugateRhs
,
RowMajor
,
ResInnerStride
,
Version
>
{
static
EIGEN_STRONG_INLINE
void
run
(
Index
rows
,
Index
cols
,
Index
depth
,
const
Scalar
*
lhs
,
Index
lhsStride
,
const
Scalar
*
rhs
,
Index
rhsStride
,
Scalar
*
res
,
Index
resStride
,
Scalar
alpha
,
level3_blocking
<
Scalar
,
Scalar
>&
blocking
)
Scalar
*
res
,
Index
resIncr
,
Index
resStride
,
const
Scalar
&
alpha
,
level3_blocking
<
Scalar
,
Scalar
>&
blocking
)
{
product_triangular_matrix_matrix
<
Scalar
,
Index
,
(
Mode
&
(
UnitDiag
|
ZeroDiag
))
|
((
Mode
&
Upper
)
?
Lower
:
Upper
),
...
...
@@ -70,18 +72,19 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
ConjugateRhs
,
LhsStorageOrder
==
RowMajor
?
ColMajor
:
RowMajor
,
ConjugateLhs
,
ColMajor
>
::
run
(
cols
,
rows
,
depth
,
rhs
,
rhsStride
,
lhs
,
lhsStride
,
res
,
resStride
,
alpha
,
blocking
);
ColMajor
,
ResInnerStride
>
::
run
(
cols
,
rows
,
depth
,
rhs
,
rhsStride
,
lhs
,
lhsStride
,
res
,
resIncr
,
resStride
,
alpha
,
blocking
);
}
};
// implements col-major += alpha * op(triangular) * op(general)
template
<
typename
Scalar
,
typename
Index
,
int
Mode
,
int
LhsStorageOrder
,
bool
ConjugateLhs
,
int
RhsStorageOrder
,
bool
ConjugateRhs
,
int
Version
>
int
RhsStorageOrder
,
bool
ConjugateRhs
,
int
ResInnerStride
,
int
Version
>
struct
product_triangular_matrix_matrix
<
Scalar
,
Index
,
Mode
,
true
,
LhsStorageOrder
,
ConjugateLhs
,
RhsStorageOrder
,
ConjugateRhs
,
ColMajor
,
Version
>
RhsStorageOrder
,
ConjugateRhs
,
ColMajor
,
ResInnerStride
,
Version
>
{
typedef
gebp_traits
<
Scalar
,
Scalar
>
Traits
;
...
...
@@ -95,8 +98,22 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
Index
_rows
,
Index
_cols
,
Index
_depth
,
const
Scalar
*
_lhs
,
Index
lhsStride
,
const
Scalar
*
_rhs
,
Index
rhsStride
,
Scalar
*
res
,
Index
resStride
,
Scalar
alpha
,
level3_blocking
<
Scalar
,
Scalar
>&
blocking
)
Scalar
*
res
,
Index
resIncr
,
Index
resStride
,
const
Scalar
&
alpha
,
level3_blocking
<
Scalar
,
Scalar
>&
blocking
);
};
template
<
typename
Scalar
,
typename
Index
,
int
Mode
,
int
LhsStorageOrder
,
bool
ConjugateLhs
,
int
RhsStorageOrder
,
bool
ConjugateRhs
,
int
ResInnerStride
,
int
Version
>
EIGEN_DONT_INLINE
void
product_triangular_matrix_matrix
<
Scalar
,
Index
,
Mode
,
true
,
LhsStorageOrder
,
ConjugateLhs
,
RhsStorageOrder
,
ConjugateRhs
,
ColMajor
,
ResInnerStride
,
Version
>::
run
(
Index
_rows
,
Index
_cols
,
Index
_depth
,
const
Scalar
*
_lhs
,
Index
lhsStride
,
const
Scalar
*
_rhs
,
Index
rhsStride
,
Scalar
*
_res
,
Index
resIncr
,
Index
resStride
,
const
Scalar
&
alpha
,
level3_blocking
<
Scalar
,
Scalar
>&
blocking
)
{
// strip zeros
Index
diagSize
=
(
std
::
min
)(
_rows
,
_depth
);
...
...
@@ -104,30 +121,42 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
Index
depth
=
IsLower
?
diagSize
:
_depth
;
Index
cols
=
_cols
;
const_blas_data_mapper
<
Scalar
,
Index
,
LhsStorageOrder
>
lhs
(
_lhs
,
lhsStride
);
const_blas_data_mapper
<
Scalar
,
Index
,
RhsStorageOrder
>
rhs
(
_rhs
,
rhsStride
);
typedef
const_blas_data_mapper
<
Scalar
,
Index
,
LhsStorageOrder
>
LhsMapper
;
typedef
const_blas_data_mapper
<
Scalar
,
Index
,
RhsStorageOrder
>
RhsMapper
;
typedef
blas_data_mapper
<
typename
Traits
::
ResScalar
,
Index
,
ColMajor
,
Unaligned
,
ResInnerStride
>
ResMapper
;
LhsMapper
lhs
(
_lhs
,
lhsStride
);
RhsMapper
rhs
(
_rhs
,
rhsStride
);
ResMapper
res
(
_res
,
resStride
,
resIncr
);
Index
kc
=
blocking
.
kc
();
// cache block size along the K direction
Index
mc
=
(
std
::
min
)(
rows
,
blocking
.
mc
());
// cache block size along the M direction
// The small panel size must not be larger than blocking size.
// Usually this should never be the case because SmallPanelWidth^2 is very small
// compared to L2 cache size, but let's be safe:
Index
panelWidth
=
(
std
::
min
)(
Index
(
SmallPanelWidth
),(
std
::
min
)(
kc
,
mc
));
std
::
size_t
sizeA
=
kc
*
mc
;
std
::
size_t
sizeB
=
kc
*
cols
;
std
::
size_t
sizeW
=
kc
*
Traits
::
WorkSpaceFactor
;
ei_declare_aligned_stack_constructed_variable
(
Scalar
,
blockA
,
sizeA
,
blocking
.
blockA
());
ei_declare_aligned_stack_constructed_variable
(
Scalar
,
blockB
,
sizeB
,
blocking
.
blockB
());
ei_declare_aligned_stack_constructed_variable
(
Scalar
,
blockW
,
sizeW
,
blocking
.
blockW
());
Matrix
<
Scalar
,
SmallPanelWidth
,
SmallPanelWidth
,
LhsStorageOrder
>
triangularBuffer
;
// To work around an "error: member reference base type 'Matrix<...>
// (Eigen::internal::constructor_without_unaligned_array_assert (*)())' is
// not a structure or union" compilation error in nvcc (tested V8.0.61),
// create a dummy internal::constructor_without_unaligned_array_assert
// object to pass to the Matrix constructor.
internal
::
constructor_without_unaligned_array_assert
a
;
Matrix
<
Scalar
,
SmallPanelWidth
,
SmallPanelWidth
,
LhsStorageOrder
>
triangularBuffer
(
a
);
triangularBuffer
.
setZero
();
if
((
Mode
&
ZeroDiag
)
==
ZeroDiag
)
triangularBuffer
.
diagonal
().
setZero
();
else
triangularBuffer
.
diagonal
().
setOnes
();
gebp_kernel
<
Scalar
,
Scalar
,
Index
,
Traits
::
mr
,
Traits
::
nr
,
ConjugateLhs
,
ConjugateRhs
>
gebp_kernel
;
gemm_pack_lhs
<
Scalar
,
Index
,
Traits
::
mr
,
Traits
::
LhsProgress
,
LhsStorageOrder
>
pack_lhs
;
gemm_pack_rhs
<
Scalar
,
Index
,
Traits
::
nr
,
RhsStorageOrder
>
pack_rhs
;
gebp_kernel
<
Scalar
,
Scalar
,
Index
,
ResMapper
,
Traits
::
mr
,
Traits
::
nr
,
ConjugateLhs
,
ConjugateRhs
>
gebp_kernel
;
gemm_pack_lhs
<
Scalar
,
Index
,
LhsMapper
,
Traits
::
mr
,
Traits
::
LhsProgress
,
LhsStorageOrder
>
pack_lhs
;
gemm_pack_rhs
<
Scalar
,
Index
,
RhsMapper
,
Traits
::
nr
,
RhsStorageOrder
>
pack_rhs
;
for
(
Index
k2
=
IsLower
?
depth
:
0
;
IsLower
?
k2
>
0
:
k2
<
depth
;
...
...
@@ -143,7 +172,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
k2
=
k2
+
actual_kc
-
kc
;
}
pack_rhs
(
blockB
,
&
rhs
(
actual_k2
,
0
),
rhsStride
,
actual_kc
,
cols
);
pack_rhs
(
blockB
,
rhs
.
getSubMapper
(
actual_k2
,
0
),
actual_kc
,
cols
);
// the selected lhs's panel has to be split in three different parts:
// 1 - the part which is zero => skip it
...
...
@@ -154,9 +183,9 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
if
(
IsLower
||
actual_k2
<
rows
)
{
// for each small vertical panels of lhs
for
(
Index
k1
=
0
;
k1
<
actual_kc
;
k1
+=
SmallP
anelWidth
)
for
(
Index
k1
=
0
;
k1
<
actual_kc
;
k1
+=
p
anelWidth
)
{
Index
actualPanelWidth
=
std
::
min
<
Index
>
(
actual_kc
-
k1
,
SmallP
anelWidth
);
Index
actualPanelWidth
=
std
::
min
<
Index
>
(
actual_kc
-
k1
,
p
anelWidth
);
Index
lengthTarget
=
IsLower
?
actual_kc
-
k1
-
actualPanelWidth
:
k1
;
Index
startBlock
=
actual_k2
+
k1
;
Index
blockBOffset
=
k1
;
...
...
@@ -171,20 +200,22 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
for
(
Index
i
=
IsLower
?
k
+
1
:
0
;
IsLower
?
i
<
actualPanelWidth
:
i
<
k
;
++
i
)
triangularBuffer
.
coeffRef
(
i
,
k
)
=
lhs
(
startBlock
+
i
,
startBlock
+
k
);
}
pack_lhs
(
blockA
,
triangularBuffer
.
data
(),
triangularBuffer
.
outerStride
(),
actualPanelWidth
,
actualPanelWidth
);
pack_lhs
(
blockA
,
LhsMapper
(
triangularBuffer
.
data
(),
triangularBuffer
.
outerStride
()
)
,
actualPanelWidth
,
actualPanelWidth
);
gebp_kernel
(
res
+
startBlock
,
resStride
,
blockA
,
blockB
,
actualPanelWidth
,
actualPanelWidth
,
cols
,
alpha
,
actualPanelWidth
,
actual_kc
,
0
,
blockBOffset
,
blockW
);
gebp_kernel
(
res
.
getSubMapper
(
startBlock
,
0
),
blockA
,
blockB
,
actualPanelWidth
,
actualPanelWidth
,
cols
,
alpha
,
actualPanelWidth
,
actual_kc
,
0
,
blockBOffset
);
// GEBP with remaining micro panel
if
(
lengthTarget
>
0
)
{
Index
startTarget
=
IsLower
?
actual_k2
+
k1
+
actualPanelWidth
:
actual_k2
;
pack_lhs
(
blockA
,
&
lhs
(
startTarget
,
startBlock
),
lhsStride
,
actualPanelWidth
,
lengthTarget
);
pack_lhs
(
blockA
,
lhs
.
getSubMapper
(
startTarget
,
startBlock
),
actualPanelWidth
,
lengthTarget
);
gebp_kernel
(
res
+
startTarget
,
resStride
,
blockA
,
blockB
,
lengthTarget
,
actualPanelWidth
,
cols
,
alpha
,
actualPanelWidth
,
actual_kc
,
0
,
blockBOffset
,
blockW
);
gebp_kernel
(
res
.
getSubMapper
(
startTarget
,
0
),
blockA
,
blockB
,
lengthTarget
,
actualPanelWidth
,
cols
,
alpha
,
actualPanelWidth
,
actual_kc
,
0
,
blockBOffset
);
}
}
}
...
...
@@ -195,23 +226,24 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
for
(
Index
i2
=
start
;
i2
<
end
;
i2
+=
mc
)
{
const
Index
actual_mc
=
(
std
::
min
)(
i2
+
mc
,
end
)
-
i2
;
gemm_pack_lhs
<
Scalar
,
Index
,
Traits
::
mr
,
Traits
::
LhsProgress
,
LhsStorageOrder
,
false
>
()
(
blockA
,
&
lhs
(
i2
,
actual_k2
),
lhsStride
,
actual_kc
,
actual_mc
);
gemm_pack_lhs
<
Scalar
,
Index
,
LhsMapper
,
Traits
::
mr
,
Traits
::
LhsProgress
,
LhsStorageOrder
,
false
>
()
(
blockA
,
lhs
.
getSubMapper
(
i2
,
actual_k2
),
actual_kc
,
actual_mc
);
gebp_kernel
(
res
+
i2
,
resStride
,
blockA
,
blockB
,
actual_mc
,
actual_kc
,
cols
,
alpha
,
-
1
,
-
1
,
0
,
0
,
blockW
);
gebp_kernel
(
res
.
getSubMapper
(
i2
,
0
),
blockA
,
blockB
,
actual_mc
,
actual_kc
,
cols
,
alpha
,
-
1
,
-
1
,
0
,
0
);
}
}
}
}
};
// implements col-major += alpha * op(general) * op(triangular)
template
<
typename
Scalar
,
typename
Index
,
int
Mode
,
int
LhsStorageOrder
,
bool
ConjugateLhs
,
int
RhsStorageOrder
,
bool
ConjugateRhs
,
int
Version
>
int
RhsStorageOrder
,
bool
ConjugateRhs
,
int
ResInnerStride
,
int
Version
>
struct
product_triangular_matrix_matrix
<
Scalar
,
Index
,
Mode
,
false
,
LhsStorageOrder
,
ConjugateLhs
,
RhsStorageOrder
,
ConjugateRhs
,
ColMajor
,
Version
>
LhsStorageOrder
,
ConjugateLhs
,
RhsStorageOrder
,
ConjugateRhs
,
ColMajor
,
ResInnerStride
,
Version
>
{
typedef
gebp_traits
<
Scalar
,
Scalar
>
Traits
;
enum
{
...
...
@@ -224,40 +256,58 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
Index
_rows
,
Index
_cols
,
Index
_depth
,
const
Scalar
*
_lhs
,
Index
lhsStride
,
const
Scalar
*
_rhs
,
Index
rhsStride
,
Scalar
*
res
,
Index
resStride
,
Scalar
alpha
,
level3_blocking
<
Scalar
,
Scalar
>&
blocking
)
Scalar
*
res
,
Index
resIncr
,
Index
resStride
,
const
Scalar
&
alpha
,
level3_blocking
<
Scalar
,
Scalar
>&
blocking
);
};
template
<
typename
Scalar
,
typename
Index
,
int
Mode
,
int
LhsStorageOrder
,
bool
ConjugateLhs
,
int
RhsStorageOrder
,
bool
ConjugateRhs
,
int
ResInnerStride
,
int
Version
>
EIGEN_DONT_INLINE
void
product_triangular_matrix_matrix
<
Scalar
,
Index
,
Mode
,
false
,
LhsStorageOrder
,
ConjugateLhs
,
RhsStorageOrder
,
ConjugateRhs
,
ColMajor
,
ResInnerStride
,
Version
>::
run
(
Index
_rows
,
Index
_cols
,
Index
_depth
,
const
Scalar
*
_lhs
,
Index
lhsStride
,
const
Scalar
*
_rhs
,
Index
rhsStride
,
Scalar
*
_res
,
Index
resIncr
,
Index
resStride
,
const
Scalar
&
alpha
,
level3_blocking
<
Scalar
,
Scalar
>&
blocking
)
{
const
Index
PacketBytes
=
packet_traits
<
Scalar
>::
size
*
sizeof
(
Scalar
);
// strip zeros
Index
diagSize
=
(
std
::
min
)(
_cols
,
_depth
);
Index
rows
=
_rows
;
Index
depth
=
IsLower
?
_depth
:
diagSize
;
Index
cols
=
IsLower
?
diagSize
:
_cols
;
const_blas_data_mapper
<
Scalar
,
Index
,
LhsStorageOrder
>
lhs
(
_lhs
,
lhsStride
);
const_blas_data_mapper
<
Scalar
,
Index
,
RhsStorageOrder
>
rhs
(
_rhs
,
rhsStride
);
typedef
const_blas_data_mapper
<
Scalar
,
Index
,
LhsStorageOrder
>
LhsMapper
;
typedef
const_blas_data_mapper
<
Scalar
,
Index
,
RhsStorageOrder
>
RhsMapper
;
typedef
blas_data_mapper
<
typename
Traits
::
ResScalar
,
Index
,
ColMajor
,
Unaligned
,
ResInnerStride
>
ResMapper
;
LhsMapper
lhs
(
_lhs
,
lhsStride
);
RhsMapper
rhs
(
_rhs
,
rhsStride
);
ResMapper
res
(
_res
,
resStride
,
resIncr
);
Index
kc
=
blocking
.
kc
();
// cache block size along the K direction
Index
mc
=
(
std
::
min
)(
rows
,
blocking
.
mc
());
// cache block size along the M direction
std
::
size_t
sizeA
=
kc
*
mc
;
std
::
size_t
sizeB
=
kc
*
cols
;
std
::
size_t
sizeW
=
kc
*
Traits
::
WorkSpaceFactor
;
std
::
size_t
sizeB
=
kc
*
cols
+
EIGEN_MAX_ALIGN_BYTES
/
sizeof
(
Scalar
);
ei_declare_aligned_stack_constructed_variable
(
Scalar
,
blockA
,
sizeA
,
blocking
.
blockA
());
ei_declare_aligned_stack_constructed_variable
(
Scalar
,
blockB
,
sizeB
,
blocking
.
blockB
());
ei_declare_aligned_stack_constructed_variable
(
Scalar
,
blockW
,
sizeW
,
blocking
.
blockW
());
Matrix
<
Scalar
,
SmallPanelWidth
,
SmallPanelWidth
,
RhsStorageOrder
>
triangularBuffer
;
internal
::
constructor_without_unaligned_array_assert
a
;
Matrix
<
Scalar
,
SmallPanelWidth
,
SmallPanelWidth
,
RhsStorageOrder
>
triangularBuffer
(
a
);
triangularBuffer
.
setZero
();
if
((
Mode
&
ZeroDiag
)
==
ZeroDiag
)
triangularBuffer
.
diagonal
().
setZero
();
else
triangularBuffer
.
diagonal
().
setOnes
();
gebp_kernel
<
Scalar
,
Scalar
,
Index
,
Traits
::
mr
,
Traits
::
nr
,
ConjugateLhs
,
ConjugateRhs
>
gebp_kernel
;
gemm_pack_lhs
<
Scalar
,
Index
,
Traits
::
mr
,
Traits
::
LhsProgress
,
LhsStorageOrder
>
pack_lhs
;
gemm_pack_rhs
<
Scalar
,
Index
,
Traits
::
nr
,
RhsStorageOrder
>
pack_rhs
;
gemm_pack_rhs
<
Scalar
,
Index
,
Traits
::
nr
,
RhsStorageOrder
,
false
,
true
>
pack_rhs_panel
;
gebp_kernel
<
Scalar
,
Scalar
,
Index
,
ResMapper
,
Traits
::
mr
,
Traits
::
nr
,
ConjugateLhs
,
ConjugateRhs
>
gebp_kernel
;
gemm_pack_lhs
<
Scalar
,
Index
,
LhsMapper
,
Traits
::
mr
,
Traits
::
LhsProgress
,
LhsStorageOrder
>
pack_lhs
;
gemm_pack_rhs
<
Scalar
,
Index
,
RhsMapper
,
Traits
::
nr
,
RhsStorageOrder
>
pack_rhs
;
gemm_pack_rhs
<
Scalar
,
Index
,
RhsMapper
,
Traits
::
nr
,
RhsStorageOrder
,
false
,
true
>
pack_rhs_panel
;
for
(
Index
k2
=
IsLower
?
0
:
depth
;
IsLower
?
k2
<
depth
:
k2
>
0
;
...
...
@@ -279,8 +329,9 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
Index
ts
=
(
IsLower
&&
actual_k2
>=
cols
)
?
0
:
actual_kc
;
Scalar
*
geb
=
blockB
+
ts
*
ts
;
geb
=
geb
+
internal
::
first_aligned
<
PacketBytes
>
(
geb
,
PacketBytes
/
sizeof
(
Scalar
));
pack_rhs
(
geb
,
&
rhs
(
actual_k2
,
IsLower
?
0
:
k2
),
rhsStride
,
actual_kc
,
rs
);
pack_rhs
(
geb
,
rhs
.
getSubMapper
(
actual_k2
,
IsLower
?
0
:
k2
),
actual_kc
,
rs
);
// pack the triangular part of the rhs padding the unrolled blocks with zeros
if
(
ts
>
0
)
...
...
@@ -293,7 +344,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
Index
panelLength
=
IsLower
?
actual_kc
-
j2
-
actualPanelWidth
:
j2
;
// general part
pack_rhs_panel
(
blockB
+
j2
*
actual_kc
,
&
rhs
(
actual_k2
+
panelOffset
,
actual_j2
),
rhsStride
,
rhs
.
getSubMapper
(
actual_k2
+
panelOffset
,
actual_j2
),
panelLength
,
actualPanelWidth
,
actual_kc
,
panelOffset
);
...
...
@@ -307,7 +358,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
}
pack_rhs_panel
(
blockB
+
j2
*
actual_kc
,
triangularBuffer
.
data
(),
triangularBuffer
.
outerStride
(),
RhsMapper
(
triangularBuffer
.
data
(),
triangularBuffer
.
outerStride
()
)
,
actualPanelWidth
,
actualPanelWidth
,
actual_kc
,
j2
);
}
...
...
@@ -316,7 +367,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
for
(
Index
i2
=
0
;
i2
<
rows
;
i2
+=
mc
)
{
const
Index
actual_mc
=
(
std
::
min
)(
mc
,
rows
-
i2
);
pack_lhs
(
blockA
,
&
lhs
(
i2
,
actual_k2
),
lhsStride
,
actual_kc
,
actual_mc
);
pack_lhs
(
blockA
,
lhs
.
getSubMapper
(
i2
,
actual_k2
),
actual_kc
,
actual_mc
);
// triangular kernel
if
(
ts
>
0
)
...
...
@@ -327,50 +378,51 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
Index
panelLength
=
IsLower
?
actual_kc
-
j2
:
j2
+
actualPanelWidth
;
Index
blockOffset
=
IsLower
?
j2
:
0
;
gebp_kernel
(
res
+
i2
+
(
actual_k2
+
j2
)
*
resStride
,
resStride
,
gebp_kernel
(
res
.
getSubMapper
(
i2
,
actual_k2
+
j2
)
,
blockA
,
blockB
+
j2
*
actual_kc
,
actual_mc
,
panelLength
,
actualPanelWidth
,
alpha
,
actual_kc
,
actual_kc
,
// strides
blockOffset
,
blockOffset
,
// offsets
blockW
);
// workspace
blockOffset
,
blockOffset
);
// offsets
}
}
gebp_kernel
(
res
+
i2
+
(
IsLower
?
0
:
k2
)
*
resStride
,
resStride
,
gebp_kernel
(
res
.
getSubMapper
(
i2
,
IsLower
?
0
:
k2
)
,
blockA
,
geb
,
actual_mc
,
actual_kc
,
rs
,
alpha
,
-
1
,
-
1
,
0
,
0
,
blockW
);
-
1
,
-
1
,
0
,
0
);
}
}
}
};
/***************************************************************************
* Wrapper to product_triangular_matrix_matrix
***************************************************************************/
template
<
int
Mode
,
bool
LhsIsTriangular
,
typename
Lhs
,
typename
Rhs
>
struct
traits
<
TriangularProduct
<
Mode
,
LhsIsTriangular
,
Lhs
,
false
,
Rhs
,
false
>
>
:
traits
<
ProductBase
<
TriangularProduct
<
Mode
,
LhsIsTriangular
,
Lhs
,
false
,
Rhs
,
false
>
,
Lhs
,
Rhs
>
>
{};
}
// end namespace internal
namespace
internal
{
template
<
int
Mode
,
bool
LhsIsTriangular
,
typename
Lhs
,
typename
Rhs
>
struct
TriangularProduct
<
Mode
,
LhsIsTriangular
,
Lhs
,
false
,
Rhs
,
false
>
:
public
ProductBase
<
TriangularProduct
<
Mode
,
LhsIsTriangular
,
Lhs
,
false
,
Rhs
,
false
>
,
Lhs
,
Rhs
>
struct
triangular_product_impl
<
Mode
,
LhsIsTriangular
,
Lhs
,
false
,
Rhs
,
false
>
{
EIGEN_PRODUCT_PUBLIC_INTERFACE
(
TriangularProduct
)
TriangularProduct
(
const
Lhs
&
lhs
,
const
Rhs
&
rhs
)
:
Base
(
lhs
,
rhs
)
{}
template
<
typename
Dest
>
void
scaleAndAddTo
(
Dest
&
dst
,
Scalar
alpha
)
const
template
<
typename
Dest
>
static
void
run
(
Dest
&
dst
,
const
Lhs
&
a_lhs
,
const
Rhs
&
a_rhs
,
const
typename
Dest
::
Scalar
&
alpha
)
{
typename
internal
::
add_const_on_value_type
<
ActualLhsType
>::
type
lhs
=
LhsBlasTraits
::
extract
(
m_lhs
);
typename
internal
::
add_const_on_value_type
<
ActualRhsType
>::
type
rhs
=
RhsBlasTraits
::
extract
(
m_rhs
);
typedef
typename
Lhs
::
Scalar
LhsScalar
;
typedef
typename
Rhs
::
Scalar
RhsScalar
;
typedef
typename
Dest
::
Scalar
Scalar
;
typedef
internal
::
blas_traits
<
Lhs
>
LhsBlasTraits
;
typedef
typename
LhsBlasTraits
::
DirectLinearAccessType
ActualLhsType
;
typedef
typename
internal
::
remove_all
<
ActualLhsType
>::
type
ActualLhsTypeCleaned
;
typedef
internal
::
blas_traits
<
Rhs
>
RhsBlasTraits
;
typedef
typename
RhsBlasTraits
::
DirectLinearAccessType
ActualRhsType
;
typedef
typename
internal
::
remove_all
<
ActualRhsType
>::
type
ActualRhsTypeCleaned
;
typename
internal
::
add_const_on_value_type
<
ActualLhsType
>::
type
lhs
=
LhsBlasTraits
::
extract
(
a_lhs
);
typename
internal
::
add_const_on_value_type
<
ActualRhsType
>::
type
rhs
=
RhsBlasTraits
::
extract
(
a_rhs
);
Scalar
actualAlpha
=
alpha
*
LhsBlasTraits
::
extractScalarFactor
(
m_lhs
)
*
RhsBlasTraits
::
extractScalarFactor
(
m_rhs
);
LhsScalar
lhs_alpha
=
LhsBlasTraits
::
extractScalarFactor
(
a_lhs
);
RhsScalar
rhs_alpha
=
RhsBlasTraits
::
extractScalarFactor
(
a_rhs
);
Scalar
actualAlpha
=
alpha
*
lhs_alpha
*
rhs_alpha
;
typedef
internal
::
gemm_blocking_space
<
(
Dest
::
Flags
&
RowMajorBit
)
?
RowMajor
:
ColMajor
,
Scalar
,
Scalar
,
Lhs
::
MaxRowsAtCompileTime
,
Rhs
::
MaxColsAtCompileTime
,
Lhs
::
MaxColsAtCompileTime
,
4
>
BlockingType
;
...
...
@@ -381,23 +433,40 @@ struct TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
Index
stripedDepth
=
LhsIsTriangular
?
((
!
IsLower
)
?
lhs
.
cols
()
:
(
std
::
min
)(
lhs
.
cols
(),
lhs
.
rows
()))
:
((
IsLower
)
?
rhs
.
rows
()
:
(
std
::
min
)(
rhs
.
rows
(),
rhs
.
cols
()));
BlockingType
blocking
(
stripedRows
,
stripedCols
,
stripedDepth
);
BlockingType
blocking
(
stripedRows
,
stripedCols
,
stripedDepth
,
1
,
false
);
internal
::
product_triangular_matrix_matrix
<
Scalar
,
Index
,
Mode
,
LhsIsTriangular
,
(
internal
::
traits
<
_
ActualLhsType
>::
Flags
&
RowMajorBit
)
?
RowMajor
:
ColMajor
,
LhsBlasTraits
::
NeedToConjugate
,
(
internal
::
traits
<
_
ActualRhsType
>::
Flags
&
RowMajorBit
)
?
RowMajor
:
ColMajor
,
RhsBlasTraits
::
NeedToConjugate
,
(
internal
::
traits
<
Dest
>::
Flags
&
RowMajorBit
)
?
RowMajor
:
ColMajor
>
(
internal
::
traits
<
ActualLhsType
Cleaned
>::
Flags
&
RowMajorBit
)
?
RowMajor
:
ColMajor
,
LhsBlasTraits
::
NeedToConjugate
,
(
internal
::
traits
<
ActualRhsType
Cleaned
>::
Flags
&
RowMajorBit
)
?
RowMajor
:
ColMajor
,
RhsBlasTraits
::
NeedToConjugate
,
(
internal
::
traits
<
Dest
>::
Flags
&
RowMajorBit
)
?
RowMajor
:
ColMajor
,
Dest
::
InnerStrideAtCompileTime
>
::
run
(
stripedRows
,
stripedCols
,
stripedDepth
,
// sizes
&
lhs
.
coeffRef
(
0
,
0
),
lhs
.
outerStride
(),
// lhs info
&
rhs
.
coeffRef
(
0
,
0
),
rhs
.
outerStride
(),
// rhs info
&
dst
.
coeffRef
(
0
,
0
),
dst
.
outerStride
(),
// result info
&
lhs
.
coeffRef
(
0
,
0
),
lhs
.
outerStride
(),
// lhs info
&
rhs
.
coeffRef
(
0
,
0
),
rhs
.
outerStride
(),
// rhs info
&
dst
.
coeffRef
(
0
,
0
),
dst
.
innerStride
(),
dst
.
outerStride
(),
// result info
actualAlpha
,
blocking
);
// Apply correction if the diagonal is unit and a scalar factor was nested:
if
((
Mode
&
UnitDiag
)
==
UnitDiag
)
{
if
(
LhsIsTriangular
&&
lhs_alpha
!=
LhsScalar
(
1
))
{
Index
diagSize
=
(
std
::
min
)(
lhs
.
rows
(),
lhs
.
cols
());
dst
.
topRows
(
diagSize
)
-=
((
lhs_alpha
-
LhsScalar
(
1
))
*
a_rhs
).
topRows
(
diagSize
);
}
else
if
((
!
LhsIsTriangular
)
&&
rhs_alpha
!=
RhsScalar
(
1
))
{
Index
diagSize
=
(
std
::
min
)(
rhs
.
rows
(),
rhs
.
cols
());
dst
.
leftCols
(
diagSize
)
-=
(
rhs_alpha
-
RhsScalar
(
1
))
*
a_lhs
.
leftCols
(
diagSize
);
}
}
}
};
}
// end namespace internal
}
// end namespace Eigen
#endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_H
pydensecrf/densecrf/include/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h
0 → 100644
View file @
13b115ab
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
* Content : Eigen bindings to BLAS F77
* Triangular matrix * matrix product functionality based on ?TRMM.
********************************************************************************
*/
#ifndef EIGEN_TRIANGULAR_MATRIX_MATRIX_BLAS_H
#define EIGEN_TRIANGULAR_MATRIX_MATRIX_BLAS_H
namespace
Eigen
{
namespace
internal
{
template
<
typename
Scalar
,
typename
Index
,
int
Mode
,
bool
LhsIsTriangular
,
int
LhsStorageOrder
,
bool
ConjugateLhs
,
int
RhsStorageOrder
,
bool
ConjugateRhs
,
int
ResStorageOrder
>
struct
product_triangular_matrix_matrix_trmm
:
product_triangular_matrix_matrix
<
Scalar
,
Index
,
Mode
,
LhsIsTriangular
,
LhsStorageOrder
,
ConjugateLhs
,
RhsStorageOrder
,
ConjugateRhs
,
ResStorageOrder
,
1
,
BuiltIn
>
{};
// try to go to BLAS specialization
#define EIGEN_BLAS_TRMM_SPECIALIZE(Scalar, LhsIsTriangular) \
template <typename Index, int Mode, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_triangular_matrix_matrix<Scalar,Index, Mode, LhsIsTriangular, \
LhsStorageOrder,ConjugateLhs, RhsStorageOrder,ConjugateRhs,ColMajor,1,Specialized> { \
static inline void run(Index _rows, Index _cols, Index _depth, const Scalar* _lhs, Index lhsStride,\
const Scalar* _rhs, Index rhsStride, Scalar* res, Index resIncr, Index resStride, Scalar alpha, level3_blocking<Scalar,Scalar>& blocking) { \
EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \
eigen_assert(resIncr == 1); \
product_triangular_matrix_matrix_trmm<Scalar,Index,Mode, \
LhsIsTriangular,LhsStorageOrder,ConjugateLhs, \
RhsStorageOrder, ConjugateRhs, ColMajor>::run( \
_rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \
} \
};
EIGEN_BLAS_TRMM_SPECIALIZE
(
double
,
true
)
EIGEN_BLAS_TRMM_SPECIALIZE
(
double
,
false
)
EIGEN_BLAS_TRMM_SPECIALIZE
(
dcomplex
,
true
)
EIGEN_BLAS_TRMM_SPECIALIZE
(
dcomplex
,
false
)
EIGEN_BLAS_TRMM_SPECIALIZE
(
float
,
true
)
EIGEN_BLAS_TRMM_SPECIALIZE
(
float
,
false
)
EIGEN_BLAS_TRMM_SPECIALIZE
(
scomplex
,
true
)
EIGEN_BLAS_TRMM_SPECIALIZE
(
scomplex
,
false
)
// implements col-major += alpha * op(triangular) * op(general)
//
// When the triangular LHS is square (rows == depth) this maps directly onto
// BLAS ?TRMM: op(A) is multiplied into a copy of B in place, and the result
// is then accumulated into `res`. A non-square triangular LHS does not fit
// ?TRMM: it is either handled by Eigen's built-in triangular kernel (when
// the rectangular remainder is small relative to the diagonal block) or by
// densifying the triangular factor and calling BLAS ?GEMM.
//
// Fix: the comment lines inside this multi-line macro had lost their
// trailing backslash continuations (extraction artifact), which terminated
// the #define early and left the remainder as stray tokens. The `\` at the
// end of each comment line is restored below.
#define EIGEN_BLAS_TRMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
template <typename Index, int Mode, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
LhsStorageOrder,ConjugateLhs,RhsStorageOrder,ConjugateRhs,ColMajor> \
{ \
enum { \
IsLower = (Mode&Lower) == Lower, \
SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
LowUp = IsLower ? Lower : Upper, \
conjA = ((LhsStorageOrder==ColMajor) && ConjugateLhs) ? 1 : 0 \
}; \
\
static void run( \
Index _rows, Index _cols, Index _depth, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resStride, \
EIGTYPE alpha, level3_blocking<EIGTYPE,EIGTYPE>& blocking) \
{ \
Index diagSize = (std::min)(_rows,_depth); \
Index rows = IsLower ? _rows : diagSize; \
Index depth = IsLower ? diagSize : _depth; \
Index cols = _cols; \
\
typedef Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> MatrixLhs; \
typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs; \
\
/* Non-square case - doesn't fit to BLAS ?TRMM. Fall to default triangular product or call BLAS ?GEMM*/ \
\
if (rows != depth) { \
\
/* FIXME handle mkl_domain_get_max_threads */ \
\
/*int nthr = mkl_domain_get_max_threads(EIGEN_BLAS_DOMAIN_BLAS);*/ \
int nthr = 1;\
\
if (((nthr==1) && (((std::max)(rows,depth)-diagSize)/(double)diagSize < 0.5))) { \
/* Most likely no benefit to call TRMM or GEMM from BLAS */ \
\
product_triangular_matrix_matrix<EIGTYPE,Index,Mode,true, \
LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, 1, BuiltIn>::run( \
_rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, 1, resStride, alpha, blocking); \
/*std::cout << "TRMM_L: A is not square! Go to Eigen TRMM implementation!\n";*/ \
\
} else { \
/* Make sense to call GEMM */ \
\
Map<const MatrixLhs, 0, OuterStride<> > lhsMap(_lhs,rows,depth,OuterStride<>(lhsStride)); \
MatrixLhs aa_tmp=lhsMap.template triangularView<Mode>(); \
BlasIndex aStride = convert_index<BlasIndex>(aa_tmp.outerStride()); \
gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth, 1, true); \
general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor,1>::run( \
rows, cols, depth, aa_tmp.data(), aStride, _rhs, rhsStride, res, 1, resStride, alpha, gemm_blocking, 0); \
\
/*std::cout << "TRMM_L: A is not square! Go to BLAS GEMM implementation! " << nthr<<" \n";*/ \
\
} \
return; \
} \
char side = 'L', transa, uplo, diag = 'N'; \
EIGTYPE *b; \
const EIGTYPE *a; \
BlasIndex m, n, lda, ldb; \
\
/* Set m, n */ \
\
m = convert_index<BlasIndex>(diagSize); \
n = convert_index<BlasIndex>(cols); \
\
/* Set trans */ \
\
transa = (LhsStorageOrder==RowMajor) ? ((ConjugateLhs) ? 'C' : 'T') : 'N'; \
\
/* Set b, ldb */ \
\
Map<const MatrixRhs, 0, OuterStride<> > rhs(_rhs,depth,cols,OuterStride<>(rhsStride)); \
MatrixX##EIGPREFIX b_tmp; \
\
if (ConjugateRhs) b_tmp = rhs.conjugate(); else b_tmp = rhs; \
b = b_tmp.data(); \
ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
\
/* Set uplo */ \
\
uplo = IsLower ? 'L' : 'U'; \
if (LhsStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \
/* Set a, lda */ \
\
Map<const MatrixLhs, 0, OuterStride<> > lhs(_lhs,rows,depth,OuterStride<>(lhsStride)); \
MatrixLhs a_tmp; \
\
if ((conjA!=0) || (SetDiag==0)) { \
if (conjA) a_tmp = lhs.conjugate(); else a_tmp = lhs; \
if (IsZeroDiag) \
a_tmp.diagonal().setZero(); \
else if (IsUnitDiag) \
a_tmp.diagonal().setOnes();\
a = a_tmp.data(); \
lda = convert_index<BlasIndex>(a_tmp.outerStride()); \
} else { \
a = _lhs; \
lda = convert_index<BlasIndex>(lhsStride); \
} \
/*std::cout << "TRMM_L: A is square! Go to BLAS TRMM implementation! \n";*/ \
\
/* call ?trmm*/ \
\
BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \
\
/* Add op(a_triangular)*b into res*/ \
\
Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
res_tmp=res_tmp+b_tmp; \
} \
};
// Bind the left-triangular TRMM kernel to the concrete BLAS entry points.
// MKL uses its own complex types and unsuffixed symbol names; the generic
// Fortran BLAS ABI passes complex data as arrays of the real type and uses
// trailing-underscore symbols.
#ifdef EIGEN_USE_MKL
EIGEN_BLAS_TRMM_L(double,   double,        d,  dtrmm)
EIGEN_BLAS_TRMM_L(dcomplex, MKL_Complex16, cd, ztrmm)
EIGEN_BLAS_TRMM_L(float,    float,         f,  strmm)
EIGEN_BLAS_TRMM_L(scomplex, MKL_Complex8,  cf, ctrmm)
#else
EIGEN_BLAS_TRMM_L(double,   double, d,  dtrmm_)
EIGEN_BLAS_TRMM_L(dcomplex, double, cd, ztrmm_)
EIGEN_BLAS_TRMM_L(float,    float,  f,  strmm_)
EIGEN_BLAS_TRMM_L(scomplex, float,  cf, ctrmm_)
#endif
// implements col-major += alpha * op(general) * op(triangular)
//
// Mirror image of EIGEN_BLAS_TRMM_L with the triangular factor on the right
// (side = 'R'). A square triangular RHS maps onto BLAS ?TRMM; a non-square
// one falls back to Eigen's built-in kernel or to ?GEMM on a densified copy.
//
// Fix: the comment lines inside this multi-line macro had lost their
// trailing backslash continuations (extraction artifact), which terminated
// the #define early. The `\` at the end of each affected line is restored.
#define EIGEN_BLAS_TRMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
template <typename Index, int Mode, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
LhsStorageOrder,ConjugateLhs,RhsStorageOrder,ConjugateRhs,ColMajor> \
{ \
enum { \
IsLower = (Mode&Lower) == Lower, \
SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
LowUp = IsLower ? Lower : Upper, \
conjA = ((RhsStorageOrder==ColMajor) && ConjugateRhs) ? 1 : 0 \
}; \
\
static void run( \
Index _rows, Index _cols, Index _depth, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
EIGTYPE* res, Index resStride, \
EIGTYPE alpha, level3_blocking<EIGTYPE,EIGTYPE>& blocking) \
{ \
Index diagSize = (std::min)(_cols,_depth); \
Index rows = _rows; \
Index depth = IsLower ? _depth : diagSize; \
Index cols = IsLower ? diagSize : _cols; \
\
typedef Matrix<EIGTYPE, Dynamic, Dynamic, LhsStorageOrder> MatrixLhs; \
typedef Matrix<EIGTYPE, Dynamic, Dynamic, RhsStorageOrder> MatrixRhs; \
\
/* Non-square case - doesn't fit to BLAS ?TRMM. Fall to default triangular product or call BLAS ?GEMM*/ \
\
if (cols != depth) { \
\
int nthr = 1 \
/*mkl_domain_get_max_threads(EIGEN_BLAS_DOMAIN_BLAS)*/ \
; \
\
if ((nthr==1) && (((std::max)(cols,depth)-diagSize)/(double)diagSize < 0.5)) { \
/* Most likely no benefit to call TRMM or GEMM from BLAS*/ \
\
product_triangular_matrix_matrix<EIGTYPE,Index,Mode,false, \
LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, 1, BuiltIn>::run( \
_rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, 1, resStride, alpha, blocking); \
/*std::cout << "TRMM_R: A is not square! Go to Eigen TRMM implementation!\n";*/ \
\
} else { \
/* Make sense to call GEMM */ \
\
Map<const MatrixRhs, 0, OuterStride<> > rhsMap(_rhs,depth,cols, OuterStride<>(rhsStride)); \
MatrixRhs aa_tmp=rhsMap.template triangularView<Mode>(); \
BlasIndex aStride = convert_index<BlasIndex>(aa_tmp.outerStride()); \
gemm_blocking_space<ColMajor,EIGTYPE,EIGTYPE,Dynamic,Dynamic,Dynamic> gemm_blocking(_rows,_cols,_depth, 1, true); \
general_matrix_matrix_product<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,RhsStorageOrder,ConjugateRhs,ColMajor,1>::run( \
rows, cols, depth, _lhs, lhsStride, aa_tmp.data(), aStride, res, 1, resStride, alpha, gemm_blocking, 0); \
\
/*std::cout << "TRMM_R: A is not square! Go to BLAS GEMM implementation! " << nthr<<" \n";*/ \
\
} \
return; \
} \
char side = 'R', transa, uplo, diag = 'N'; \
EIGTYPE *b; \
const EIGTYPE *a; \
BlasIndex m, n, lda, ldb; \
\
/* Set m, n */ \
\
m = convert_index<BlasIndex>(rows); \
n = convert_index<BlasIndex>(diagSize); \
\
/* Set trans */ \
\
transa = (RhsStorageOrder==RowMajor) ? ((ConjugateRhs) ? 'C' : 'T') : 'N'; \
\
/* Set b, ldb */ \
\
Map<const MatrixLhs, 0, OuterStride<> > lhs(_lhs,rows,depth,OuterStride<>(lhsStride)); \
MatrixX##EIGPREFIX b_tmp; \
\
if (ConjugateLhs) b_tmp = lhs.conjugate(); else b_tmp = lhs; \
b = b_tmp.data(); \
ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
\
/* Set uplo */ \
\
uplo = IsLower ? 'L' : 'U'; \
if (RhsStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \
/* Set a, lda */ \
\
Map<const MatrixRhs, 0, OuterStride<> > rhs(_rhs,depth,cols, OuterStride<>(rhsStride)); \
MatrixRhs a_tmp; \
\
if ((conjA!=0) || (SetDiag==0)) { \
if (conjA) a_tmp = rhs.conjugate(); else a_tmp = rhs; \
if (IsZeroDiag) \
a_tmp.diagonal().setZero(); \
else if (IsUnitDiag) \
a_tmp.diagonal().setOnes();\
a = a_tmp.data(); \
lda = convert_index<BlasIndex>(a_tmp.outerStride()); \
} else { \
a = _rhs; \
lda = convert_index<BlasIndex>(rhsStride); \
} \
/*std::cout << "TRMM_R: A is square! Go to BLAS TRMM implementation! \n";*/ \
\
/* call ?trmm*/ \
\
BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \
\
/* Add op(a_triangular)*b into res*/ \
\
Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
res_tmp=res_tmp+b_tmp; \
} \
};
// Bind the right-triangular TRMM kernel to the concrete BLAS entry points
// (see the matching TRMM_L instantiations above for the MKL vs. Fortran-ABI
// naming and complex-type conventions).
#ifdef EIGEN_USE_MKL
EIGEN_BLAS_TRMM_R(double,   double,        d,  dtrmm)
EIGEN_BLAS_TRMM_R(dcomplex, MKL_Complex16, cd, ztrmm)
EIGEN_BLAS_TRMM_R(float,    float,         f,  strmm)
EIGEN_BLAS_TRMM_R(scomplex, MKL_Complex8,  cf, ctrmm)
#else
EIGEN_BLAS_TRMM_R(double,   double, d,  dtrmm_)
EIGEN_BLAS_TRMM_R(dcomplex, double, cd, ztrmm_)
EIGEN_BLAS_TRMM_R(float,    float,  f,  strmm_)
EIGEN_BLAS_TRMM_R(scomplex, float,  cf, ctrmm_)
#endif
}
// end namespace internal
}
// end namespace Eigen
#endif // EIGEN_TRIANGULAR_MATRIX_MATRIX_BLAS_H
pydensecrf/densecrf/include/Eigen/src/Core/products/TriangularMatrixVector.h
View file @
13b115ab
...
...
@@ -10,7 +10,7 @@
#ifndef EIGEN_TRIANGULARMATRIXVECTOR_H
#define EIGEN_TRIANGULARMATRIXVECTOR_H
namespace
Eigen
{
namespace
Eigen
{
namespace
internal
{
...
...
@@ -20,14 +20,20 @@ struct triangular_matrix_vector_product;
template
<
typename
Index
,
int
Mode
,
typename
LhsScalar
,
bool
ConjLhs
,
typename
RhsScalar
,
bool
ConjRhs
,
int
Version
>
struct
triangular_matrix_vector_product
<
Index
,
Mode
,
LhsScalar
,
ConjLhs
,
RhsScalar
,
ConjRhs
,
ColMajor
,
Version
>
{
typedef
typename
s
calar
_product_t
raits
<
LhsScalar
,
RhsScalar
>::
ReturnType
ResScalar
;
typedef
typename
S
calar
BinaryOpT
raits
<
LhsScalar
,
RhsScalar
>::
ReturnType
ResScalar
;
enum
{
IsLower
=
((
Mode
&
Lower
)
==
Lower
),
HasUnitDiag
=
(
Mode
&
UnitDiag
)
==
UnitDiag
,
HasZeroDiag
=
(
Mode
&
ZeroDiag
)
==
ZeroDiag
};
static
EIGEN_DONT_INLINE
void
run
(
Index
_rows
,
Index
_cols
,
const
LhsScalar
*
_lhs
,
Index
lhsStride
,
const
RhsScalar
*
_rhs
,
Index
rhsIncr
,
ResScalar
*
_res
,
Index
resIncr
,
ResScalar
alpha
)
const
RhsScalar
*
_rhs
,
Index
rhsIncr
,
ResScalar
*
_res
,
Index
resIncr
,
const
RhsScalar
&
alpha
);
};
template
<
typename
Index
,
int
Mode
,
typename
LhsScalar
,
bool
ConjLhs
,
typename
RhsScalar
,
bool
ConjRhs
,
int
Version
>
EIGEN_DONT_INLINE
void
triangular_matrix_vector_product
<
Index
,
Mode
,
LhsScalar
,
ConjLhs
,
RhsScalar
,
ConjRhs
,
ColMajor
,
Version
>
::
run
(
Index
_rows
,
Index
_cols
,
const
LhsScalar
*
_lhs
,
Index
lhsStride
,
const
RhsScalar
*
_rhs
,
Index
rhsIncr
,
ResScalar
*
_res
,
Index
resIncr
,
const
RhsScalar
&
alpha
)
{
static
const
Index
PanelWidth
=
EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH
;
Index
size
=
(
std
::
min
)(
_rows
,
_cols
);
...
...
@@ -37,7 +43,7 @@ struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
typedef
Map
<
const
Matrix
<
LhsScalar
,
Dynamic
,
Dynamic
,
ColMajor
>
,
0
,
OuterStride
<>
>
LhsMap
;
const
LhsMap
lhs
(
_lhs
,
rows
,
cols
,
OuterStride
<>
(
lhsStride
));
typename
conj_expr_if
<
ConjLhs
,
LhsMap
>::
type
cjLhs
(
lhs
);
typedef
Map
<
const
Matrix
<
RhsScalar
,
Dynamic
,
1
>
,
0
,
InnerStride
<>
>
RhsMap
;
const
RhsMap
rhs
(
_rhs
,
cols
,
InnerStride
<>
(
rhsIncr
));
typename
conj_expr_if
<
ConjRhs
,
RhsMap
>::
type
cjRhs
(
rhs
);
...
...
@@ -45,6 +51,9 @@ struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
typedef
Map
<
Matrix
<
ResScalar
,
Dynamic
,
1
>
>
ResMap
;
ResMap
res
(
_res
,
rows
);
typedef
const_blas_data_mapper
<
LhsScalar
,
Index
,
ColMajor
>
LhsMapper
;
typedef
const_blas_data_mapper
<
RhsScalar
,
Index
,
RowMajor
>
RhsMapper
;
for
(
Index
pi
=
0
;
pi
<
size
;
pi
+=
PanelWidth
)
{
Index
actualPanelWidth
=
(
std
::
min
)(
PanelWidth
,
size
-
pi
);
...
...
@@ -62,35 +71,40 @@ struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
if
(
r
>
0
)
{
Index
s
=
IsLower
?
pi
+
actualPanelWidth
:
0
;
general_matrix_vector_product
<
Index
,
LhsScalar
,
ColMajor
,
ConjLhs
,
RhsScalar
,
ConjRhs
,
BuiltIn
>::
run
(
general_matrix_vector_product
<
Index
,
LhsScalar
,
LhsMapper
,
ColMajor
,
ConjLhs
,
RhsScalar
,
RhsMapper
,
ConjRhs
,
BuiltIn
>::
run
(
r
,
actualPanelWidth
,
&
lhs
.
coeffRef
(
s
,
pi
),
lhsStride
,
&
rhs
.
coeffRef
(
pi
),
rhsIncr
,
LhsMapper
(
&
lhs
.
coeffRef
(
s
,
pi
),
lhsStride
)
,
RhsMapper
(
&
rhs
.
coeffRef
(
pi
),
rhsIncr
)
,
&
res
.
coeffRef
(
s
),
resIncr
,
alpha
);
}
}
if
((
!
IsLower
)
&&
cols
>
size
)
{
general_matrix_vector_product
<
Index
,
LhsScalar
,
ColMajor
,
ConjLhs
,
RhsScalar
,
ConjRhs
>::
run
(
general_matrix_vector_product
<
Index
,
LhsScalar
,
LhsMapper
,
ColMajor
,
ConjLhs
,
RhsScalar
,
RhsMapper
,
ConjRhs
>::
run
(
rows
,
cols
-
size
,
&
lhs
.
coeffRef
(
0
,
size
),
lhsStride
,
&
rhs
.
coeffRef
(
size
),
rhsIncr
,
LhsMapper
(
&
lhs
.
coeffRef
(
0
,
size
),
lhsStride
)
,
RhsMapper
(
&
rhs
.
coeffRef
(
size
),
rhsIncr
)
,
_res
,
resIncr
,
alpha
);
}
}
};
template
<
typename
Index
,
int
Mode
,
typename
LhsScalar
,
bool
ConjLhs
,
typename
RhsScalar
,
bool
ConjRhs
,
int
Version
>
struct
triangular_matrix_vector_product
<
Index
,
Mode
,
LhsScalar
,
ConjLhs
,
RhsScalar
,
ConjRhs
,
RowMajor
,
Version
>
{
typedef
typename
s
calar
_product_t
raits
<
LhsScalar
,
RhsScalar
>::
ReturnType
ResScalar
;
typedef
typename
S
calar
BinaryOpT
raits
<
LhsScalar
,
RhsScalar
>::
ReturnType
ResScalar
;
enum
{
IsLower
=
((
Mode
&
Lower
)
==
Lower
),
HasUnitDiag
=
(
Mode
&
UnitDiag
)
==
UnitDiag
,
HasZeroDiag
=
(
Mode
&
ZeroDiag
)
==
ZeroDiag
};
static
void
run
(
Index
_rows
,
Index
_cols
,
const
LhsScalar
*
_lhs
,
Index
lhsStride
,
const
RhsScalar
*
_rhs
,
Index
rhsIncr
,
ResScalar
*
_res
,
Index
resIncr
,
ResScalar
alpha
)
static
EIGEN_DONT_INLINE
void
run
(
Index
_rows
,
Index
_cols
,
const
LhsScalar
*
_lhs
,
Index
lhsStride
,
const
RhsScalar
*
_rhs
,
Index
rhsIncr
,
ResScalar
*
_res
,
Index
resIncr
,
const
ResScalar
&
alpha
);
};
template
<
typename
Index
,
int
Mode
,
typename
LhsScalar
,
bool
ConjLhs
,
typename
RhsScalar
,
bool
ConjRhs
,
int
Version
>
EIGEN_DONT_INLINE
void
triangular_matrix_vector_product
<
Index
,
Mode
,
LhsScalar
,
ConjLhs
,
RhsScalar
,
ConjRhs
,
RowMajor
,
Version
>
::
run
(
Index
_rows
,
Index
_cols
,
const
LhsScalar
*
_lhs
,
Index
lhsStride
,
const
RhsScalar
*
_rhs
,
Index
rhsIncr
,
ResScalar
*
_res
,
Index
resIncr
,
const
ResScalar
&
alpha
)
{
static
const
Index
PanelWidth
=
EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH
;
Index
diagSize
=
(
std
::
min
)(
_rows
,
_cols
);
...
...
@@ -107,7 +121,10 @@ struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
typedef
Map
<
Matrix
<
ResScalar
,
Dynamic
,
1
>
,
0
,
InnerStride
<>
>
ResMap
;
ResMap
res
(
_res
,
rows
,
InnerStride
<>
(
resIncr
));
typedef
const_blas_data_mapper
<
LhsScalar
,
Index
,
RowMajor
>
LhsMapper
;
typedef
const_blas_data_mapper
<
RhsScalar
,
Index
,
RowMajor
>
RhsMapper
;
for
(
Index
pi
=
0
;
pi
<
diagSize
;
pi
+=
PanelWidth
)
{
Index
actualPanelWidth
=
(
std
::
min
)(
PanelWidth
,
diagSize
-
pi
);
...
...
@@ -125,105 +142,88 @@ struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
if
(
r
>
0
)
{
Index
s
=
IsLower
?
0
:
pi
+
actualPanelWidth
;
general_matrix_vector_product
<
Index
,
LhsScalar
,
RowMajor
,
ConjLhs
,
RhsScalar
,
ConjRhs
,
BuiltIn
>::
run
(
general_matrix_vector_product
<
Index
,
LhsScalar
,
LhsMapper
,
RowMajor
,
ConjLhs
,
RhsScalar
,
RhsMapper
,
ConjRhs
,
BuiltIn
>::
run
(
actualPanelWidth
,
r
,
&
lhs
.
coeffRef
(
pi
,
s
),
lhsStride
,
&
rhs
.
coeffRef
(
s
),
rhsIncr
,
LhsMapper
(
&
lhs
.
coeffRef
(
pi
,
s
),
lhsStride
)
,
RhsMapper
(
&
rhs
.
coeffRef
(
s
),
rhsIncr
)
,
&
res
.
coeffRef
(
pi
),
resIncr
,
alpha
);
}
}
if
(
IsLower
&&
rows
>
diagSize
)
{
general_matrix_vector_product
<
Index
,
LhsScalar
,
RowMajor
,
ConjLhs
,
RhsScalar
,
ConjRhs
>::
run
(
general_matrix_vector_product
<
Index
,
LhsScalar
,
LhsMapper
,
RowMajor
,
ConjLhs
,
RhsScalar
,
RhsMapper
,
ConjRhs
>::
run
(
rows
-
diagSize
,
cols
,
&
lhs
.
coeffRef
(
diagSize
,
0
),
lhsStride
,
&
rhs
.
coeffRef
(
0
),
rhsIncr
,
LhsMapper
(
&
lhs
.
coeffRef
(
diagSize
,
0
),
lhsStride
)
,
RhsMapper
(
&
rhs
.
coeffRef
(
0
),
rhsIncr
)
,
&
res
.
coeffRef
(
diagSize
),
resIncr
,
alpha
);
}
}
};
/***************************************************************************
* Wrapper to product_triangular_vector
***************************************************************************/
template
<
int
Mode
,
bool
LhsIsTriangular
,
typename
Lhs
,
typename
Rhs
>
struct
traits
<
TriangularProduct
<
Mode
,
LhsIsTriangular
,
Lhs
,
false
,
Rhs
,
true
>
>
:
traits
<
ProductBase
<
TriangularProduct
<
Mode
,
LhsIsTriangular
,
Lhs
,
false
,
Rhs
,
true
>
,
Lhs
,
Rhs
>
>
{};
template
<
int
Mode
,
bool
LhsIsTriangular
,
typename
Lhs
,
typename
Rhs
>
struct
traits
<
TriangularProduct
<
Mode
,
LhsIsTriangular
,
Lhs
,
true
,
Rhs
,
false
>
>
:
traits
<
ProductBase
<
TriangularProduct
<
Mode
,
LhsIsTriangular
,
Lhs
,
true
,
Rhs
,
false
>
,
Lhs
,
Rhs
>
>
{};
template
<
int
StorageOrder
>
template
<
int
Mode
,
int
StorageOrder
>
struct
trmv_selector
;
}
// end namespace internal
namespace
internal
{
template
<
int
Mode
,
typename
Lhs
,
typename
Rhs
>
struct
TriangularProduct
<
Mode
,
true
,
Lhs
,
false
,
Rhs
,
true
>
:
public
ProductBase
<
TriangularProduct
<
Mode
,
true
,
Lhs
,
false
,
Rhs
,
true
>
,
Lhs
,
Rhs
>
struct
triangular_product_impl
<
Mode
,
true
,
Lhs
,
false
,
Rhs
,
true
>
{
EIGEN_PRODUCT_PUBLIC_INTERFACE
(
TriangularProduct
)
TriangularProduct
(
const
Lhs
&
lhs
,
const
Rhs
&
rhs
)
:
Base
(
lhs
,
rhs
)
{}
template
<
typename
Dest
>
void
scaleAndAddTo
(
Dest
&
dst
,
Scalar
alpha
)
const
template
<
typename
Dest
>
static
void
run
(
Dest
&
dst
,
const
Lhs
&
lhs
,
const
Rhs
&
rhs
,
const
typename
Dest
::
Scalar
&
alpha
)
{
eigen_assert
(
dst
.
rows
()
==
m_
lhs
.
rows
()
&&
dst
.
cols
()
==
m_
rhs
.
cols
());
eigen_assert
(
dst
.
rows
()
==
lhs
.
rows
()
&&
dst
.
cols
()
==
rhs
.
cols
());
internal
::
trmv_selector
<
(
int
(
internal
::
traits
<
Lhs
>::
Flags
)
&
RowMajorBit
)
?
RowMajor
:
ColMajor
>::
run
(
*
thi
s
,
dst
,
alpha
);
internal
::
trmv_selector
<
Mode
,
(
int
(
internal
::
traits
<
Lhs
>::
Flags
)
&
RowMajorBit
)
?
RowMajor
:
ColMajor
>::
run
(
lhs
,
rh
s
,
dst
,
alpha
);
}
};
template
<
int
Mode
,
typename
Lhs
,
typename
Rhs
>
struct
TriangularProduct
<
Mode
,
false
,
Lhs
,
true
,
Rhs
,
false
>
:
public
ProductBase
<
TriangularProduct
<
Mode
,
false
,
Lhs
,
true
,
Rhs
,
false
>
,
Lhs
,
Rhs
>
struct
triangular_product_impl
<
Mode
,
false
,
Lhs
,
true
,
Rhs
,
false
>
{
EIGEN_PRODUCT_PUBLIC_INTERFACE
(
TriangularProduct
)
TriangularProduct
(
const
Lhs
&
lhs
,
const
Rhs
&
rhs
)
:
Base
(
lhs
,
rhs
)
{}
template
<
typename
Dest
>
void
scaleAndAddTo
(
Dest
&
dst
,
Scalar
alpha
)
const
template
<
typename
Dest
>
static
void
run
(
Dest
&
dst
,
const
Lhs
&
lhs
,
const
Rhs
&
rhs
,
const
typename
Dest
::
Scalar
&
alpha
)
{
eigen_assert
(
dst
.
rows
()
==
m_
lhs
.
rows
()
&&
dst
.
cols
()
==
m_
rhs
.
cols
());
eigen_assert
(
dst
.
rows
()
==
lhs
.
rows
()
&&
dst
.
cols
()
==
rhs
.
cols
());
typedef
TriangularProduct
<
(
Mode
&
(
UnitDiag
|
ZeroDiag
))
|
((
Mode
&
Lower
)
?
Upper
:
Lower
),
true
,
Transpose
<
const
Rhs
>
,
false
,
Transpose
<
const
Lhs
>
,
true
>
TriangularProductTranspose
;
Transpose
<
Dest
>
dstT
(
dst
);
internal
::
trmv_selector
<
(
int
(
internal
::
traits
<
Rhs
>::
Flags
)
&
RowMajorBit
)
?
ColMajor
:
RowMajor
>::
run
(
TriangularProductTranspose
(
m_rhs
.
transpose
(),
m_lhs
.
transpose
()),
dstT
,
alpha
);
internal
::
trmv_selector
<
(
Mode
&
(
UnitDiag
|
ZeroDiag
))
|
((
Mode
&
Lower
)
?
Upper
:
Lower
),
(
int
(
internal
::
traits
<
Rhs
>::
Flags
)
&
RowMajorBit
)
?
ColMajor
:
RowMajor
>
::
run
(
rhs
.
transpose
(),
lhs
.
transpose
(),
dstT
,
alpha
);
}
};
}
// end namespace internal
namespace
internal
{
// TODO: find a way to factorize this piece of code with gemv_selector since the logic is exactly the same.
template
<
>
struct
trmv_selector
<
ColMajor
>
template
<
int
Mode
>
struct
trmv_selector
<
Mode
,
ColMajor
>
{
template
<
int
Mode
,
typename
Lhs
,
typename
Rhs
,
typename
Dest
>
static
void
run
(
const
TriangularProduct
<
Mode
,
true
,
Lhs
,
false
,
Rhs
,
true
>&
prod
,
Dest
&
dest
,
typename
TriangularProduct
<
Mode
,
true
,
Lhs
,
false
,
Rhs
,
true
>
::
Scalar
alpha
)
template
<
typename
Lhs
,
typename
Rhs
,
typename
Dest
>
static
void
run
(
const
Lhs
&
lhs
,
const
Rhs
&
rhs
,
Dest
&
dest
,
const
typename
Dest
::
Scalar
&
alpha
)
{
typedef
TriangularProduct
<
Mode
,
true
,
Lhs
,
false
,
Rhs
,
true
>
ProductType
;
typedef
typename
ProductType
::
Index
Index
;
typedef
typename
ProductType
::
LhsScalar
LhsScalar
;
typedef
typename
ProductType
::
RhsScalar
RhsScalar
;
typedef
typename
ProductType
::
Scalar
ResScalar
;
typedef
typename
ProductType
::
RealScalar
RealScalar
;
typedef
typename
ProductType
::
ActualLhsType
ActualLhsType
;
typedef
typename
ProductType
::
ActualRhsType
ActualRhsType
;
typedef
typename
ProductType
::
LhsBlasTraits
LhsBlasTraits
;
typedef
typename
ProductType
::
RhsBlasTraits
RhsBlasTraits
;
typedef
Map
<
Matrix
<
ResScalar
,
Dynamic
,
1
>
,
Aligned
>
MappedDest
;
typename
internal
::
add_const_on_value_type
<
ActualLhsType
>::
type
actualLhs
=
LhsBlasTraits
::
extract
(
prod
.
lhs
());
typename
internal
::
add_const_on_value_type
<
ActualRhsType
>::
type
actualRhs
=
RhsBlasTraits
::
extract
(
prod
.
rhs
());
ResScalar
actualAlpha
=
alpha
*
LhsBlasTraits
::
extractScalarFactor
(
prod
.
lhs
())
*
RhsBlasTraits
::
extractScalarFactor
(
prod
.
rhs
());
typedef
typename
Lhs
::
Scalar
LhsScalar
;
typedef
typename
Rhs
::
Scalar
RhsScalar
;
typedef
typename
Dest
::
Scalar
ResScalar
;
typedef
typename
Dest
::
RealScalar
RealScalar
;
typedef
internal
::
blas_traits
<
Lhs
>
LhsBlasTraits
;
typedef
typename
LhsBlasTraits
::
DirectLinearAccessType
ActualLhsType
;
typedef
internal
::
blas_traits
<
Rhs
>
RhsBlasTraits
;
typedef
typename
RhsBlasTraits
::
DirectLinearAccessType
ActualRhsType
;
typedef
Map
<
Matrix
<
ResScalar
,
Dynamic
,
1
>
,
EIGEN_PLAIN_ENUM_MIN
(
AlignedMax
,
internal
::
packet_traits
<
ResScalar
>::
size
)
>
MappedDest
;
typename
internal
::
add_const_on_value_type
<
ActualLhsType
>::
type
actualLhs
=
LhsBlasTraits
::
extract
(
lhs
);
typename
internal
::
add_const_on_value_type
<
ActualRhsType
>::
type
actualRhs
=
RhsBlasTraits
::
extract
(
rhs
);
LhsScalar
lhs_alpha
=
LhsBlasTraits
::
extractScalarFactor
(
lhs
);
RhsScalar
rhs_alpha
=
RhsBlasTraits
::
extractScalarFactor
(
rhs
);
ResScalar
actualAlpha
=
alpha
*
lhs_alpha
*
rhs_alpha
;
enum
{
// FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
...
...
@@ -235,9 +235,9 @@ template<> struct trmv_selector<ColMajor>
gemv_static_vector_if
<
ResScalar
,
Dest
::
SizeAtCompileTime
,
Dest
::
MaxSizeAtCompileTime
,
MightCannotUseDest
>
static_dest
;
bool
alphaIsCompatible
=
(
!
ComplexByReal
)
||
(
imag
(
actualAlpha
)
==
RealScalar
(
0
));
bool
alphaIsCompatible
=
(
!
ComplexByReal
)
||
(
numext
::
imag
(
actualAlpha
)
==
RealScalar
(
0
));
bool
evalToDest
=
EvalToDestAtCompileTime
&&
alphaIsCompatible
;
RhsScalar
compatibleAlpha
=
get_factor
<
ResScalar
,
RhsScalar
>::
run
(
actualAlpha
);
ei_declare_aligned_stack_constructed_variable
(
ResScalar
,
actualDestPtr
,
dest
.
size
(),
...
...
@@ -246,7 +246,7 @@ template<> struct trmv_selector<ColMajor>
if
(
!
evalToDest
)
{
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
int
size
=
dest
.
size
();
Index
size
=
dest
.
size
();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
if
(
!
alphaIsCompatible
)
...
...
@@ -257,7 +257,7 @@ template<> struct trmv_selector<ColMajor>
else
MappedDest
(
actualDestPtr
,
dest
.
size
())
=
dest
;
}
internal
::
triangular_matrix_vector_product
<
Index
,
Mode
,
LhsScalar
,
LhsBlasTraits
::
NeedToConjugate
,
...
...
@@ -275,36 +275,42 @@ template<> struct trmv_selector<ColMajor>
else
dest
=
MappedDest
(
actualDestPtr
,
dest
.
size
());
}
if
(
((
Mode
&
UnitDiag
)
==
UnitDiag
)
&&
(
lhs_alpha
!=
LhsScalar
(
1
))
)
{
Index
diagSize
=
(
std
::
min
)(
lhs
.
rows
(),
lhs
.
cols
());
dest
.
head
(
diagSize
)
-=
(
lhs_alpha
-
LhsScalar
(
1
))
*
rhs
.
head
(
diagSize
);
}
}
};
template
<
>
struct
trmv_selector
<
RowMajor
>
template
<
int
Mode
>
struct
trmv_selector
<
Mode
,
RowMajor
>
{
template
<
int
Mode
,
typename
Lhs
,
typename
Rhs
,
typename
Dest
>
static
void
run
(
const
TriangularProduct
<
Mode
,
true
,
Lhs
,
false
,
Rhs
,
true
>&
prod
,
Dest
&
dest
,
typename
TriangularProduct
<
Mode
,
true
,
Lhs
,
false
,
Rhs
,
true
>
::
Scalar
alpha
)
template
<
typename
Lhs
,
typename
Rhs
,
typename
Dest
>
static
void
run
(
const
Lhs
&
lhs
,
const
Rhs
&
rhs
,
Dest
&
dest
,
const
typename
Dest
::
Scalar
&
alpha
)
{
typedef
TriangularProduct
<
Mode
,
true
,
Lhs
,
false
,
Rhs
,
true
>
ProductType
;
typedef
typename
ProductType
::
Lhs
Scalar
L
hsScalar
;
typedef
typename
ProductType
::
Rhs
Scalar
Rh
sScalar
;
typedef
typename
ProductType
::
Scalar
ResScalar
;
typedef
typename
ProductType
::
Index
Index
;
typedef
typename
ProductType
::
ActualLh
sType
ActualLhsType
;
typedef
typename
ProductType
::
ActualRhsType
ActualRhsType
;
typedef
typename
ProductType
::
_ActualRh
sType
_
ActualRhsType
;
typedef
typename
Product
Type
::
LhsBlasTraits
LhsBlasTraits
;
typedef
typename
ProductType
::
RhsBlasTraits
RhsBlasTraits
;
typename
add_const
<
Actual
L
hsType
>::
type
actual
L
hs
=
L
hsBlasTraits
::
extract
(
prod
.
lhs
()
);
typename
add_const
<
ActualRhsType
>::
type
actualRhs
=
RhsBlasTraits
::
extract
(
prod
.
rhs
());
R
e
sScalar
actualAlpha
=
alpha
*
L
hsBlasTraits
::
extractScalarFactor
(
prod
.
lhs
())
*
R
hs
BlasTraits
::
extractScalarFactor
(
prod
.
rhs
())
;
typedef
typename
Lhs
::
Scalar
LhsScalar
;
typedef
typename
Rhs
::
Scalar
R
hsScalar
;
typedef
typename
Dest
::
Scalar
Re
sScalar
;
typedef
internal
::
blas_traits
<
Lhs
>
LhsBlasTraits
;
typedef
typename
LhsBlasTraits
::
DirectLinearAcces
sType
ActualLhsType
;
typedef
internal
::
blas_traits
<
Rhs
>
RhsBlasTraits
;
typedef
typename
RhsBlasTraits
::
DirectLinearAcces
sType
ActualRhsType
;
typedef
typename
internal
::
remove_all
<
ActualRhs
Type
>
::
type
ActualRhsTypeCleaned
;
typename
add_const
<
ActualLhsType
>::
type
actualLhs
=
LhsBlasTraits
::
extract
(
lhs
);
typename
add_const
<
Actual
R
hsType
>::
type
actual
R
hs
=
R
hsBlasTraits
::
extract
(
rhs
);
LhsScalar
lhs_alpha
=
LhsBlasTraits
::
extractScalarFactor
(
lhs
);
R
h
sScalar
rhs_
alpha
=
R
hsBlasTraits
::
extractScalarFactor
(
rhs
);
ResScalar
actualAlpha
=
alpha
*
l
hs
_alpha
*
rhs_alpha
;
enum
{
DirectlyUseRhs
=
_
ActualRhsType
::
InnerStrideAtCompileTime
==
1
DirectlyUseRhs
=
ActualRhsType
Cleaned
::
InnerStrideAtCompileTime
==
1
};
gemv_static_vector_if
<
RhsScalar
,
_
ActualRhsType
::
SizeAtCompileTime
,
_
ActualRhsType
::
MaxSizeAtCompileTime
,
!
DirectlyUseRhs
>
static_rhs
;
gemv_static_vector_if
<
RhsScalar
,
ActualRhsType
Cleaned
::
SizeAtCompileTime
,
ActualRhsType
Cleaned
::
MaxSizeAtCompileTime
,
!
DirectlyUseRhs
>
static_rhs
;
ei_declare_aligned_stack_constructed_variable
(
RhsScalar
,
actualRhsPtr
,
actualRhs
.
size
(),
DirectlyUseRhs
?
const_cast
<
RhsScalar
*>
(
actualRhs
.
data
())
:
static_rhs
.
data
());
...
...
@@ -312,12 +318,12 @@ template<> struct trmv_selector<RowMajor>
if
(
!
DirectlyUseRhs
)
{
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
int
size
=
actualRhs
.
size
();
Index
size
=
actualRhs
.
size
();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
Map
<
typename
_
ActualRhsType
::
PlainObject
>
(
actualRhsPtr
,
actualRhs
.
size
())
=
actualRhs
;
Map
<
typename
ActualRhsType
Cleaned
::
PlainObject
>
(
actualRhsPtr
,
actualRhs
.
size
())
=
actualRhs
;
}
internal
::
triangular_matrix_vector_product
<
Index
,
Mode
,
LhsScalar
,
LhsBlasTraits
::
NeedToConjugate
,
...
...
@@ -328,6 +334,12 @@ template<> struct trmv_selector<RowMajor>
actualRhsPtr
,
1
,
dest
.
data
(),
dest
.
innerStride
(),
actualAlpha
);
if
(
((
Mode
&
UnitDiag
)
==
UnitDiag
)
&&
(
lhs_alpha
!=
LhsScalar
(
1
))
)
{
Index
diagSize
=
(
std
::
min
)(
lhs
.
rows
(),
lhs
.
cols
());
dest
.
head
(
diagSize
)
-=
(
lhs_alpha
-
LhsScalar
(
1
))
*
rhs
.
head
(
diagSize
);
}
}
};
...
...
pydensecrf/densecrf/include/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h
0 → 100644
View file @
13b115ab
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
* Content : Eigen bindings to BLAS F77
* Triangular matrix-vector product functionality based on ?TRMV.
********************************************************************************
*/
#ifndef EIGEN_TRIANGULAR_MATRIX_VECTOR_BLAS_H
#define EIGEN_TRIANGULAR_MATRIX_VECTOR_BLAS_H
namespace
Eigen
{
namespace
internal
{
/**********************************************************************
* This file implements triangular matrix-vector multiplication using BLAS
**********************************************************************/
// trmv/hemv specialization
// Generic fallback: unless a BLAS specialization below matches, the ?TRMV
// wrapper simply inherits (and thus forwards to) Eigen's built-in
// triangular matrix-vector kernel.
template<typename Index, int Mode,
         typename LhsScalar, bool ConjLhs,
         typename RhsScalar, bool ConjRhs,
         int StorageOrder>
struct triangular_matrix_vector_product_trmv
  : triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,StorageOrder,BuiltIn>
{};
// Intercept the `Specialized` instantiations of
// triangular_matrix_vector_product for a BLAS-compatible scalar (both
// column-major and row-major storage) and route them through
// triangular_matrix_vector_product_trmv, which may dispatch to BLAS ?TRMV.
#define EIGEN_BLAS_TRMV_SPECIALIZE(Scalar) \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,ColMajor,Specialized> { \
 static void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
                                     const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \
      triangular_matrix_vector_product_trmv<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,ColMajor>::run( \
        _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
  } \
}; \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,RowMajor,Specialized> { \
 static void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
                                     const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \
      triangular_matrix_vector_product_trmv<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,RowMajor>::run( \
        _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
  } \
};
// Instantiate the ?TRMV dispatch specializations for every BLAS-compatible
// scalar type.
EIGEN_BLAS_TRMV_SPECIALIZE(double)
EIGEN_BLAS_TRMV_SPECIALIZE(float)
EIGEN_BLAS_TRMV_SPECIALIZE(dcomplex)
EIGEN_BLAS_TRMV_SPECIALIZE(scomplex)
// implements col-major: res += alpha * op(triangular) * vector
//
// The square diagSize x diagSize part of the product is computed with BLAS
// ?TRMV into a copy of the rhs and accumulated into res via ?AXPY; any
// rectangular remainder of a non-square triangular factor is handled with a
// follow-up ?GEMV. Conjugated-lhs and zero-diagonal cases fall back to
// Eigen's built-in kernel, since ?TRMV cannot express them.
//
// Fix: the comment lines inside this multi-line macro had lost their
// trailing backslash continuations (extraction artifact), which terminated
// the #define early. The `\` at the end of each comment line is restored.
#define EIGEN_BLAS_TRMV_CM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX, BLASPOSTFIX) \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor> { \
 enum { \
  IsLower = (Mode&Lower) == Lower, \
  SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
  IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \
  IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
  LowUp = IsLower ? Lower : Upper \
 }; \
 static void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
                 const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
 { \
   if (ConjLhs || IsZeroDiag) { \
     triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor,BuiltIn>::run( \
       _rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
     return; \
   }\
   Index size = (std::min)(_rows,_cols); \
   Index rows = IsLower ? _rows : size; \
   Index cols = IsLower ? size : _cols; \
\
   typedef VectorX##EIGPREFIX VectorRhs; \
   EIGTYPE *x, *y;\
\
/* Set x*/ \
\
   Map<const VectorRhs, 0, InnerStride<> > rhs(_rhs,cols,InnerStride<>(rhsIncr)); \
   VectorRhs x_tmp; \
   if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
   x = x_tmp.data(); \
\
/* Square part handling */ \
\
\
   char trans, uplo, diag; \
   BlasIndex m, n, lda, incx, incy; \
   EIGTYPE const *a; \
   EIGTYPE beta(1); \
\
/* Set m, n */ \
\
   n = convert_index<BlasIndex>(size); \
   lda = convert_index<BlasIndex>(lhsStride); \
   incx = 1; \
   incy = convert_index<BlasIndex>(resIncr); \
\
/* Set uplo, trans and diag*/ \
\
   trans = 'N'; \
   uplo = IsLower ? 'L' : 'U'; \
   diag = IsUnitDiag ? 'U' : 'N'; \
\
/* call ?TRMV*/ \
\
   BLASPREFIX##trmv##BLASPOSTFIX(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \
\
/* Add op(a_tr)rhs into res*/ \
\
   BLASPREFIX##axpy##BLASPOSTFIX(&n, (const BLASTYPE*)&numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \
/* Non-square case - doesn't fit to BLAS ?TRMV. Fall to default triangular product*/ \
\
   if (size<(std::max)(rows,cols)) { \
     if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
     x = x_tmp.data(); \
     if (size<rows) { \
       y = _res + size*resIncr; \
       a = _lhs + size; \
       m = convert_index<BlasIndex>(rows-size); \
       n = convert_index<BlasIndex>(size); \
     } \
     else { \
       x += size; \
       y = _res; \
       a = _lhs + size*lda; \
       m = convert_index<BlasIndex>(size); \
       n = convert_index<BlasIndex>(cols-size); \
     } \
     BLASPREFIX##gemv##BLASPOSTFIX(&trans, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)y, &incy); \
   } \
  } \
};
#ifdef EIGEN_USE_MKL
EIGEN_BLAS_TRMV_CM
(
double
,
double
,
d
,
d
,)
EIGEN_BLAS_TRMV_CM
(
dcomplex
,
MKL_Complex16
,
cd
,
z
,)
EIGEN_BLAS_TRMV_CM
(
float
,
float
,
f
,
s
,)
EIGEN_BLAS_TRMV_CM
(
scomplex
,
MKL_Complex8
,
cf
,
c
,)
#else
EIGEN_BLAS_TRMV_CM
(
double
,
double
,
d
,
d
,
_
)
EIGEN_BLAS_TRMV_CM
(
dcomplex
,
double
,
cd
,
z
,
_
)
EIGEN_BLAS_TRMV_CM
(
float
,
float
,
f
,
s
,
_
)
EIGEN_BLAS_TRMV_CM
(
scomplex
,
float
,
cf
,
c
,
_
)
#endif
// implements row-major: res += alpha * op(triangular) * vector
#define EIGEN_BLAS_TRMV_RM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX, BLASPOSTFIX) \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor> { \
enum { \
IsLower = (Mode&Lower) == Lower, \
SetDiag = (Mode&(ZeroDiag|UnitDiag)) ? 0 : 1, \
IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
LowUp = IsLower ? Lower : Upper \
}; \
static void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
{ \
if (IsZeroDiag) { \
triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor,BuiltIn>::run( \
_rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
return; \
}\
Index size = (std::min)(_rows,_cols); \
Index rows = IsLower ? _rows : size; \
Index cols = IsLower ? size : _cols; \
\
typedef VectorX##EIGPREFIX VectorRhs; \
EIGTYPE *x, *y;\
\
/* Set x*/
\
Map<const VectorRhs, 0, InnerStride<> > rhs(_rhs,cols,InnerStride<>(rhsIncr)); \
VectorRhs x_tmp; \
if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
x = x_tmp.data(); \
\
/* Square part handling */
\
\
char trans, uplo, diag; \
BlasIndex m, n, lda, incx, incy; \
EIGTYPE const *a; \
EIGTYPE beta(1); \
\
/* Set m, n */
\
n = convert_index<BlasIndex>(size); \
lda = convert_index<BlasIndex>(lhsStride); \
incx = 1; \
incy = convert_index<BlasIndex>(resIncr); \
\
/* Set uplo, trans and diag*/
\
trans = ConjLhs ? 'C' : 'T'; \
uplo = IsLower ? 'U' : 'L'; \
diag = IsUnitDiag ? 'U' : 'N'; \
\
/* call ?TRMV*/
\
BLASPREFIX##trmv##BLASPOSTFIX(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \
\
/* Add op(a_tr)rhs into res*/
\
BLASPREFIX##axpy##BLASPOSTFIX(&n, (const BLASTYPE*)&numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \
/* Non-square case - doesn't fit to BLAS ?TRMV. Fall to default triangular product*/
\
if (size<(std::max)(rows,cols)) { \
if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
x = x_tmp.data(); \
if (size<rows) { \
y = _res + size*resIncr; \
a = _lhs + size*lda; \
m = convert_index<BlasIndex>(rows-size); \
n = convert_index<BlasIndex>(size); \
} \
else { \
x += size; \
y = _res; \
a = _lhs + size; \
m = convert_index<BlasIndex>(size); \
n = convert_index<BlasIndex>(cols-size); \
} \
BLASPREFIX##gemv##BLASPOSTFIX(&trans, &n, &m, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)y, &incy); \
} \
} \
};
#ifdef EIGEN_USE_MKL
EIGEN_BLAS_TRMV_RM
(
double
,
double
,
d
,
d
,)
EIGEN_BLAS_TRMV_RM
(
dcomplex
,
MKL_Complex16
,
cd
,
z
,)
EIGEN_BLAS_TRMV_RM
(
float
,
float
,
f
,
s
,)
EIGEN_BLAS_TRMV_RM
(
scomplex
,
MKL_Complex8
,
cf
,
c
,)
#else
EIGEN_BLAS_TRMV_RM
(
double
,
double
,
d
,
d
,
_
)
EIGEN_BLAS_TRMV_RM
(
dcomplex
,
double
,
cd
,
z
,
_
)
EIGEN_BLAS_TRMV_RM
(
float
,
float
,
f
,
s
,
_
)
EIGEN_BLAS_TRMV_RM
(
scomplex
,
float
,
cf
,
c
,
_
)
#endif
}
// end namespase internal
}
// end namespace Eigen
#endif // EIGEN_TRIANGULAR_MATRIX_VECTOR_BLAS_H
pydensecrf/densecrf/include/Eigen/src/Core/products/TriangularSolverMatrix.h
View file @
13b115ab
...
...
@@ -15,40 +15,51 @@ namespace Eigen {
namespace
internal
{
// if the rhs is row major, let's transpose the product
template
<
typename
Scalar
,
typename
Index
,
int
Side
,
int
Mode
,
bool
Conjugate
,
int
TriStorageOrder
>
struct
triangular_solve_matrix
<
Scalar
,
Index
,
Side
,
Mode
,
Conjugate
,
TriStorageOrder
,
RowMajor
>
template
<
typename
Scalar
,
typename
Index
,
int
Side
,
int
Mode
,
bool
Conjugate
,
int
TriStorageOrder
,
int
OtherInnerStride
>
struct
triangular_solve_matrix
<
Scalar
,
Index
,
Side
,
Mode
,
Conjugate
,
TriStorageOrder
,
RowMajor
,
OtherInnerStride
>
{
static
EIGEN_DONT_INLINE
void
run
(
static
void
run
(
Index
size
,
Index
cols
,
const
Scalar
*
tri
,
Index
triStride
,
Scalar
*
_other
,
Index
otherStride
,
Scalar
*
_other
,
Index
otherIncr
,
Index
otherStride
,
level3_blocking
<
Scalar
,
Scalar
>&
blocking
)
{
triangular_solve_matrix
<
Scalar
,
Index
,
Side
==
OnTheLeft
?
OnTheRight
:
OnTheLeft
,
(
Mode
&
UnitDiag
)
|
((
Mode
&
Upper
)
?
Lower
:
Upper
),
NumTraits
<
Scalar
>::
IsComplex
&&
Conjugate
,
TriStorageOrder
==
RowMajor
?
ColMajor
:
RowMajor
,
ColMajor
>
::
run
(
size
,
cols
,
tri
,
triStride
,
_other
,
otherStride
,
blocking
);
TriStorageOrder
==
RowMajor
?
ColMajor
:
RowMajor
,
ColMajor
,
OtherInnerStride
>
::
run
(
size
,
cols
,
tri
,
triStride
,
_other
,
otherIncr
,
otherStride
,
blocking
);
}
};
/* Optimized triangular solver with multiple right hand side and the triangular matrix on the left
*/
template
<
typename
Scalar
,
typename
Index
,
int
Mode
,
bool
Conjugate
,
int
TriStorageOrder
>
struct
triangular_solve_matrix
<
Scalar
,
Index
,
OnTheLeft
,
Mode
,
Conjugate
,
TriStorageOrder
,
ColMajor
>
template
<
typename
Scalar
,
typename
Index
,
int
Mode
,
bool
Conjugate
,
int
TriStorageOrder
,
int
OtherInnerStride
>
struct
triangular_solve_matrix
<
Scalar
,
Index
,
OnTheLeft
,
Mode
,
Conjugate
,
TriStorageOrder
,
ColMajor
,
OtherInnerStride
>
{
static
EIGEN_DONT_INLINE
void
run
(
Index
size
,
Index
otherSize
,
const
Scalar
*
_tri
,
Index
triStride
,
Scalar
*
_other
,
Index
otherStride
,
Scalar
*
_other
,
Index
otherIncr
,
Index
otherStride
,
level3_blocking
<
Scalar
,
Scalar
>&
blocking
);
};
template
<
typename
Scalar
,
typename
Index
,
int
Mode
,
bool
Conjugate
,
int
TriStorageOrder
,
int
OtherInnerStride
>
EIGEN_DONT_INLINE
void
triangular_solve_matrix
<
Scalar
,
Index
,
OnTheLeft
,
Mode
,
Conjugate
,
TriStorageOrder
,
ColMajor
,
OtherInnerStride
>::
run
(
Index
size
,
Index
otherSize
,
const
Scalar
*
_tri
,
Index
triStride
,
Scalar
*
_other
,
Index
otherIncr
,
Index
otherStride
,
level3_blocking
<
Scalar
,
Scalar
>&
blocking
)
{
Index
cols
=
otherSize
;
const_blas_data_mapper
<
Scalar
,
Index
,
TriStorageOrder
>
tri
(
_tri
,
triStride
);
blas_data_mapper
<
Scalar
,
Index
,
ColMajor
>
other
(
_other
,
otherStride
);
typedef
const_blas_data_mapper
<
Scalar
,
Index
,
TriStorageOrder
>
TriMapper
;
typedef
blas_data_mapper
<
Scalar
,
Index
,
ColMajor
,
Unaligned
,
OtherInnerStride
>
OtherMapper
;
TriMapper
tri
(
_tri
,
triStride
);
OtherMapper
other
(
_other
,
otherStride
,
otherIncr
);
typedef
gebp_traits
<
Scalar
,
Scalar
>
Traits
;
enum
{
SmallPanelWidth
=
EIGEN_PLAIN_ENUM_MAX
(
Traits
::
mr
,
Traits
::
nr
),
IsLower
=
(
Mode
&
Lower
)
==
Lower
...
...
@@ -59,22 +70,20 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
std
::
size_t
sizeA
=
kc
*
mc
;
std
::
size_t
sizeB
=
kc
*
cols
;
std
::
size_t
sizeW
=
kc
*
Traits
::
WorkSpaceFactor
;
ei_declare_aligned_stack_constructed_variable
(
Scalar
,
blockA
,
sizeA
,
blocking
.
blockA
());
ei_declare_aligned_stack_constructed_variable
(
Scalar
,
blockB
,
sizeB
,
blocking
.
blockB
());
ei_declare_aligned_stack_constructed_variable
(
Scalar
,
blockW
,
sizeW
,
blocking
.
blockW
());
conj_if
<
Conjugate
>
conj
;
gebp_kernel
<
Scalar
,
Scalar
,
Index
,
Traits
::
mr
,
Traits
::
nr
,
Conjugate
,
false
>
gebp_kernel
;
gemm_pack_lhs
<
Scalar
,
Index
,
Traits
::
mr
,
Traits
::
LhsProgress
,
TriStorageOrder
>
pack_lhs
;
gemm_pack_rhs
<
Scalar
,
Index
,
Traits
::
nr
,
ColMajor
,
false
,
true
>
pack_rhs
;
gebp_kernel
<
Scalar
,
Scalar
,
Index
,
OtherMapper
,
Traits
::
mr
,
Traits
::
nr
,
Conjugate
,
false
>
gebp_kernel
;
gemm_pack_lhs
<
Scalar
,
Index
,
TriMapper
,
Traits
::
mr
,
Traits
::
LhsProgress
,
TriStorageOrder
>
pack_lhs
;
gemm_pack_rhs
<
Scalar
,
Index
,
OtherMapper
,
Traits
::
nr
,
ColMajor
,
false
,
true
>
pack_rhs
;
// the goal here is to subdivise the Rhs panels such that we keep some cache
// coherence when accessing the rhs elements
std
::
ptrdiff_t
l1
,
l2
;
manage_caching_sizes
(
GetAction
,
&
l1
,
&
l2
);
Index
subcols
=
cols
>
0
?
l2
/
(
4
*
sizeof
(
Scalar
)
*
otherStride
)
:
0
;
std
::
ptrdiff_t
l1
,
l2
,
l3
;
manage_caching_sizes
(
GetAction
,
&
l1
,
&
l2
,
&
l3
);
Index
subcols
=
cols
>
0
?
l2
/
(
4
*
sizeof
(
Scalar
)
*
std
::
max
<
Index
>
(
otherStride
,
size
)
)
:
0
;
subcols
=
std
::
max
<
Index
>
((
subcols
/
Traits
::
nr
)
*
Traits
::
nr
,
Traits
::
nr
);
for
(
Index
k2
=
IsLower
?
0
:
size
;
...
...
@@ -108,8 +117,9 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
{
// TODO write a small kernel handling this (can be shared with trsv)
Index
i
=
IsLower
?
k2
+
k1
+
k
:
k2
-
k1
-
k
-
1
;
Index
s
=
IsLower
?
k2
+
k1
:
i
+
1
;
Index
rs
=
actualPanelWidth
-
k
-
1
;
// remaining size
Index
s
=
TriStorageOrder
==
RowMajor
?
(
IsLower
?
k2
+
k1
:
i
+
1
)
:
IsLower
?
i
+
1
:
i
-
rs
;
Scalar
a
=
(
Mode
&
UnitDiag
)
?
Scalar
(
1
)
:
Scalar
(
1
)
/
conj
(
tri
(
i
,
i
));
for
(
Index
j
=
j2
;
j
<
j2
+
actual_cols
;
++
j
)
...
...
@@ -118,20 +128,19 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
{
Scalar
b
(
0
);
const
Scalar
*
l
=
&
tri
(
i
,
s
);
Scalar
*
r
=
&
other
(
s
,
j
);
typename
OtherMapper
::
LinearMapper
r
=
other
.
getLinearMapper
(
s
,
j
);
for
(
Index
i3
=
0
;
i3
<
k
;
++
i3
)
b
+=
conj
(
l
[
i3
])
*
r
[
i3
]
;
b
+=
conj
(
l
[
i3
])
*
r
(
i3
)
;
other
(
i
,
j
)
=
(
other
(
i
,
j
)
-
b
)
*
a
;
}
else
{
Index
s
=
IsLower
?
i
+
1
:
i
-
rs
;
Scalar
b
=
(
other
(
i
,
j
)
*=
a
);
Scalar
*
r
=
&
other
(
s
,
j
);
const
Scalar
*
l
=
&
tri
(
s
,
i
);
typename
OtherMapper
::
LinearMapper
r
=
other
.
getLinearMapper
(
s
,
j
);
typename
TriMapper
::
LinearMapper
l
=
tri
.
getLinearMapper
(
s
,
i
);
for
(
Index
i3
=
0
;
i3
<
rs
;
++
i3
)
r
[
i3
]
-=
b
*
conj
(
l
[
i3
]
);
r
(
i3
)
-=
b
*
conj
(
l
(
i3
)
);
}
}
}
...
...
@@ -141,17 +150,17 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
Index
blockBOffset
=
IsLower
?
k1
:
lengthTarget
;
// update the respective rows of B from other
pack_rhs
(
blockB
+
actual_kc
*
j2
,
&
other
(
startBlock
,
j2
),
otherStride
,
actualPanelWidth
,
actual_cols
,
actual_kc
,
blockBOffset
);
pack_rhs
(
blockB
+
actual_kc
*
j2
,
other
.
getSubMapper
(
startBlock
,
j2
),
actualPanelWidth
,
actual_cols
,
actual_kc
,
blockBOffset
);
// GEBP
if
(
lengthTarget
>
0
)
{
Index
startTarget
=
IsLower
?
k2
+
k1
+
actualPanelWidth
:
k2
-
actual_kc
;
pack_lhs
(
blockA
,
&
tri
(
startTarget
,
startBlock
),
triStride
,
actualPanelWidth
,
lengthTarget
);
pack_lhs
(
blockA
,
tri
.
getSubMapper
(
startTarget
,
startBlock
),
actualPanelWidth
,
lengthTarget
);
gebp_kernel
(
&
other
(
startTarget
,
j2
),
otherStride
,
blockA
,
blockB
+
actual_kc
*
j2
,
lengthTarget
,
actualPanelWidth
,
actual_cols
,
Scalar
(
-
1
),
actualPanelWidth
,
actual_kc
,
0
,
blockBOffset
,
blockW
);
gebp_kernel
(
other
.
getSubMapper
(
startTarget
,
j2
),
blockA
,
blockB
+
actual_kc
*
j2
,
lengthTarget
,
actualPanelWidth
,
actual_cols
,
Scalar
(
-
1
),
actualPanelWidth
,
actual_kc
,
0
,
blockBOffset
);
}
}
}
...
...
@@ -165,30 +174,40 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
const
Index
actual_mc
=
(
std
::
min
)(
mc
,
end
-
i2
);
if
(
actual_mc
>
0
)
{
pack_lhs
(
blockA
,
&
tri
(
i2
,
IsLower
?
k2
:
k2
-
kc
),
triStride
,
actual_kc
,
actual_mc
);
pack_lhs
(
blockA
,
tri
.
getSubMapper
(
i2
,
IsLower
?
k2
:
k2
-
kc
),
actual_kc
,
actual_mc
);
gebp_kernel
(
_
other
+
i2
,
otherStride
,
blockA
,
blockB
,
actual_mc
,
actual_kc
,
cols
,
Scalar
(
-
1
),
-
1
,
-
1
,
0
,
0
,
blockW
);
gebp_kernel
(
other
.
getSubMapper
(
i2
,
0
)
,
blockA
,
blockB
,
actual_mc
,
actual_kc
,
cols
,
Scalar
(
-
1
),
-
1
,
-
1
,
0
,
0
);
}
}
}
}
}
};
/* Optimized triangular solver with multiple left hand sides and the tri
n
agular matrix on the right
/* Optimized triangular solver with multiple left hand sides and the tria
n
gular matrix on the right
*/
template
<
typename
Scalar
,
typename
Index
,
int
Mode
,
bool
Conjugate
,
int
TriStorageOrder
>
struct
triangular_solve_matrix
<
Scalar
,
Index
,
OnTheRight
,
Mode
,
Conjugate
,
TriStorageOrder
,
ColMajor
>
template
<
typename
Scalar
,
typename
Index
,
int
Mode
,
bool
Conjugate
,
int
TriStorageOrder
,
int
OtherInnerStride
>
struct
triangular_solve_matrix
<
Scalar
,
Index
,
OnTheRight
,
Mode
,
Conjugate
,
TriStorageOrder
,
ColMajor
,
OtherInnerStride
>
{
static
EIGEN_DONT_INLINE
void
run
(
Index
size
,
Index
otherSize
,
const
Scalar
*
_tri
,
Index
triStride
,
Scalar
*
_other
,
Index
otherStride
,
Scalar
*
_other
,
Index
otherIncr
,
Index
otherStride
,
level3_blocking
<
Scalar
,
Scalar
>&
blocking
);
};
template
<
typename
Scalar
,
typename
Index
,
int
Mode
,
bool
Conjugate
,
int
TriStorageOrder
,
int
OtherInnerStride
>
EIGEN_DONT_INLINE
void
triangular_solve_matrix
<
Scalar
,
Index
,
OnTheRight
,
Mode
,
Conjugate
,
TriStorageOrder
,
ColMajor
,
OtherInnerStride
>::
run
(
Index
size
,
Index
otherSize
,
const
Scalar
*
_tri
,
Index
triStride
,
Scalar
*
_other
,
Index
otherIncr
,
Index
otherStride
,
level3_blocking
<
Scalar
,
Scalar
>&
blocking
)
{
Index
rows
=
otherSize
;
const_blas_data_mapper
<
Scalar
,
Index
,
TriStorageOrder
>
rhs
(
_tri
,
triStride
);
blas_data_mapper
<
Scalar
,
Index
,
ColMajor
>
lhs
(
_other
,
otherStride
);
typedef
typename
NumTraits
<
Scalar
>::
Real
RealScalar
;
typedef
blas_data_mapper
<
Scalar
,
Index
,
ColMajor
,
Unaligned
,
OtherInnerStride
>
LhsMapper
;
typedef
const_blas_data_mapper
<
Scalar
,
Index
,
TriStorageOrder
>
RhsMapper
;
LhsMapper
lhs
(
_other
,
otherStride
,
otherIncr
);
RhsMapper
rhs
(
_tri
,
triStride
);
typedef
gebp_traits
<
Scalar
,
Scalar
>
Traits
;
enum
{
...
...
@@ -202,17 +221,15 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
std
::
size_t
sizeA
=
kc
*
mc
;
std
::
size_t
sizeB
=
kc
*
size
;
std
::
size_t
sizeW
=
kc
*
Traits
::
WorkSpaceFactor
;
ei_declare_aligned_stack_constructed_variable
(
Scalar
,
blockA
,
sizeA
,
blocking
.
blockA
());
ei_declare_aligned_stack_constructed_variable
(
Scalar
,
blockB
,
sizeB
,
blocking
.
blockB
());
ei_declare_aligned_stack_constructed_variable
(
Scalar
,
blockW
,
sizeW
,
blocking
.
blockW
());
conj_if
<
Conjugate
>
conj
;
gebp_kernel
<
Scalar
,
Scalar
,
Index
,
Traits
::
mr
,
Traits
::
nr
,
false
,
Conjugate
>
gebp_kernel
;
gemm_pack_rhs
<
Scalar
,
Index
,
Traits
::
nr
,
RhsStorageOrder
>
pack_rhs
;
gemm_pack_rhs
<
Scalar
,
Index
,
Traits
::
nr
,
RhsStorageOrder
,
false
,
true
>
pack_rhs_panel
;
gemm_pack_lhs
<
Scalar
,
Index
,
Traits
::
mr
,
Traits
::
LhsProgress
,
ColMajor
,
false
,
true
>
pack_lhs_panel
;
gebp_kernel
<
Scalar
,
Scalar
,
Index
,
LhsMapper
,
Traits
::
mr
,
Traits
::
nr
,
false
,
Conjugate
>
gebp_kernel
;
gemm_pack_rhs
<
Scalar
,
Index
,
RhsMapper
,
Traits
::
nr
,
RhsStorageOrder
>
pack_rhs
;
gemm_pack_rhs
<
Scalar
,
Index
,
RhsMapper
,
Traits
::
nr
,
RhsStorageOrder
,
false
,
true
>
pack_rhs_panel
;
gemm_pack_lhs
<
Scalar
,
Index
,
LhsMapper
,
Traits
::
mr
,
Traits
::
LhsProgress
,
ColMajor
,
false
,
true
>
pack_lhs_panel
;
for
(
Index
k2
=
IsLower
?
size
:
0
;
IsLower
?
k2
>
0
:
k2
<
size
;
...
...
@@ -225,7 +242,7 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
Index
rs
=
IsLower
?
actual_k2
:
size
-
actual_k2
-
actual_kc
;
Scalar
*
geb
=
blockB
+
actual_kc
*
actual_kc
;
if
(
rs
>
0
)
pack_rhs
(
geb
,
&
rhs
(
actual_k2
,
startPanel
),
triStride
,
actual_kc
,
rs
);
if
(
rs
>
0
)
pack_rhs
(
geb
,
rhs
.
getSubMapper
(
actual_k2
,
startPanel
),
actual_kc
,
rs
);
// triangular packing (we only pack the panels off the diagonal,
// neglecting the blocks overlapping the diagonal
...
...
@@ -239,7 +256,7 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
if
(
panelLength
>
0
)
pack_rhs_panel
(
blockB
+
j2
*
actual_kc
,
&
rhs
(
actual_k2
+
panelOffset
,
actual_j2
),
triStride
,
rhs
.
getSubMapper
(
actual_k2
+
panelOffset
,
actual_j2
),
panelLength
,
actualPanelWidth
,
actual_kc
,
panelOffset
);
}
...
...
@@ -267,13 +284,12 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
// GEBP
if
(
panelLength
>
0
)
{
gebp_kernel
(
&
lhs
(
i2
,
absolute_j2
),
otherStride
,
gebp_kernel
(
lhs
.
getSubMapper
(
i2
,
absolute_j2
),
blockA
,
blockB
+
j2
*
actual_kc
,
actual_mc
,
panelLength
,
actualPanelWidth
,
Scalar
(
-
1
),
actual_kc
,
actual_kc
,
// strides
panelOffset
,
panelOffset
,
// offsets
blockW
);
// workspace
panelOffset
,
panelOffset
);
// offsets
}
// unblocked triangular solve
...
...
@@ -281,34 +297,36 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
{
Index
j
=
IsLower
?
absolute_j2
+
actualPanelWidth
-
k
-
1
:
absolute_j2
+
k
;
Scalar
*
r
=
&
lhs
(
i2
,
j
);
typename
LhsMapper
::
LinearMapper
r
=
lhs
.
getLinearMapper
(
i2
,
j
);
for
(
Index
k3
=
0
;
k3
<
k
;
++
k3
)
{
Scalar
b
=
conj
(
rhs
(
IsLower
?
j
+
1
+
k3
:
absolute_j2
+
k3
,
j
));
Scalar
*
a
=
&
lhs
(
i2
,
IsLower
?
j
+
1
+
k3
:
absolute_j2
+
k3
);
typename
LhsMapper
::
LinearMapper
a
=
lhs
.
getLinearMapper
(
i2
,
IsLower
?
j
+
1
+
k3
:
absolute_j2
+
k3
);
for
(
Index
i
=
0
;
i
<
actual_mc
;
++
i
)
r
[
i
]
-=
a
[
i
]
*
b
;
r
(
i
)
-=
a
(
i
)
*
b
;
}
if
((
Mode
&
UnitDiag
)
==
0
)
{
Scalar
inv_rjj
=
RealScalar
(
1
)
/
conj
(
rhs
(
j
,
j
));
for
(
Index
i
=
0
;
i
<
actual_mc
;
++
i
)
r
(
i
)
*=
inv_rjj
;
}
Scalar
b
=
(
Mode
&
UnitDiag
)
?
Scalar
(
1
)
:
Scalar
(
1
)
/
conj
(
rhs
(
j
,
j
));
for
(
Index
i
=
0
;
i
<
actual_mc
;
++
i
)
r
[
i
]
*=
b
;
}
// pack the just computed part of lhs to A
pack_lhs_panel
(
blockA
,
_other
+
absolute_j2
*
otherStride
+
i2
,
otherStride
,
pack_lhs_panel
(
blockA
,
lhs
.
getSubMapper
(
i2
,
absolute_j2
)
,
actualPanelWidth
,
actual_mc
,
actual_kc
,
j2
);
}
}
if
(
rs
>
0
)
gebp_kernel
(
_oth
er
+
i2
+
startPanel
*
otherStride
,
otherStride
,
blockA
,
geb
,
gebp_kernel
(
lhs
.
getSubMapp
er
(
i2
,
startPanel
)
,
blockA
,
geb
,
actual_mc
,
actual_kc
,
rs
,
Scalar
(
-
1
),
-
1
,
-
1
,
0
,
0
,
blockW
);
-
1
,
-
1
,
0
,
0
);
}
}
}
};
}
// end namespace internal
...
...
pydensecrf/densecrf/include/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h
0 → 100644
View file @
13b115ab
/*
Copyright (c) 2011, Intel Corporation. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
********************************************************************************
* Content : Eigen bindings to BLAS F77
* Triangular matrix * matrix product functionality based on ?TRMM.
********************************************************************************
*/
#ifndef EIGEN_TRIANGULAR_SOLVER_MATRIX_BLAS_H
#define EIGEN_TRIANGULAR_SOLVER_MATRIX_BLAS_H
namespace
Eigen
{
namespace
internal
{
// implements LeftSide op(triangular)^-1 * general
#define EIGEN_BLAS_TRSM_L(EIGTYPE, BLASTYPE, BLASFUNC) \
template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor,1> \
{ \
enum { \
IsLower = (Mode&Lower) == Lower, \
IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \
}; \
static void run( \
Index size, Index otherSize, \
const EIGTYPE* _tri, Index triStride, \
EIGTYPE* _other, Index otherIncr, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>&
/*blocking*/
) \
{ \
EIGEN_ONLY_USED_FOR_DEBUG(otherIncr); \
eigen_assert(otherIncr == 1); \
BlasIndex m = convert_index<BlasIndex>(size), n = convert_index<BlasIndex>(otherSize), lda, ldb; \
char side = 'L', uplo, diag='N', transa; \
/* Set alpha_ */
\
EIGTYPE alpha(1); \
ldb = convert_index<BlasIndex>(otherStride);\
\
const EIGTYPE *a; \
/* Set trans */
\
transa = (TriStorageOrder==RowMajor) ? ((Conjugate) ? 'C' : 'T') : 'N'; \
/* Set uplo */
\
uplo = IsLower ? 'L' : 'U'; \
if (TriStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \
/* Set a, lda */
\
typedef Matrix<EIGTYPE, Dynamic, Dynamic, TriStorageOrder> MatrixTri; \
Map<const MatrixTri, 0, OuterStride<> > tri(_tri,size,size,OuterStride<>(triStride)); \
MatrixTri a_tmp; \
\
if (conjA) { \
a_tmp = tri.conjugate(); \
a = a_tmp.data(); \
lda = convert_index<BlasIndex>(a_tmp.outerStride()); \
} else { \
a = _tri; \
lda = convert_index<BlasIndex>(triStride); \
} \
if (IsUnitDiag) diag='U'; \
/* call ?trsm*/
\
BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \
} \
};
#ifdef EIGEN_USE_MKL
EIGEN_BLAS_TRSM_L
(
double
,
double
,
dtrsm
)
EIGEN_BLAS_TRSM_L
(
dcomplex
,
MKL_Complex16
,
ztrsm
)
EIGEN_BLAS_TRSM_L
(
float
,
float
,
strsm
)
EIGEN_BLAS_TRSM_L
(
scomplex
,
MKL_Complex8
,
ctrsm
)
#else
EIGEN_BLAS_TRSM_L
(
double
,
double
,
dtrsm_
)
EIGEN_BLAS_TRSM_L
(
dcomplex
,
double
,
ztrsm_
)
EIGEN_BLAS_TRSM_L
(
float
,
float
,
strsm_
)
EIGEN_BLAS_TRSM_L
(
scomplex
,
float
,
ctrsm_
)
#endif
// implements RightSide general * op(triangular)^-1
#define EIGEN_BLAS_TRSM_R(EIGTYPE, BLASTYPE, BLASFUNC) \
template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor,1> \
{ \
enum { \
IsLower = (Mode&Lower) == Lower, \
IsUnitDiag = (Mode&UnitDiag) ? 1 : 0, \
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \
}; \
static void run( \
Index size, Index otherSize, \
const EIGTYPE* _tri, Index triStride, \
EIGTYPE* _other, Index otherIncr, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>&
/*blocking*/
) \
{ \
EIGEN_ONLY_USED_FOR_DEBUG(otherIncr); \
eigen_assert(otherIncr == 1); \
BlasIndex m = convert_index<BlasIndex>(otherSize), n = convert_index<BlasIndex>(size), lda, ldb; \
char side = 'R', uplo, diag='N', transa; \
/* Set alpha_ */
\
EIGTYPE alpha(1); \
ldb = convert_index<BlasIndex>(otherStride);\
\
const EIGTYPE *a; \
/* Set trans */
\
transa = (TriStorageOrder==RowMajor) ? ((Conjugate) ? 'C' : 'T') : 'N'; \
/* Set uplo */
\
uplo = IsLower ? 'L' : 'U'; \
if (TriStorageOrder==RowMajor) uplo = (uplo == 'L') ? 'U' : 'L'; \
/* Set a, lda */
\
typedef Matrix<EIGTYPE, Dynamic, Dynamic, TriStorageOrder> MatrixTri; \
Map<const MatrixTri, 0, OuterStride<> > tri(_tri,size,size,OuterStride<>(triStride)); \
MatrixTri a_tmp; \
\
if (conjA) { \
a_tmp = tri.conjugate(); \
a = a_tmp.data(); \
lda = convert_index<BlasIndex>(a_tmp.outerStride()); \
} else { \
a = _tri; \
lda = convert_index<BlasIndex>(triStride); \
} \
if (IsUnitDiag) diag='U'; \
/* call ?trsm*/
\
BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \
/*std::cout << "TRMS_L specialization!\n";*/
\
} \
};
#ifdef EIGEN_USE_MKL
EIGEN_BLAS_TRSM_R
(
double
,
double
,
dtrsm
)
EIGEN_BLAS_TRSM_R
(
dcomplex
,
MKL_Complex16
,
ztrsm
)
EIGEN_BLAS_TRSM_R
(
float
,
float
,
strsm
)
EIGEN_BLAS_TRSM_R
(
scomplex
,
MKL_Complex8
,
ctrsm
)
#else
EIGEN_BLAS_TRSM_R
(
double
,
double
,
dtrsm_
)
EIGEN_BLAS_TRSM_R
(
dcomplex
,
double
,
ztrsm_
)
EIGEN_BLAS_TRSM_R
(
float
,
float
,
strsm_
)
EIGEN_BLAS_TRSM_R
(
scomplex
,
float
,
ctrsm_
)
#endif
}
// end namespace internal
}
// end namespace Eigen
#endif // EIGEN_TRIANGULAR_SOLVER_MATRIX_BLAS_H
pydensecrf/densecrf/include/Eigen/src/Core/products/TriangularSolverVector.h
View file @
13b115ab
...
...
@@ -10,7 +10,7 @@
#ifndef EIGEN_TRIANGULAR_SOLVER_VECTOR_H
#define EIGEN_TRIANGULAR_SOLVER_VECTOR_H
namespace
Eigen
{
namespace
Eigen
{
namespace
internal
{
...
...
@@ -25,7 +25,7 @@ struct triangular_solve_vector<LhsScalar, RhsScalar, Index, OnTheRight, Mode, Co
>::
run
(
size
,
_lhs
,
lhsStride
,
rhs
);
}
};
// forward and backward substitution, row-major, rhs is a vector
template
<
typename
LhsScalar
,
typename
RhsScalar
,
typename
Index
,
int
Mode
,
bool
Conjugate
>
struct
triangular_solve_vector
<
LhsScalar
,
RhsScalar
,
Index
,
OnTheLeft
,
Mode
,
Conjugate
,
RowMajor
>
...
...
@@ -37,6 +37,10 @@ struct triangular_solve_vector<LhsScalar, RhsScalar, Index, OnTheLeft, Mode, Con
{
typedef
Map
<
const
Matrix
<
LhsScalar
,
Dynamic
,
Dynamic
,
RowMajor
>
,
0
,
OuterStride
<>
>
LhsMap
;
const
LhsMap
lhs
(
_lhs
,
size
,
size
,
OuterStride
<>
(
lhsStride
));
typedef
const_blas_data_mapper
<
LhsScalar
,
Index
,
RowMajor
>
LhsMapper
;
typedef
const_blas_data_mapper
<
RhsScalar
,
Index
,
ColMajor
>
RhsMapper
;
typename
internal
::
conditional
<
Conjugate
,
const
CwiseUnaryOp
<
typename
internal
::
scalar_conjugate_op
<
LhsScalar
>
,
LhsMap
>
,
...
...
@@ -58,10 +62,10 @@ struct triangular_solve_vector<LhsScalar, RhsScalar, Index, OnTheLeft, Mode, Con
Index
startRow
=
IsLower
?
pi
:
pi
-
actualPanelWidth
;
Index
startCol
=
IsLower
?
0
:
pi
;
general_matrix_vector_product
<
Index
,
LhsScalar
,
RowMajor
,
Conjugate
,
RhsScalar
,
false
>::
run
(
general_matrix_vector_product
<
Index
,
LhsScalar
,
LhsMapper
,
RowMajor
,
Conjugate
,
RhsScalar
,
RhsMapper
,
false
>::
run
(
actualPanelWidth
,
r
,
&
lhs
.
coeffRef
(
startRow
,
startCol
),
lhsStride
,
rhs
+
startCol
,
1
,
LhsMapper
(
&
lhs
.
coeffRef
(
startRow
,
startCol
),
lhsStride
)
,
RhsMapper
(
rhs
+
startCol
,
1
)
,
rhs
+
startRow
,
1
,
RhsScalar
(
-
1
));
}
...
...
@@ -72,7 +76,7 @@ struct triangular_solve_vector<LhsScalar, RhsScalar, Index, OnTheLeft, Mode, Con
Index
s
=
IsLower
?
pi
:
i
+
1
;
if
(
k
>
0
)
rhs
[
i
]
-=
(
cjLhs
.
row
(
i
).
segment
(
s
,
k
).
transpose
().
cwiseProduct
(
Map
<
const
Matrix
<
RhsScalar
,
Dynamic
,
1
>
>
(
rhs
+
s
,
k
))).
sum
();
if
(
!
(
Mode
&
UnitDiag
))
rhs
[
i
]
/=
cjLhs
(
i
,
i
);
}
...
...
@@ -91,6 +95,8 @@ struct triangular_solve_vector<LhsScalar, RhsScalar, Index, OnTheLeft, Mode, Con
{
typedef
Map
<
const
Matrix
<
LhsScalar
,
Dynamic
,
Dynamic
,
ColMajor
>
,
0
,
OuterStride
<>
>
LhsMap
;
const
LhsMap
lhs
(
_lhs
,
size
,
size
,
OuterStride
<>
(
lhsStride
));
typedef
const_blas_data_mapper
<
LhsScalar
,
Index
,
ColMajor
>
LhsMapper
;
typedef
const_blas_data_mapper
<
RhsScalar
,
Index
,
ColMajor
>
RhsMapper
;
typename
internal
::
conditional
<
Conjugate
,
const
CwiseUnaryOp
<
typename
internal
::
scalar_conjugate_op
<
LhsScalar
>
,
LhsMap
>
,
const
LhsMap
&
...
...
@@ -122,10 +128,10 @@ struct triangular_solve_vector<LhsScalar, RhsScalar, Index, OnTheLeft, Mode, Con
// let's directly call the low level product function because:
// 1 - it is faster to compile
// 2 - it is slighlty faster at runtime
general_matrix_vector_product
<
Index
,
LhsScalar
,
ColMajor
,
Conjugate
,
RhsScalar
,
false
>::
run
(
general_matrix_vector_product
<
Index
,
LhsScalar
,
LhsMapper
,
ColMajor
,
Conjugate
,
RhsScalar
,
RhsMapper
,
false
>::
run
(
r
,
actualPanelWidth
,
&
lhs
.
coeffRef
(
endBlock
,
startBlock
),
lhsStride
,
rhs
+
startBlock
,
1
,
LhsMapper
(
&
lhs
.
coeffRef
(
endBlock
,
startBlock
),
lhsStride
)
,
RhsMapper
(
rhs
+
startBlock
,
1
)
,
rhs
+
endBlock
,
1
,
RhsScalar
(
-
1
));
}
}
...
...
pydensecrf/densecrf/include/Eigen/src/Core/util/BlasUtil.h
View file @
13b115ab
...
...
@@ -18,23 +18,25 @@ namespace Eigen {
namespace
internal
{
// forward declarations
template
<
typename
LhsScalar
,
typename
RhsScalar
,
typename
Index
,
int
mr
,
int
nr
,
bool
ConjugateLhs
=
false
,
bool
ConjugateRhs
=
false
>
template
<
typename
LhsScalar
,
typename
RhsScalar
,
typename
Index
,
typename
DataMapper
,
int
mr
,
int
nr
,
bool
ConjugateLhs
=
false
,
bool
ConjugateRhs
=
false
>
struct
gebp_kernel
;
template
<
typename
Scalar
,
typename
Index
,
int
nr
,
int
StorageOrder
,
bool
Conjugate
=
false
,
bool
PanelMode
=
false
>
template
<
typename
Scalar
,
typename
Index
,
typename
DataMapper
,
int
nr
,
int
StorageOrder
,
bool
Conjugate
=
false
,
bool
PanelMode
=
false
>
struct
gemm_pack_rhs
;
template
<
typename
Scalar
,
typename
Index
,
int
Pack1
,
int
Pack2
,
int
StorageOrder
,
bool
Conjugate
=
false
,
bool
PanelMode
=
false
>
template
<
typename
Scalar
,
typename
Index
,
typename
DataMapper
,
int
Pack1
,
int
Pack2
,
int
StorageOrder
,
bool
Conjugate
=
false
,
bool
PanelMode
=
false
>
struct
gemm_pack_lhs
;
template
<
typename
Index
,
typename
LhsScalar
,
int
LhsStorageOrder
,
bool
ConjugateLhs
,
typename
RhsScalar
,
int
RhsStorageOrder
,
bool
ConjugateRhs
,
int
ResStorageOrder
>
int
ResStorageOrder
,
int
ResInnerStride
>
struct
general_matrix_matrix_product
;
template
<
typename
Index
,
typename
LhsScalar
,
int
LhsStorageOrder
,
bool
ConjugateLhs
,
typename
RhsScalar
,
bool
ConjugateRhs
,
int
Version
=
Specialized
>
template
<
typename
Index
,
typename
LhsScalar
,
typename
LhsMapper
,
int
LhsStorageOrder
,
bool
ConjugateLhs
,
typename
RhsScalar
,
typename
RhsMapper
,
bool
ConjugateRhs
,
int
Version
=
Specialized
>
struct
general_matrix_vector_product
;
...
...
@@ -42,22 +44,35 @@ template<bool Conjugate> struct conj_if;
template
<
>
struct
conj_if
<
true
>
{
template
<
typename
T
>
inline
T
operator
()(
const
T
&
x
)
{
return
conj
(
x
);
}
inline
T
operator
()(
const
T
&
x
)
const
{
return
numext
::
conj
(
x
);
}
template
<
typename
T
>
inline
T
pconj
(
const
T
&
x
)
{
return
internal
::
pconj
(
x
);
}
inline
T
pconj
(
const
T
&
x
)
const
{
return
internal
::
pconj
(
x
);
}
};
template
<
>
struct
conj_if
<
false
>
{
template
<
typename
T
>
inline
const
T
&
operator
()(
const
T
&
x
)
{
return
x
;
}
inline
const
T
&
operator
()(
const
T
&
x
)
const
{
return
x
;
}
template
<
typename
T
>
inline
const
T
&
pconj
(
const
T
&
x
)
{
return
x
;
}
inline
const
T
&
pconj
(
const
T
&
x
)
const
{
return
x
;
}
};
// Generic implementation for custom complex types.
template
<
typename
LhsScalar
,
typename
RhsScalar
,
bool
ConjLhs
,
bool
ConjRhs
>
struct
conj_helper
{
typedef
typename
ScalarBinaryOpTraits
<
LhsScalar
,
RhsScalar
>::
ReturnType
Scalar
;
EIGEN_STRONG_INLINE
Scalar
pmadd
(
const
LhsScalar
&
x
,
const
RhsScalar
&
y
,
const
Scalar
&
c
)
const
{
return
padd
(
c
,
pmul
(
x
,
y
));
}
EIGEN_STRONG_INLINE
Scalar
pmul
(
const
LhsScalar
&
x
,
const
RhsScalar
&
y
)
const
{
return
conj_if
<
ConjLhs
>
()(
x
)
*
conj_if
<
ConjRhs
>
()(
y
);
}
};
template
<
typename
Scalar
>
struct
conj_helper
<
Scalar
,
Scalar
,
false
,
false
>
{
EIGEN_STRONG_INLINE
Scalar
pmadd
(
const
Scalar
&
x
,
const
Scalar
&
y
,
const
Scalar
&
c
)
const
{
return
internal
::
pmadd
(
x
,
y
,
c
);
}
EIGEN_STRONG_INLINE
Scalar
pmul
(
const
Scalar
&
x
,
const
Scalar
&
y
)
const
{
return
internal
::
pmul
(
x
,
y
);
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE
Scalar
pmadd
(
const
Scalar
&
x
,
const
Scalar
&
y
,
const
Scalar
&
c
)
const
{
return
internal
::
pmadd
(
x
,
y
,
c
);
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE
Scalar
pmul
(
const
Scalar
&
x
,
const
Scalar
&
y
)
const
{
return
internal
::
pmul
(
x
,
y
);
}
};
template
<
typename
RealScalar
>
struct
conj_helper
<
std
::
complex
<
RealScalar
>
,
std
::
complex
<
RealScalar
>
,
false
,
true
>
...
...
@@ -67,7 +82,7 @@ template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::
{
return
c
+
pmul
(
x
,
y
);
}
EIGEN_STRONG_INLINE
Scalar
pmul
(
const
Scalar
&
x
,
const
Scalar
&
y
)
const
{
return
Scalar
(
real
(
x
)
*
real
(
y
)
+
imag
(
x
)
*
imag
(
y
),
imag
(
x
)
*
real
(
y
)
-
real
(
x
)
*
imag
(
y
));
}
{
return
Scalar
(
numext
::
real
(
x
)
*
numext
::
real
(
y
)
+
numext
::
imag
(
x
)
*
numext
::
imag
(
y
),
numext
::
imag
(
x
)
*
numext
::
real
(
y
)
-
numext
::
real
(
x
)
*
numext
::
imag
(
y
));
}
};
template
<
typename
RealScalar
>
struct
conj_helper
<
std
::
complex
<
RealScalar
>
,
std
::
complex
<
RealScalar
>
,
true
,
false
>
...
...
@@ -77,7 +92,7 @@ template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::
{
return
c
+
pmul
(
x
,
y
);
}
EIGEN_STRONG_INLINE
Scalar
pmul
(
const
Scalar
&
x
,
const
Scalar
&
y
)
const
{
return
Scalar
(
real
(
x
)
*
real
(
y
)
+
imag
(
x
)
*
imag
(
y
),
real
(
x
)
*
imag
(
y
)
-
imag
(
x
)
*
real
(
y
));
}
{
return
Scalar
(
numext
::
real
(
x
)
*
numext
::
real
(
y
)
+
numext
::
imag
(
x
)
*
numext
::
imag
(
y
),
numext
::
real
(
x
)
*
numext
::
imag
(
y
)
-
numext
::
imag
(
x
)
*
numext
::
real
(
y
));
}
};
template
<
typename
RealScalar
>
struct
conj_helper
<
std
::
complex
<
RealScalar
>
,
std
::
complex
<
RealScalar
>
,
true
,
true
>
...
...
@@ -87,7 +102,7 @@ template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::
{
return
c
+
pmul
(
x
,
y
);
}
EIGEN_STRONG_INLINE
Scalar
pmul
(
const
Scalar
&
x
,
const
Scalar
&
y
)
const
{
return
Scalar
(
real
(
x
)
*
real
(
y
)
-
imag
(
x
)
*
imag
(
y
),
-
real
(
x
)
*
imag
(
y
)
-
imag
(
x
)
*
real
(
y
));
}
{
return
Scalar
(
numext
::
real
(
x
)
*
numext
::
real
(
y
)
-
numext
::
imag
(
x
)
*
numext
::
imag
(
y
),
-
numext
::
real
(
x
)
*
numext
::
imag
(
y
)
-
numext
::
imag
(
x
)
*
numext
::
real
(
y
));
}
};
template
<
typename
RealScalar
,
bool
Conj
>
struct
conj_helper
<
std
::
complex
<
RealScalar
>
,
RealScalar
,
Conj
,
false
>
...
...
@@ -109,39 +124,243 @@ template<typename RealScalar,bool Conj> struct conj_helper<RealScalar, std::comp
};
template
<
typename
From
,
typename
To
>
struct
get_factor
{
static
EIGEN_STRONG_INLINE
To
run
(
const
From
&
x
)
{
return
x
;
}
EIGEN_DEVICE_FUNC
static
EIGEN_STRONG_INLINE
To
run
(
const
From
&
x
)
{
return
To
(
x
)
;
}
};
template
<
typename
Scalar
>
struct
get_factor
<
Scalar
,
typename
NumTraits
<
Scalar
>::
Real
>
{
static
EIGEN_STRONG_INLINE
typename
NumTraits
<
Scalar
>::
Real
run
(
const
Scalar
&
x
)
{
return
real
(
x
);
}
EIGEN_DEVICE_FUNC
static
EIGEN_STRONG_INLINE
typename
NumTraits
<
Scalar
>::
Real
run
(
const
Scalar
&
x
)
{
return
numext
::
real
(
x
);
}
};
template
<
typename
Scalar
,
typename
Index
>
class
BlasVectorMapper
{
public:
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE
BlasVectorMapper
(
Scalar
*
data
)
:
m_data
(
data
)
{}
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE
Scalar
operator
()(
Index
i
)
const
{
return
m_data
[
i
];
}
template
<
typename
Packet
,
int
AlignmentType
>
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE
Packet
load
(
Index
i
)
const
{
return
ploadt
<
Packet
,
AlignmentType
>
(
m_data
+
i
);
}
template
<
typename
Packet
>
EIGEN_DEVICE_FUNC
bool
aligned
(
Index
i
)
const
{
return
(
UIntPtr
(
m_data
+
i
)
%
sizeof
(
Packet
))
==
0
;
}
protected:
Scalar
*
m_data
;
};
template
<
typename
Scalar
,
typename
Index
,
int
AlignmentType
,
int
Incr
=
1
>
class
BlasLinearMapper
;
template
<
typename
Scalar
,
typename
Index
,
int
AlignmentType
>
class
BlasLinearMapper
<
Scalar
,
Index
,
AlignmentType
,
1
>
{
public:
typedef
typename
packet_traits
<
Scalar
>::
type
Packet
;
typedef
typename
packet_traits
<
Scalar
>::
half
HalfPacket
;
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE
BlasLinearMapper
(
Scalar
*
data
,
Index
incr
=
1
)
:
m_data
(
data
)
{
EIGEN_ONLY_USED_FOR_DEBUG
(
incr
);
eigen_assert
(
incr
==
1
);
}
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE
void
prefetch
(
int
i
)
const
{
internal
::
prefetch
(
&
operator
()(
i
));
}
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE
Scalar
&
operator
()(
Index
i
)
const
{
return
m_data
[
i
];
}
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE
Packet
loadPacket
(
Index
i
)
const
{
return
ploadt
<
Packet
,
AlignmentType
>
(
m_data
+
i
);
}
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE
HalfPacket
loadHalfPacket
(
Index
i
)
const
{
return
ploadt
<
HalfPacket
,
AlignmentType
>
(
m_data
+
i
);
}
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE
void
storePacket
(
Index
i
,
const
Packet
&
p
)
const
{
pstoret
<
Scalar
,
Packet
,
AlignmentType
>
(
m_data
+
i
,
p
);
}
protected:
Scalar
*
m_data
;
};
// Lightweight helper class to access matrix coefficients.
// Yes, this is somehow redundant with Map<>, but this version is much much lighter,
// and so I hope better compilation performance (time and code quality).
template
<
typename
Scalar
,
typename
Index
,
int
StorageOrder
>
class
blas_data_mapper
template
<
typename
Scalar
,
typename
Index
,
int
StorageOrder
,
int
AlignmentType
=
Unaligned
,
int
Incr
=
1
>
class
blas_data_mapper
;
template
<
typename
Scalar
,
typename
Index
,
int
StorageOrder
,
int
AlignmentType
>
class
blas_data_mapper
<
Scalar
,
Index
,
StorageOrder
,
AlignmentType
,
1
>
{
public:
blas_data_mapper
(
Scalar
*
data
,
Index
stride
)
:
m_data
(
data
),
m_stride
(
stride
)
{}
EIGEN_STRONG_INLINE
Scalar
&
operator
()(
Index
i
,
Index
j
)
{
return
m_data
[
StorageOrder
==
RowMajor
?
j
+
i
*
m_stride
:
i
+
j
*
m_stride
];
}
public:
typedef
typename
packet_traits
<
Scalar
>::
type
Packet
;
typedef
typename
packet_traits
<
Scalar
>::
half
HalfPacket
;
typedef
BlasLinearMapper
<
Scalar
,
Index
,
AlignmentType
>
LinearMapper
;
typedef
BlasVectorMapper
<
Scalar
,
Index
>
VectorMapper
;
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE
blas_data_mapper
(
Scalar
*
data
,
Index
stride
,
Index
incr
=
1
)
:
m_data
(
data
),
m_stride
(
stride
)
{
EIGEN_ONLY_USED_FOR_DEBUG
(
incr
);
eigen_assert
(
incr
==
1
);
}
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE
blas_data_mapper
<
Scalar
,
Index
,
StorageOrder
,
AlignmentType
>
getSubMapper
(
Index
i
,
Index
j
)
const
{
return
blas_data_mapper
<
Scalar
,
Index
,
StorageOrder
,
AlignmentType
>
(
&
operator
()(
i
,
j
),
m_stride
);
}
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE
LinearMapper
getLinearMapper
(
Index
i
,
Index
j
)
const
{
return
LinearMapper
(
&
operator
()(
i
,
j
));
}
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE
VectorMapper
getVectorMapper
(
Index
i
,
Index
j
)
const
{
return
VectorMapper
(
&
operator
()(
i
,
j
));
}
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE
Scalar
&
operator
()(
Index
i
,
Index
j
)
const
{
return
m_data
[
StorageOrder
==
RowMajor
?
j
+
i
*
m_stride
:
i
+
j
*
m_stride
];
}
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE
Packet
loadPacket
(
Index
i
,
Index
j
)
const
{
return
ploadt
<
Packet
,
AlignmentType
>
(
&
operator
()(
i
,
j
));
}
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE
HalfPacket
loadHalfPacket
(
Index
i
,
Index
j
)
const
{
return
ploadt
<
HalfPacket
,
AlignmentType
>
(
&
operator
()(
i
,
j
));
}
template
<
typename
SubPacket
>
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE
void
scatterPacket
(
Index
i
,
Index
j
,
const
SubPacket
&
p
)
const
{
pscatter
<
Scalar
,
SubPacket
>
(
&
operator
()(
i
,
j
),
p
,
m_stride
);
}
template
<
typename
SubPacket
>
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE
SubPacket
gatherPacket
(
Index
i
,
Index
j
)
const
{
return
pgather
<
Scalar
,
SubPacket
>
(
&
operator
()(
i
,
j
),
m_stride
);
}
EIGEN_DEVICE_FUNC
const
Index
stride
()
const
{
return
m_stride
;
}
EIGEN_DEVICE_FUNC
const
Scalar
*
data
()
const
{
return
m_data
;
}
EIGEN_DEVICE_FUNC
Index
firstAligned
(
Index
size
)
const
{
if
(
UIntPtr
(
m_data
)
%
sizeof
(
Scalar
))
{
return
-
1
;
}
return
internal
::
first_default_aligned
(
m_data
,
size
);
}
protected:
Scalar
*
EIGEN_RESTRICT
m_data
;
Index
m_stride
;
Scalar
*
EIGEN_RESTRICT
m_data
;
const
Index
m_stride
;
};
// Implementation of non-natural increment (i.e. inner-stride != 1)
// The exposed API is not complete yet compared to the Incr==1 case
// because some features makes less sense in this case.
template
<
typename
Scalar
,
typename
Index
,
int
AlignmentType
,
int
Incr
>
class
BlasLinearMapper
{
public:
typedef
typename
packet_traits
<
Scalar
>::
type
Packet
;
typedef
typename
packet_traits
<
Scalar
>::
half
HalfPacket
;
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE
BlasLinearMapper
(
Scalar
*
data
,
Index
incr
)
:
m_data
(
data
),
m_incr
(
incr
)
{}
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE
void
prefetch
(
int
i
)
const
{
internal
::
prefetch
(
&
operator
()(
i
));
}
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE
Scalar
&
operator
()(
Index
i
)
const
{
return
m_data
[
i
*
m_incr
.
value
()];
}
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE
Packet
loadPacket
(
Index
i
)
const
{
return
pgather
<
Scalar
,
Packet
>
(
m_data
+
i
*
m_incr
.
value
(),
m_incr
.
value
());
}
template
<
typename
PacketType
>
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE
void
storePacket
(
Index
i
,
const
PacketType
&
p
)
const
{
pscatter
<
Scalar
,
PacketType
>
(
m_data
+
i
*
m_incr
.
value
(),
p
,
m_incr
.
value
());
}
protected:
Scalar
*
m_data
;
const
internal
::
variable_if_dynamic
<
Index
,
Incr
>
m_incr
;
};
template
<
typename
Scalar
,
typename
Index
,
int
StorageOrder
,
int
AlignmentType
,
int
Incr
>
class
blas_data_mapper
{
public:
typedef
typename
packet_traits
<
Scalar
>::
type
Packet
;
typedef
typename
packet_traits
<
Scalar
>::
half
HalfPacket
;
typedef
BlasLinearMapper
<
Scalar
,
Index
,
AlignmentType
,
Incr
>
LinearMapper
;
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE
blas_data_mapper
(
Scalar
*
data
,
Index
stride
,
Index
incr
)
:
m_data
(
data
),
m_stride
(
stride
),
m_incr
(
incr
)
{}
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE
blas_data_mapper
getSubMapper
(
Index
i
,
Index
j
)
const
{
return
blas_data_mapper
(
&
operator
()(
i
,
j
),
m_stride
,
m_incr
.
value
());
}
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE
LinearMapper
getLinearMapper
(
Index
i
,
Index
j
)
const
{
return
LinearMapper
(
&
operator
()(
i
,
j
),
m_incr
.
value
());
}
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE
Scalar
&
operator
()(
Index
i
,
Index
j
)
const
{
return
m_data
[
StorageOrder
==
RowMajor
?
j
*
m_incr
.
value
()
+
i
*
m_stride
:
i
*
m_incr
.
value
()
+
j
*
m_stride
];
}
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE
Packet
loadPacket
(
Index
i
,
Index
j
)
const
{
return
pgather
<
Scalar
,
Packet
>
(
&
operator
()(
i
,
j
),
m_incr
.
value
());
}
template
<
typename
PacketT
,
int
AlignmentT
>
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE
PacketT
load
(
Index
i
,
Index
j
)
const
{
return
pgather
<
Scalar
,
PacketT
>
(
&
operator
()(
i
,
j
),
m_incr
.
value
());
}
template
<
typename
SubPacket
>
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE
void
scatterPacket
(
Index
i
,
Index
j
,
const
SubPacket
&
p
)
const
{
pscatter
<
Scalar
,
SubPacket
>
(
&
operator
()(
i
,
j
),
p
,
m_stride
);
}
template
<
typename
SubPacket
>
EIGEN_DEVICE_FUNC
EIGEN_ALWAYS_INLINE
SubPacket
gatherPacket
(
Index
i
,
Index
j
)
const
{
return
pgather
<
Scalar
,
SubPacket
>
(
&
operator
()(
i
,
j
),
m_stride
);
}
protected:
Scalar
*
EIGEN_RESTRICT
m_data
;
const
Index
m_stride
;
const
internal
::
variable_if_dynamic
<
Index
,
Incr
>
m_incr
;
};
// lightweight helper class to access matrix coefficients (const version)
template
<
typename
Scalar
,
typename
Index
,
int
StorageOrder
>
class
const_blas_data_mapper
{
class
const_blas_data_mapper
:
public
blas_data_mapper
<
const
Scalar
,
Index
,
StorageOrder
>
{
public:
const_blas_data_mapper
(
const
Scalar
*
data
,
Index
stride
)
:
m_data
(
data
),
m_stride
(
stride
)
{}
EIGEN_STRONG_INLINE
const
Scalar
&
operator
()(
Index
i
,
Index
j
)
const
{
return
m_data
[
StorageOrder
==
RowMajor
?
j
+
i
*
m_stride
:
i
+
j
*
m_stride
];
}
protected:
const
Scalar
*
EIGEN_RESTRICT
m_data
;
Index
m_stride
;
EIGEN_ALWAYS_INLINE
const_blas_data_mapper
(
const
Scalar
*
data
,
Index
stride
)
:
blas_data_mapper
<
const
Scalar
,
Index
,
StorageOrder
>
(
data
,
stride
)
{}
EIGEN_ALWAYS_INLINE
const_blas_data_mapper
<
Scalar
,
Index
,
StorageOrder
>
getSubMapper
(
Index
i
,
Index
j
)
const
{
return
const_blas_data_mapper
<
Scalar
,
Index
,
StorageOrder
>
(
&
(
this
->
operator
()(
i
,
j
)),
this
->
m_stride
);
}
};
...
...
@@ -188,17 +407,33 @@ struct blas_traits<CwiseUnaryOp<scalar_conjugate_op<Scalar>, NestedXpr> >
};
// pop scalar multiple
template
<
typename
Scalar
,
typename
NestedXpr
>
struct
blas_traits
<
Cwise
U
naryOp
<
scalar_
multiple
_op
<
Scalar
>
,
NestedXpr
>
>
template
<
typename
Scalar
,
typename
NestedXpr
,
typename
Plain
>
struct
blas_traits
<
Cwise
Bi
naryOp
<
scalar_
product_op
<
Scalar
>
,
const
CwiseNullaryOp
<
scalar_constant
_op
<
Scalar
>
,
Plain
>
,
NestedXpr
>
>
:
blas_traits
<
NestedXpr
>
{
typedef
blas_traits
<
NestedXpr
>
Base
;
typedef
Cwise
U
naryOp
<
scalar_
multiple
_op
<
Scalar
>
,
NestedXpr
>
XprType
;
typedef
Cwise
Bi
naryOp
<
scalar_
product_op
<
Scalar
>
,
const
CwiseNullaryOp
<
scalar_constant
_op
<
Scalar
>
,
Plain
>
,
NestedXpr
>
XprType
;
typedef
typename
Base
::
ExtractType
ExtractType
;
static
inline
ExtractType
extract
(
const
XprType
&
x
)
{
return
Base
::
extract
(
x
.
nestedExpression
());
}
static
inline
ExtractType
extract
(
const
XprType
&
x
)
{
return
Base
::
extract
(
x
.
rhs
());
}
static
inline
Scalar
extractScalarFactor
(
const
XprType
&
x
)
{
return
x
.
lhs
().
functor
().
m_other
*
Base
::
extractScalarFactor
(
x
.
rhs
());
}
};
template
<
typename
Scalar
,
typename
NestedXpr
,
typename
Plain
>
struct
blas_traits
<
CwiseBinaryOp
<
scalar_product_op
<
Scalar
>
,
NestedXpr
,
const
CwiseNullaryOp
<
scalar_constant_op
<
Scalar
>
,
Plain
>
>
>
:
blas_traits
<
NestedXpr
>
{
typedef
blas_traits
<
NestedXpr
>
Base
;
typedef
CwiseBinaryOp
<
scalar_product_op
<
Scalar
>
,
NestedXpr
,
const
CwiseNullaryOp
<
scalar_constant_op
<
Scalar
>
,
Plain
>
>
XprType
;
typedef
typename
Base
::
ExtractType
ExtractType
;
static
inline
ExtractType
extract
(
const
XprType
&
x
)
{
return
Base
::
extract
(
x
.
lhs
());
}
static
inline
Scalar
extractScalarFactor
(
const
XprType
&
x
)
{
return
x
.
functor
().
m_other
*
Base
::
extractScalarFactor
(
x
.
nestedExpression
())
;
}
{
return
Base
::
extractScalarFactor
(
x
.
lhs
())
*
x
.
rhs
().
functor
().
m_other
;
}
};
template
<
typename
Scalar
,
typename
Plain1
,
typename
Plain2
>
struct
blas_traits
<
CwiseBinaryOp
<
scalar_product_op
<
Scalar
>
,
const
CwiseNullaryOp
<
scalar_constant_op
<
Scalar
>
,
Plain1
>
,
const
CwiseNullaryOp
<
scalar_constant_op
<
Scalar
>
,
Plain2
>
>
>
:
blas_traits
<
CwiseNullaryOp
<
scalar_constant_op
<
Scalar
>
,
Plain1
>
>
{};
// pop opposite
template
<
typename
Scalar
,
typename
NestedXpr
>
...
...
@@ -230,7 +465,7 @@ struct blas_traits<Transpose<NestedXpr> >
enum
{
IsTransposed
=
Base
::
IsTransposed
?
0
:
1
};
static
inline
ExtractType
extract
(
const
XprType
&
x
)
{
return
Base
::
extract
(
x
.
nestedExpression
());
}
static
inline
ExtractType
extract
(
const
XprType
&
x
)
{
return
ExtractType
(
Base
::
extract
(
x
.
nestedExpression
())
)
;
}
static
inline
Scalar
extractScalarFactor
(
const
XprType
&
x
)
{
return
Base
::
extractScalarFactor
(
x
.
nestedExpression
());
}
};
...
...
Prev
1
…
4
5
6
7
8
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment