Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
6835a32e
Commit
6835a32e
authored
May 05, 2019
by
Shucai Xiao
Browse files
backup temp code changes.
parent
5e24fdf9
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
27 additions
and
0 deletions
+27
-0
src/targets/gpu/gemm.cpp
src/targets/gpu/gemm.cpp
+4
-0
src/targets/gpu/quant_gemm.cpp
src/targets/gpu/quant_gemm.cpp
+23
-0
No files found.
src/targets/gpu/gemm.cpp
View file @
6835a32e
...
@@ -215,6 +215,10 @@ argument miopen_gemm::compute(context& ctx,
...
@@ -215,6 +215,10 @@ argument miopen_gemm::compute(context& ctx,
auto
to_pointer
=
[
&
](
auto
&&
arg
)
{
return
to_rocblas_type
(
as
.
from
(
arg
.
data
()));
};
auto
to_pointer
=
[
&
](
auto
&&
arg
)
{
return
to_rocblas_type
(
as
.
from
(
arg
.
data
()));
};
if
(
num_matrices
==
1
)
if
(
num_matrices
==
1
)
{
{
// the rocblas_gemm API handles inputs and output matrices as
// column-major format. When doing a C = A * B, we actually do
// C^T = (B^T) * (A^T). That is the reason we input args[1] as
// A and args[0] as B in calling the rocblas_gemm.
generic_rocblas_gemm
(
as
,
generic_rocblas_gemm
(
as
,
ctx
.
get_stream
().
get_rocblas
(),
ctx
.
get_stream
().
get_rocblas
(),
transb
?
rocblas_operation_transpose
:
rocblas_operation_none
,
transb
?
rocblas_operation_transpose
:
rocblas_operation_none
,
...
...
src/targets/gpu/quant_gemm.cpp
View file @
6835a32e
...
@@ -90,10 +90,33 @@ argument miopen_quant_gemm::compute(context& ctx,
...
@@ -90,10 +90,33 @@ argument miopen_quant_gemm::compute(context& ctx,
assert
(
!
transa
or
(
lda
%
4
==
0
));
assert
(
!
transa
or
(
lda
%
4
==
0
));
assert
(
transb
or
(
ldb
%
4
==
0
));
assert
(
transb
or
(
ldb
%
4
==
0
));
// need to pack B in this scenario
if
(
!
transb
)
{
int
nb
=
4
;
for
(
int
i_m
=
0
;
i_m
<
m
;
i_m
++
)
{
for
(
int
i_k
=
0
;
i_k
<
k
;
i_k
++
)
{
A_packed
[
i_k
%
nb
+
(
i_m
+
(
i_k
/
nb
)
*
lda
)
*
nb
]
=
A
[
i_m
+
i_k
*
lda
];
}
}
}
// need to pack A in this scenario
if
(
transa
)
{
}
auto
num_matrices
=
std
::
accumulate
(
auto
num_matrices
=
std
::
accumulate
(
out_lens
.
rbegin
()
+
2
,
out_lens
.
rend
(),
std
::
size_t
{
1
},
std
::
multiplies
<
std
::
size_t
>
());
out_lens
.
rbegin
()
+
2
,
out_lens
.
rend
(),
std
::
size_t
{
1
},
std
::
multiplies
<
std
::
size_t
>
());
if
(
num_matrices
==
1
)
if
(
num_matrices
==
1
)
{
{
// the rocblas_gemm API handles inputs and output matrices as
// column-major format. When doing a C = A * B, we actually do
// C^T = (B^T) * (A^T). That is the reason we input args[1] as
// A and args[0] as B in calling the rocblas_gemm.
generic_rocblas_gemm_ex
(
ctx
.
get_stream
().
get_rocblas
(),
generic_rocblas_gemm_ex
(
ctx
.
get_stream
().
get_rocblas
(),
transb
?
rocblas_operation_transpose
:
rocblas_operation_none
,
transb
?
rocblas_operation_transpose
:
rocblas_operation_none
,
transa
?
rocblas_operation_transpose
:
rocblas_operation_none
,
transa
?
rocblas_operation_transpose
:
rocblas_operation_none
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment