gaoqiong / MIGraphX

Commit 154b9287
Authored Apr 03, 2019 by Shucai Xiao
Parent 8a45e79f

code backup.
Showing 2 changed files with 296 additions and 58 deletions (+296 -58)

src/include/migraphx/operators.hpp    +1    -1
src/targets/gpu/gemm.cpp              +295  -57
src/include/migraphx/operators.hpp

@@ -857,7 +857,7 @@ struct dot
         out_lens[dim_1] = b.lens()[dim_1];
         if(inputs.size() == 3 && out_lens != inputs.at(2).lens())
         {
-            MIGRAPHX_THROW("DOT: dimension mismatch, operand C: {" + to_string_range(c_lens) +
+            MIGRAPHX_THROW("DOT: dimension mismatch, operand C: {" + to_string_range(inputs.at(2).lens()) +
                            "}, cannot add to operand A * B: {" + to_string_range(out_lens) + "}");
         }
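The operators.hpp change above only touches the diagnostic: the mismatch message now prints the shape that was actually checked, inputs.at(2).lens(), instead of the earlier c_lens expression. A minimal standalone sketch of the same validation, assuming a join_dims helper in place of migraphx's to_string_range and std::runtime_error in place of MIGRAPHX_THROW (none of these names are MIGraphX code):

// Standalone sketch, not MIGraphX code: join_dims stands in for to_string_range
// and std::runtime_error stands in for MIGRAPHX_THROW.
#include <cstddef>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

using dims = std::vector<std::size_t>;

// Render {2, 3, 4} as "2, 3, 4", mirroring how the diagnostic string is built.
inline std::string join_dims(const dims& d)
{
    std::ostringstream os;
    for(std::size_t i = 0; i < d.size(); ++i)
        os << (i ? ", " : "") << d[i];
    return os.str();
}

// Reject a C operand whose dimensions differ from the A * B output dimensions,
// reporting the shape that was actually passed in.
inline void check_c_operand(const dims& out_lens, const dims& c_lens)
{
    if(c_lens != out_lens)
        throw std::runtime_error("DOT: dimension mismatch, operand C: {" + join_dims(c_lens) +
                                 "}, cannot add to operand A * B: {" + join_dims(out_lens) + "}");
}

// e.g. check_c_operand({2, 3}, {2, 3}) passes, while check_c_operand({2, 3}, {3, 3}) throws
// "DOT: dimension mismatch, operand C: {3, 3}, cannot add to operand A * B: {2, 3}"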
src/targets/gpu/gemm.cpp

@@ -6,49 +6,127 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 
 template <class... Ts>
-void generic_rocblas_batched_gemm(shape::as<float>, Ts&&... xs)
+rocblas_status generic_rocblas_scal(shape::as<float>, Ts&&... xs)
 {
-    rocblas_sgemm_strided_batched(std::forward<Ts>(xs)...);
+    return rocblas_sscal(std::forward<Ts>(xs)...);
 }
 
 template <class... Ts>
-void generic_rocblas_batched_gemm(shape::as<double>, Ts&&... xs)
+rocblas_status generic_rocblas_scal(shape::as<double>, Ts&&... xs)
 {
-    rocblas_dgemm_strided_batched(std::forward<Ts>(xs)...);
+    return rocblas_dscal(std::forward<Ts>(xs)...);
 }
 
+template <class T, class... Ts>
+rocblas_status generic_rocblas_scal(shape::as<T>, Ts&&...)
+{
+    MIGRAPHX_THROW("GENERIC_ROCBLAS_SCAL: type unsupported by rocblas");
+}
+
+template <class... Ts>
+rocblas_status generic_rocblas_axpy(shape::as<half>, Ts&&... xs)
+{
+    return rocblas_haxpy(std::forward<Ts>(xs)...);
+}
+
+template <class... Ts>
+rocblas_status generic_rocblas_axpy(shape::as<float>, Ts&&... xs)
+{
+    return rocblas_saxpy(std::forward<Ts>(xs)...);
+}
+
+template <class... Ts>
+rocblas_status generic_rocblas_axpy(shape::as<double>, Ts&&... xs)
+{
+    return rocblas_daxpy(std::forward<Ts>(xs)...);
+}
+
+template <class T, class... Ts>
+rocblas_status generic_rocblas_axpy(shape::as<T>, Ts&&...)
+{
+    MIGRAPHX_THROW("GENERIC_ROCBLAS_AXPY: type unsupported by rocblas");
+}
+
 template <class... Ts>
-void generic_rocblas_batched_gemm(shape::as<half>, Ts&&... xs)
+rocblas_status generic_rocblas_dot(shape::as<float>, Ts&&... xs)
 {
-    rocblas_hgemm_strided_batched(std::forward<Ts>(xs)...);
+    return rocblas_sdot(std::forward<Ts>(xs)...);
 }
 
+template <class... Ts>
+rocblas_status generic_rocblas_dot(shape::as<double>, Ts&&... xs)
+{
+    return rocblas_ddot(std::forward<Ts>(xs)...);
+}
+
 template <class T, class... Ts>
-void generic_rocblas_batched_gemm(shape::as<T>, Ts&&...)
+rocblas_status generic_rocblas_dot(shape::as<T>, Ts&&...)
 {
+    MIGRAPHX_THROW("GENERIC_ROCBLAS_DOT: type unsupported by rocblas");
+}
+
+template <class... Ts>
+rocblas_status generic_rocblas_gemv(shape::as<float>, Ts&&... xs)
+{
+    return rocblas_sgemv(std::forward<Ts>(xs)...);
+}
+
+template <class... Ts>
+rocblas_status generic_rocblas_gemv(shape::as<double>, Ts&&... xs)
+{
+    return rocblas_dgemv(std::forward<Ts>(xs)...);
+}
+
+template <class T, class... Ts>
+rocblas_status generic_rocblas_gemv(shape::as<T>, Ts&&...)
+{
+    MIGRAPHX_THROW("GENERIC_ROCBLAS_GEMMV: type unsupported by rocblas");
+}
+
+template <class... Ts>
+rocblas_status generic_rocblas_batched_gemm(shape::as<float>, Ts&&... xs)
+{
+    return rocblas_sgemm_strided_batched(std::forward<Ts>(xs)...);
+}
+
+template <class... Ts>
+rocblas_status generic_rocblas_batched_gemm(shape::as<double>, Ts&&... xs)
+{
+    return rocblas_dgemm_strided_batched(std::forward<Ts>(xs)...);
+}
+
+template <class... Ts>
+rocblas_status generic_rocblas_batched_gemm(shape::as<half>, Ts&&... xs)
+{
+    return rocblas_hgemm_strided_batched(std::forward<Ts>(xs)...);
+}
+
+template <class T, class... Ts>
+rocblas_status generic_rocblas_batched_gemm(shape::as<T>, Ts&&...)
+{
     MIGRAPHX_THROW("GENERIC_ROCBLAS_BATCHED_GEMM: type unsupported by rocblas");
 }
 
 template <class... Ts>
-void generic_rocblas_gemm(shape::as<float>, Ts&&... xs)
+rocblas_status generic_rocblas_gemm(shape::as<float>, Ts&&... xs)
 {
-    rocblas_sgemm(std::forward<Ts>(xs)...);
+    return rocblas_sgemm(std::forward<Ts>(xs)...);
 }
 
 template <class... Ts>
-void generic_rocblas_gemm(shape::as<double>, Ts&&... xs)
+rocblas_status generic_rocblas_gemm(shape::as<double>, Ts&&... xs)
 {
-    rocblas_dgemm(std::forward<Ts>(xs)...);
+    return rocblas_dgemm(std::forward<Ts>(xs)...);
 }
 
 template <class... Ts>
-void generic_rocblas_gemm(shape::as<half>, Ts&&... xs)
+rocblas_status generic_rocblas_gemm(shape::as<half>, Ts&&... xs)
 {
-    rocblas_hgemm(std::forward<Ts>(xs)...);
+    return rocblas_hgemm(std::forward<Ts>(xs)...);
 }
 
 template <class T, class... Ts>
-void generic_rocblas_gemm(shape::as<T>, Ts&&...)
+rocblas_status generic_rocblas_gemm(shape::as<T>, Ts&&...)
 {
     MIGRAPHX_THROW("GENERIC_ROCBLAS_GEMM: type unsupported by rocblas");
 }
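All of the generic_rocblas_* wrappers above use the same dispatch pattern: one overload per element type that rocBLAS supports, selected by the shape::as<T> tag, plus a variadic catch-all that throws for any other type, so the calling code can forward its arguments generically and still reach the right precision-specific entry point. A compilable sketch of that pattern under assumed names (type_tag, generic_axpy and the *_impl functions are illustrative stand-ins, not MIGraphX or rocBLAS symbols):

// Standalone sketch of the dispatch pattern, not MIGraphX code: type_tag<T>
// stands in for shape::as<T>, and the *_impl functions stand in for the
// precision-specific rocblas_* entry points.
#include <cstdio>
#include <stdexcept>
#include <utility>

template <class T>
struct type_tag
{
};

inline int saxpy_impl(int n, float a)  { std::printf("float axpy: n=%d a=%f\n", n, a); return 0; }
inline int daxpy_impl(int n, double a) { std::printf("double axpy: n=%d a=%f\n", n, a); return 0; }

// One overload per supported element type, selected by the tag argument.
template <class... Ts>
int generic_axpy(type_tag<float>, Ts&&... xs)
{
    return saxpy_impl(std::forward<Ts>(xs)...);
}

template <class... Ts>
int generic_axpy(type_tag<double>, Ts&&... xs)
{
    return daxpy_impl(std::forward<Ts>(xs)...);
}

// Catch-all: any tag without a dedicated overload is rejected at run time.
template <class T, class... Ts>
int generic_axpy(type_tag<T>, Ts&&...)
{
    throw std::runtime_error("generic_axpy: type unsupported");
}

int main()
{
    generic_axpy(type_tag<float>{}, 4, 2.0f);  // resolves to saxpy_impl
    generic_axpy(type_tag<double>{}, 4, 2.0);  // resolves to daxpy_impl
    // generic_axpy(type_tag<int>{}, 4, 2);    // would hit the catch-all and throw
}

Because the float and double overloads are more specialized than the catch-all, overload resolution prefers them whenever a dedicated tag exists; unsupported element types only fail when the catch-all is actually invoked, which matches how the MIGRAPHX_THROW fallbacks above behave.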
@@ -90,34 +168,194 @@ rocblas_half to_rocblas_type(half x) { return reinterpret_cast<const rocblas_hal
 shape miopen_gemm::compute_shape(const std::vector<shape>& inputs) const
 {
-    check_shapes{inputs, *this}.has(3);
-    return op.compute_shape({inputs.at(0), inputs.at(1)});
+    std::vector<shape> input_shapes(inputs.begin(), inputs.begin() + inputs.size() - 1);
+    if(input_shapes.size() == 3)
+    {
+        auto c_shape = inputs[2];
+        check_shapes{{c_shape}}.not_broadcasted();
+    }
+    return op.compute_shape(input_shapes);
 }
 
 argument miopen_gemm::compute(context& ctx,
                               const shape& output_shape,
                               const std::vector<argument>& args) const
 {
-    float alpha = 1.0f;
-    float beta  = 0.0f;
+    bool is_3inputs = (args.size() == 4);
+    if(is_3inputs)
+    {
+        output_shape.visit_type([&](auto as) {
+            auto to_pointer = [&](auto&& arg) { return to_rocblas_type(as.from(arg.data())); };
+            hipMemcpy(to_pointer(args[3]),
+                      to_pointer(args[2]),
+                      output_shape.bytes(),
+                      hipMemcpyDeviceToDevice);
+        });
+
+        output_shape.visit_type([&](auto as) {
-            auto n_dim = output_shape.lens().size();
-            auto dim_1 = n_dim - 1;
-            auto dim_0 = n_dim - 2;
+            auto alpha_r       = to_rocblas_type(as(op.alpha));
+            auto beta_r        = to_rocblas_type(as(op.beta));
+            bool transa        = args[0].get_shape().transposed();
+            bool transb        = args[1].get_shape().transposed();
+            std::size_t n_dims = args[0].get_shape().lens().size();
+            std::size_t dim_0  = n_dims - 2;
+            std::size_t dim_1  = n_dims - 1;
+            rocblas_int lda    = args[0].get_shape().strides()[transa ? dim_1 : dim_0];
+            rocblas_int ldb    = args[1].get_shape().strides()[transb ? dim_1 : dim_0];
-            rocblas_int ldc    = args[2].get_shape().strides()[dim_0];
+            rocblas_int ldc    = args[3].get_shape().strides()[dim_0];
+            auto out_lens      = output_shape.lens();
+            rocblas_int m      = out_lens[dim_0];
+            rocblas_int n      = out_lens[dim_1];
+            rocblas_int k      = args[0].get_shape().lens()[dim_1];
-            auto batch_num = std::accumulate(
-                out_lens.rbegin() + 2, out_lens.rend(), std::size_t{1}, std::multiplies<std::size_t>());
+            auto num_matrices = std::accumulate(
+                out_lens.rbegin() + 2, out_lens.rend(), std::size_t{1}, std::multiplies<std::size_t>());
+            auto to_pointer = [&](auto&& arg) { return to_rocblas_type(as.from(arg.data())); };
+            generic_rocblas_batched_gemm(as,
+                                         ctx.get_stream().get_rocblas(),
+                                         transb ? rocblas_operation_transpose : rocblas_operation_none,
+                                         transa ? rocblas_operation_transpose : rocblas_operation_none,
+                                         n, m, k,
+                                         &alpha_r,
+                                         to_pointer(args[1]), ldb, k * n,
+                                         to_pointer(args[0]), lda, m * k,
+                                         &beta_r,
+                                         to_pointer(args[3]), ldc, m * n,
+                                         num_matrices);
+        });
+
+        return args[3];
+    }
+
+    // 2 input argument cases
+    // vector inner product
+    auto a_lens = args[0].get_shape().lens();
+    auto b_lens = args[1].get_shape().lens();
+    if(output_shape.elements() == 1)
+    {
+        assert(args[0].get_shape().elements() == args[1].get_shape().elements());
+        float beta           = 0.0f;
+        rocblas_int elem_num = static_cast<rocblas_int>(args[0].get_shape().elements());
+        output_shape.visit_type([&](auto as) {
+            auto alpha_r    = to_rocblas_type(as(op.alpha));
+            auto beta_r     = to_rocblas_type(as(beta));
+            auto to_pointer = [&](auto&& arg) { return to_rocblas_type(as.from(arg.data())); };
+            // the function generic_rocblas_dot is not stable, so have to
+            // call the gemm function instead. In the future, we may change
+            // to call generic_rocblas_dot when it is stable.
+            generic_rocblas_gemm(as,
+                                 ctx.get_stream().get_rocblas(),
+                                 rocblas_operation_none,
+                                 rocblas_operation_none,
+                                 1, 1, elem_num,
+                                 &alpha_r,
+                                 to_pointer(args[1]), 1,
+                                 to_pointer(args[0]), elem_num,
+                                 &beta_r,
+                                 to_pointer(args[2]), 1);
+        });
+    }
+    // matrix * vector (b is a vector)
+    else if(b_lens.size() == 2 && b_lens.at(1) == 1)
+    {
+        bool transa     = args[0].get_shape().transposed();
+        rocblas_int m   = static_cast<rocblas_int>(a_lens[0]);
+        rocblas_int n   = static_cast<rocblas_int>(a_lens[1]);
+        rocblas_int lda = args[0].get_shape().strides()[transa ? 1 : 0];
+        float beta      = 0.0f;
+        assert(a_lens.back() == args[1].get_shape().elements());
+        output_shape.visit_type([&](auto as) {
-            auto alpha_r    = to_rocblas_type(as(alpha));
+            auto alpha_r    = to_rocblas_type(as(op.alpha));
+            auto beta_r     = to_rocblas_type(as(beta));
+            auto to_pointer = [&](auto&& arg) { return to_rocblas_type(as.from(arg.data())); };
-            generic_rocblas_batched_gemm(as,
+            generic_rocblas_gemv(as,
+                                 ctx.get_stream().get_rocblas(),
+                                 transa ? rocblas_operation_transpose : rocblas_operation_none,
+                                 m, n,
+                                 &alpha_r,
+                                 to_pointer(args[0]), lda,
+                                 to_pointer(args[1]), 1,
+                                 &beta_r,
+                                 to_pointer(args[2]), 1);
+        });
+    }
+    // vector * matrix (a is a vector)
+    else if(a_lens.size() == 2 && a_lens.at(0) == 1)
+    {
+        bool transb     = !args[1].get_shape().transposed();
+        rocblas_int ldb = args[1].get_shape().strides()[(transb ? 1 : 0)];
+        rocblas_int m   = b_lens[0];
+        rocblas_int n   = b_lens[1];
+        float beta      = 0.0f;
+        assert(b_lens[0] == args[0].get_shape().elements());
+        output_shape.visit_type([&](auto as) {
+            auto alpha_r    = to_rocblas_type(as(op.alpha));
+            auto beta_r     = to_rocblas_type(as(beta));
+            auto to_pointer = [&](auto&& arg) { return to_rocblas_type(as.from(arg.data())); };
+            generic_rocblas_gemv(as,
+                                 ctx.get_stream().get_rocblas(),
+                                 transb ? rocblas_operation_transpose : rocblas_operation_none,
+                                 m, n,
+                                 &alpha_r,
+                                 to_pointer(args[1]), ldb,
+                                 to_pointer(args[0]), 1,
+                                 &beta_r,
+                                 to_pointer(args[2]), 1);
+        });
+    }
+    // batch matrix multiplication
+    else
+    {
+        output_shape.visit_type([&](auto as) {
+            auto n_dim      = output_shape.lens().size();
+            auto dim_1      = n_dim - 1;
+            auto dim_0      = n_dim - 2;
+            float beta      = 0.0f;
+            auto alpha_r    = to_rocblas_type(as(op.alpha));
+            auto beta_r     = to_rocblas_type(as(beta));
+            bool transa     = args[0].get_shape().transposed();
+            bool transb     = args[1].get_shape().transposed();
+            rocblas_int lda = args[0].get_shape().strides()[transa ? dim_1 : dim_0];
+            rocblas_int ldb = args[1].get_shape().strides()[transb ? dim_1 : dim_0];
+            rocblas_int ldc = args[3].get_shape().strides()[dim_0];
+            auto out_lens   = output_shape.lens();
+            rocblas_int m   = out_lens[dim_0];
+            rocblas_int n   = out_lens[dim_1];
+            rocblas_int k   = args[0].get_shape().lens()[dim_1];
+            auto num_matrices = std::accumulate(
+                out_lens.rbegin() + 2, out_lens.rend(), std::size_t{1}, std::multiplies<std::size_t>());
+            auto to_pointer = [&](auto&& arg) { return to_rocblas_type(as.from(arg.data())); };
+            generic_rocblas_batched_gemm(as,
+                                         ctx.get_stream().get_rocblas(),
+                                         transb ? rocblas_operation_transpose : rocblas_operation_none,
+                                         transa ? rocblas_operation_transpose : rocblas_operation_none,
@@ -135,9 +373,9 @@ argument miopen_gemm::compute(context& ctx,
                                          to_pointer(args[2]),
                                          ldc,
                                          m * n,
-                                         batch_num);
+                                         num_matrices);
         });
+    }
 
     return args[2];
 }
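In the two-input path, compute above selects the rocBLAS call from the operand shapes: a one-element output is handled as an inner product (issued as a 1x1 GEMM because, per the in-code comment, generic_rocblas_dot is not yet stable), a B operand of shape {k, 1} maps to a matrix*vector GEMV, an A operand of shape {1, k} maps to a vector*matrix GEMV, and everything else falls through to the strided-batched GEMM; the three-input path instead copies C into the output buffer with hipMemcpy and runs the batched GEMM in place with op.alpha and op.beta. A small standalone sketch of just that classification rule (dims, elements and classify_dot are assumed names, not MIGraphX code):

// Standalone sketch of the shape-based dispatch applied to the two-input case.
#include <cstddef>
#include <functional>
#include <numeric>
#include <string>
#include <vector>

using dims = std::vector<std::size_t>;

// Total element count of a shape, analogous to shape::elements().
inline std::size_t elements(const dims& d)
{
    return std::accumulate(d.begin(), d.end(), std::size_t{1}, std::multiplies<std::size_t>());
}

// Mirror the if/else-if chain in compute(): scalar output, then vector cases,
// then the general batched GEMM.
inline std::string classify_dot(const dims& a_lens, const dims& b_lens, const dims& out_lens)
{
    if(elements(out_lens) == 1)
        return "inner product (1x1 gemm)";
    if(b_lens.size() == 2 && b_lens.at(1) == 1)
        return "matrix * vector (gemv)";
    if(a_lens.size() == 2 && a_lens.at(0) == 1)
        return "vector * matrix (gemv)";
    return "batched gemm";
}

// e.g. classify_dot({1, 8}, {8, 1}, {1, 1})         -> "inner product (1x1 gemm)"
//      classify_dot({4, 8}, {8, 1}, {4, 1})         -> "matrix * vector (gemv)"
//      classify_dot({1, 8}, {8, 5}, {1, 5})         -> "vector * matrix (gemv)"
//      classify_dot({2, 4, 8}, {2, 8, 5}, {2, 4, 5}) -> "batched gemm"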