Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
180dc7a0
Commit
180dc7a0
authored
Oct 06, 2022
by
Paul
Browse files
Merge branch 'develop' into jit-unroll-stride
parents
e535f7ef
f7d987ba
Changes
44
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
809 additions
and
130 deletions
+809
-130
src/targets/gpu/include/migraphx/gpu/convolution.hpp
src/targets/gpu/include/migraphx/gpu/convolution.hpp
+8
-1
src/targets/gpu/include/migraphx/gpu/miopen.hpp
src/targets/gpu/include/migraphx/gpu/miopen.hpp
+28
-0
src/targets/gpu/include/migraphx/gpu/rocblas.hpp
src/targets/gpu/include/migraphx/gpu/rocblas.hpp
+5
-1
src/targets/gpu/jit/gathernd.cpp
src/targets/gpu/jit/gathernd.cpp
+1
-1
src/targets/gpu/jit/softmax.cpp
src/targets/gpu/jit/softmax.cpp
+5
-0
src/targets/gpu/kernels/include/migraphx/kernels/reduce.hpp
src/targets/gpu/kernels/include/migraphx/kernels/reduce.hpp
+8
-8
src/targets/gpu/kernels/include/migraphx/kernels/softmax.hpp
src/targets/gpu/kernels/include/migraphx/kernels/softmax.hpp
+9
-5
src/targets/gpu/lowering.cpp
src/targets/gpu/lowering.cpp
+34
-17
src/targets/gpu/rocblas.cpp
src/targets/gpu/rocblas.cpp
+33
-0
src/value.cpp
src/value.cpp
+1
-8
test/api/test_custom_op.cpp
test/api/test_custom_op.cpp
+43
-0
test/api/test_custom_op_gpu.cpp
test/api/test_custom_op_gpu.cpp
+258
-37
test/api/test_gpu.cpp
test/api/test_gpu.cpp
+60
-2
test/gpu/mlir.cpp
test/gpu/mlir.cpp
+0
-4
test/gpu/pack_int8_args.cpp
test/gpu/pack_int8_args.cpp
+49
-46
test/gpu/stream_sync.cpp
test/gpu/stream_sync.cpp
+146
-0
test/py/CMakeLists.txt
test/py/CMakeLists.txt
+1
-0
test/py/test_gpu_async.py
test/py/test_gpu_async.py
+74
-0
test/verify/test_softmax_large3.cpp
test/verify/test_softmax_large3.cpp
+43
-0
tools/accuracy/accuracy_checker.py
tools/accuracy/accuracy_checker.py
+3
-0
No files found.
src/targets/gpu/include/migraphx/gpu/convolution.hpp
View file @
180dc7a0
...
@@ -39,6 +39,10 @@ struct miopen_convolution
...
@@ -39,6 +39,10 @@ struct miopen_convolution
op
::
convolution
op
;
op
::
convolution
op
;
shared
<
convolution_descriptor
>
cd
=
nullptr
;
shared
<
convolution_descriptor
>
cd
=
nullptr
;
miopenConvFwdAlgorithm_t
algo
{};
miopenConvFwdAlgorithm_t
algo
{};
#ifdef MIGRAPHX_HAS_FIND_2_API
value
::
binary
solution_object
{};
shared
<
miopen_solution
>
solution_ptr
=
nullptr
;
#endif
uint64_t
solution_id
=
0
;
uint64_t
solution_id
=
0
;
template
<
class
Self
,
class
F
>
template
<
class
Self
,
class
F
>
...
@@ -49,6 +53,9 @@ struct miopen_convolution
...
@@ -49,6 +53,9 @@ struct miopen_convolution
f
(
self
.
op
.
dilation
,
"dilation"
),
f
(
self
.
op
.
dilation
,
"dilation"
),
f
(
self
.
op
.
group
,
"group"
),
f
(
self
.
op
.
group
,
"group"
),
f
(
self
.
op
.
padding_mode
,
"padding_mode"
),
f
(
self
.
op
.
padding_mode
,
"padding_mode"
),
#ifdef MIGRAPHX_HAS_FIND_2_API
f
(
self
.
solution_object
,
"solution_object"
),
#endif
f
(
self
.
solution_id
,
"solution_id"
));
f
(
self
.
solution_id
,
"solution_id"
));
}
}
...
@@ -57,7 +64,7 @@ struct miopen_convolution
...
@@ -57,7 +64,7 @@ struct miopen_convolution
argument
argument
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
shape
find
(
context
&
ctx
,
const
shape
&
output_shape
,
std
::
vector
<
shape
>
inputs
);
shape
find
(
context
&
ctx
,
const
shape
&
output_shape
,
std
::
vector
<
shape
>
inputs
);
void
finalize
(
context
&
ctx
,
const
shape
&
output_shape
,
std
::
vector
<
shape
>
inputs
);
void
finalize
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
shape
>
&
inputs
);
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
{
return
shapes
.
size
()
-
1
;
return
shapes
.
size
()
-
1
;
...
...
src/targets/gpu/include/migraphx/gpu/miopen.hpp
View file @
180dc7a0
...
@@ -70,6 +70,34 @@ Result make_obj(F f, Ts... xs)
...
@@ -70,6 +70,34 @@ Result make_obj(F f, Ts... xs)
return
r
;
return
r
;
}
}
#ifdef MIGRAPHX_HAS_FIND_2_API
using
miopen_find_options
=
MIGRAPHX_MANAGE_PTR
(
miopenFindOptions_t
,
miopenDestroyFindOptions
);
using
miopen_problem
=
MIGRAPHX_MANAGE_PTR
(
miopenProblem_t
,
miopenDestroyProblem
);
using
miopen_solution
=
MIGRAPHX_MANAGE_PTR
(
miopenSolution_t
,
miopenDestroySolution
);
inline
miopen_solution
find_solution
(
miopenHandle_t
handle
,
miopenProblem_t
problem
)
{
miopenSolution_t
solution
;
size_t
found
=
0
;
auto
status
=
miopenFindSolutions
(
handle
,
problem
,
nullptr
,
&
solution
,
&
found
,
1
);
auto
result
=
miopen_solution
{
solution
};
if
(
status
!=
miopenStatusSuccess
or
found
==
0
)
MIGRAPHX_THROW
(
"MIOpen miopenFindSolutions failed"
);
return
result
;
}
inline
void
set_tensor_descriptor
(
miopenTensorArgumentId_t
name
,
tensor_descriptor
&
desc
,
miopen_problem
&
problem_ptr
)
{
auto
status
=
miopenSetProblemTensorDescriptor
(
problem_ptr
.
get
(),
name
,
desc
.
get
());
if
(
status
!=
miopenStatusSuccess
)
{
MIGRAPHX_THROW
(
"setting problem tensor description failed"
);
}
}
#endif
inline
tensor_descriptor
make_tensor
(
const
migraphx
::
shape
&
os
,
bool
pack
=
false
)
inline
tensor_descriptor
make_tensor
(
const
migraphx
::
shape
&
os
,
bool
pack
=
false
)
{
{
auto
s
=
os
.
normalize_standard
();
auto
s
=
os
.
normalize_standard
();
...
...
src/targets/gpu/include/migraphx/gpu/rocblas.hpp
View file @
180dc7a0
...
@@ -23,7 +23,6 @@
...
@@ -23,7 +23,6 @@
*/
*/
#ifndef MIGRAPHX_GUARD_MIGRAPHLIB_ROCBLAS_HPP
#ifndef MIGRAPHX_GUARD_MIGRAPHLIB_ROCBLAS_HPP
#define MIGRAPHX_GUARD_MIGRAPHLIB_ROCBLAS_HPP
#define MIGRAPHX_GUARD_MIGRAPHLIB_ROCBLAS_HPP
#include <migraphx/manage_ptr.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/config.hpp>
#include <migraphx/config.hpp>
#include <rocblas.h>
#include <rocblas.h>
...
@@ -37,6 +36,11 @@ using rocblas_handle_ptr = MIGRAPHX_MANAGE_PTR(rocblas_handle, rocblas_destroy_h
...
@@ -37,6 +36,11 @@ using rocblas_handle_ptr = MIGRAPHX_MANAGE_PTR(rocblas_handle, rocblas_destroy_h
rocblas_handle_ptr
create_rocblas_handle_ptr
();
rocblas_handle_ptr
create_rocblas_handle_ptr
();
rocblas_handle_ptr
create_rocblas_handle_ptr
(
hipStream_t
s
);
rocblas_handle_ptr
create_rocblas_handle_ptr
(
hipStream_t
s
);
struct
context
;
bool
get_compute_fp32_flag
();
bool
get_int8_x4_format
(
context
&
ctx
);
}
// namespace gpu
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
}
// namespace migraphx
...
...
src/targets/gpu/jit/gathernd.cpp
View file @
180dc7a0
...
@@ -65,7 +65,7 @@ struct gathernd_compiler : compiler<gathernd_compiler>
...
@@ -65,7 +65,7 @@ struct gathernd_compiler : compiler<gathernd_compiler>
operation
compile_op
(
context
&
ctx
,
const
std
::
vector
<
shape
>&
inputs
,
const
value
&
v
)
const
operation
compile_op
(
context
&
ctx
,
const
std
::
vector
<
shape
>&
inputs
,
const
value
&
v
)
const
{
{
hip_compile_options
options
;
hip_compile_options
options
;
auto
out_s
=
inputs
.
back
();
const
auto
&
out_s
=
inputs
.
back
();
options
.
set_launch_params
(
v
,
compute_global_for
(
ctx
,
out_s
.
elements
()));
options
.
set_launch_params
(
v
,
compute_global_for
(
ctx
,
out_s
.
elements
()));
options
.
inputs
=
inputs
;
options
.
inputs
=
inputs
;
options
.
output
=
out_s
;
options
.
output
=
out_s
;
...
...
src/targets/gpu/jit/softmax.cpp
View file @
180dc7a0
...
@@ -32,6 +32,8 @@ namespace migraphx {
...
@@ -32,6 +32,8 @@ namespace migraphx {
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
gpu
{
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_USE_FAST_SOFTMAX
)
using
namespace
migraphx
::
gpu
::
gen
;
// NOLINT
using
namespace
migraphx
::
gpu
::
gen
;
// NOLINT
static
const
char
*
const
softmax_kernel
=
R"__migraphx__(
static
const
char
*
const
softmax_kernel
=
R"__migraphx__(
...
@@ -81,6 +83,9 @@ struct softmax_compiler : compiler<softmax_compiler>
...
@@ -81,6 +83,9 @@ struct softmax_compiler : compiler<softmax_compiler>
options
.
inputs
=
inputs
;
options
.
inputs
=
inputs
;
options
.
kernel_name
=
"softmax_kernel"
;
options
.
kernel_name
=
"softmax_kernel"
;
if
(
enabled
(
MIGRAPHX_USE_FAST_SOFTMAX
{}))
options
.
params
=
"-DMIGRAPHX_USE_FAST_SOFTMAX"
;
auto
src
=
interpolate_string
(
auto
src
=
interpolate_string
(
softmax_kernel
,
softmax_kernel
,
{{
"transformers"
,
make_transformer_args
(
vec
)},
{
"axis"
,
to_string
(
axis
)}});
{{
"transformers"
,
make_transformer_args
(
vec
)},
{
"axis"
,
to_string
(
axis
)}});
...
...
src/targets/gpu/kernels/include/migraphx/kernels/reduce.hpp
View file @
180dc7a0
...
@@ -197,11 +197,11 @@ struct block
...
@@ -197,11 +197,11 @@ struct block
struct
reducer
struct
reducer
{
{
index
idx
;
index
idx
;
Slicer
slice
r
;
Slicer
slice
;
template
<
class
Op
,
class
T
,
class
Read
>
template
<
class
Op
,
class
T
,
class
Read
>
__device__
auto
reduce
(
Op
op
,
T
init
,
Read
read
)
const
__device__
auto
reduce
(
Op
op
,
T
init
,
Read
read
)
const
{
{
return
sliced
(
slice
r
,
[
=
](
auto
x
,
auto
...
xs
)
{
return
sliced
(
slice
,
[
=
](
auto
x
,
auto
...
xs
)
{
return
block_reduce
(
idx
,
op
,
init
,
x
.
get_shape
().
elements
(),
[
&
](
auto
j
)
{
return
block_reduce
(
idx
,
op
,
init
,
x
.
get_shape
().
elements
(),
[
&
](
auto
j
)
{
return
vec_reduce
(
read
(
x
[
j
],
xs
[
j
]...),
op
);
return
vec_reduce
(
read
(
x
[
j
],
xs
[
j
]...),
op
);
});
});
...
@@ -218,7 +218,7 @@ struct block
...
@@ -218,7 +218,7 @@ struct block
template
<
class
F
>
template
<
class
F
>
__device__
auto
inner
(
F
f
)
const
__device__
auto
inner
(
F
f
)
const
{
{
return
sliced
(
slice
r
,
[
=
](
auto
x
,
auto
...
xs
)
{
return
sliced
(
slice
,
[
=
](
auto
x
,
auto
...
xs
)
{
idx
.
local_stride
(
x
.
get_shape
().
elements
(),
[
&
](
auto
j
)
{
f
(
x
[
j
],
xs
[
j
]...);
});
idx
.
local_stride
(
x
.
get_shape
().
elements
(),
[
&
](
auto
j
)
{
f
(
x
[
j
],
xs
[
j
]...);
});
});
});
}
}
...
@@ -226,7 +226,7 @@ struct block
...
@@ -226,7 +226,7 @@ struct block
template
<
class
Input
>
template
<
class
Input
>
constexpr
auto
elements
()
const
constexpr
auto
elements
()
const
{
{
using
reduce_type
=
decltype
(
slice
r
(
Input
{}));
using
reduce_type
=
decltype
(
slice
(
Input
{}));
using
value_type
=
typename
Input
::
type
;
using
value_type
=
typename
Input
::
type
;
constexpr
auto
relements
=
get_shape_c
<
reduce_type
>
{}.
elements
();
constexpr
auto
relements
=
get_shape_c
<
reduce_type
>
{}.
elements
();
if
constexpr
(
vec_size
<
value_type
>
()
>
1
)
if
constexpr
(
vec_size
<
value_type
>
()
>
1
)
...
@@ -260,11 +260,11 @@ struct lane
...
@@ -260,11 +260,11 @@ struct lane
struct
reducer
struct
reducer
{
{
index
idx
;
index
idx
;
Slicer
slice
r
;
Slicer
slice
;
template
<
class
Op
,
class
T
,
class
Read
>
template
<
class
Op
,
class
T
,
class
Read
>
__device__
auto
reduce
(
Op
op
,
T
init
,
Read
read
)
const
__device__
auto
reduce
(
Op
op
,
T
init
,
Read
read
)
const
{
{
return
sliced
(
slice
r
,
[
=
](
auto
x
,
auto
...
xs
)
{
return
sliced
(
slice
,
[
=
](
auto
x
,
auto
...
xs
)
{
using
type
=
typename
decltype
(
x
)
::
type
;
using
type
=
typename
decltype
(
x
)
::
type
;
type
r
=
init
;
type
r
=
init
;
for
(
index_int
j
=
0
;
j
<
x
.
get_shape
().
elements
();
j
++
)
for
(
index_int
j
=
0
;
j
<
x
.
get_shape
().
elements
();
j
++
)
...
@@ -284,7 +284,7 @@ struct lane
...
@@ -284,7 +284,7 @@ struct lane
template
<
class
F
>
template
<
class
F
>
__device__
auto
inner
(
F
f
)
const
__device__
auto
inner
(
F
f
)
const
{
{
return
sliced
(
slice
r
,
[
=
](
auto
x
,
auto
...
xs
)
{
return
sliced
(
slice
,
[
=
](
auto
x
,
auto
...
xs
)
{
for
(
index_int
j
=
0
;
j
<
x
.
get_shape
().
elements
();
j
++
)
for
(
index_int
j
=
0
;
j
<
x
.
get_shape
().
elements
();
j
++
)
{
{
f
(
x
[
j
],
xs
[
j
]...);
f
(
x
[
j
],
xs
[
j
]...);
...
@@ -295,7 +295,7 @@ struct lane
...
@@ -295,7 +295,7 @@ struct lane
template
<
class
Input
>
template
<
class
Input
>
constexpr
auto
elements
()
const
constexpr
auto
elements
()
const
{
{
using
reduce_type
=
decltype
(
slice
r
(
Input
{}));
using
reduce_type
=
decltype
(
slice
(
Input
{}));
return
get_shape_c
<
reduce_type
>
{}.
elements
();
return
get_shape_c
<
reduce_type
>
{}.
elements
();
}
}
};
};
...
...
src/targets/gpu/kernels/include/migraphx/kernels/softmax.hpp
View file @
180dc7a0
...
@@ -33,11 +33,15 @@ template <index_int Axis, class Input, class Output>
...
@@ -33,11 +33,15 @@ template <index_int Axis, class Input, class Output>
__device__
void
softmax
(
Input
input
,
Output
output
)
__device__
void
softmax
(
Input
input
,
Output
output
)
{
{
reduce
::
block
::
run
<
reduce
::
with_axis
<
Input
,
Axis
>>
([
&
](
auto
,
auto
r
)
{
reduce
::
block
::
run
<
reduce
::
with_axis
<
Input
,
Axis
>>
([
&
](
auto
,
auto
r
)
{
auto
batch_max
=
r
.
reduce
(
op
::
max
{},
lowest
{},
op
::
id
{})(
input
);
#ifdef MIGRAPHX_USE_FAST_SOFTMAX
auto
batch_sum
=
const
auto
c
=
vec_at
(
r
.
slice
(
input
)[
0
],
0
);
r
.
reduce
(
op
::
sum
{},
0
,
[
&
](
auto
x
)
{
return
migraphx
::
exp
(
x
-
batch_max
);
})(
input
);
#else
r
.
inner
([
&
](
auto
&
y
,
auto
x
)
{
y
=
migraphx
::
exp
(
x
-
batch_max
)
/
batch_sum
;
})(
output
,
const
auto
c
=
r
.
reduce
(
op
::
max
{},
lowest
{},
op
::
id
{})(
input
);
input
);
#endif
auto
batch_sum
=
r
.
reduce
(
op
::
sum
{},
0
,
[
&
](
auto
x
)
{
return
migraphx
::
convert
<
float
>
(
migraphx
::
exp
(
x
-
c
));
})(
input
);
r
.
inner
([
&
](
auto
&
y
,
auto
x
)
{
y
=
migraphx
::
exp
(
x
-
c
)
/
batch_sum
;
})(
output
,
input
);
});
});
}
}
...
...
src/targets/gpu/lowering.cpp
View file @
180dc7a0
...
@@ -26,6 +26,8 @@
...
@@ -26,6 +26,8 @@
#include <migraphx/manage_ptr.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/instruction_ref.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/deconvolution.hpp>
#include <migraphx/op/deconvolution.hpp>
...
@@ -81,26 +83,14 @@ struct miopen_apply
...
@@ -81,26 +83,14 @@ struct miopen_apply
(
void
)
i
;
(
void
)
i
;
}
}
const
std
::
unordered_set
<
std
::
string
>&
get_rocblas_fp32_archs
()
{
static
std
::
unordered_set
<
std
::
string
>
supported_archs
{
"gfx908"
,
"gfx90a"
};
return
supported_archs
;
}
void
init
()
void
init
()
{
{
assert
(
mod
!=
nullptr
);
assert
(
mod
!=
nullptr
);
assert
(
pass
!=
nullptr
);
assert
(
pass
!=
nullptr
);
#if ROCBLAS_VERSION_MAJOR >= 2 && ROCBLAS_VERSION_MINOR >= 38
auto
&
ctx
=
get_context
();
auto
&
ctx
=
get_context
();
const
auto
device_name
=
trim
(
split_string
(
get_device_name
(),
':'
).
front
());
int8_x4_format
=
get_int8_x4_format
(
ctx
);
if
(
contains
(
get_rocblas_fp32_archs
(),
device_name
))
compute_fp32
=
get_compute_fp32_flag
();
compute_fp32
=
true
;
rocblas_gemm_flags
flag
;
rocblas_query_int8_layout_flag
(
ctx
.
get_stream
().
get_rocblas
(),
&
flag
);
int8_x4_format
=
(
flag
==
rocblas_gemm_flags_pack_int8x4
);
#endif
offload_copy
=
(
mod
->
name
()
==
"main"
)
?
pass
->
offload_copy
:
false
;
offload_copy
=
(
mod
->
name
()
==
"main"
)
?
pass
->
offload_copy
:
false
;
...
@@ -184,6 +174,7 @@ struct miopen_apply
...
@@ -184,6 +174,7 @@ struct miopen_apply
for
(
auto
it
=
mod
->
begin
();
it
!=
mod
->
end
();
it
++
)
for
(
auto
it
=
mod
->
begin
();
it
!=
mod
->
end
();
it
++
)
{
{
auto
s
=
it
->
get_shape
();
auto
s
=
it
->
get_shape
();
auto
attrs
=
it
->
get_operator
().
attributes
();
if
(
apply_map
.
count
(
it
->
name
())
>
0
)
if
(
apply_map
.
count
(
it
->
name
())
>
0
)
{
{
check_shape
(
s
,
apply_map
.
at
(
it
->
name
())(
it
));
check_shape
(
s
,
apply_map
.
at
(
it
->
name
())(
it
));
...
@@ -192,11 +183,37 @@ struct miopen_apply
...
@@ -192,11 +183,37 @@ struct miopen_apply
{
{
check_shape
(
s
,
insert_precompile_op
(
it
));
check_shape
(
s
,
insert_precompile_op
(
it
));
}
}
else
if
(
attrs
.
contains
(
"target"
))
{
check_shape
(
s
,
insert_custom_op
(
it
,
attrs
));
}
}
}
copy_params
();
copy_params
();
}
}
instruction_ref
insert_custom_op
(
instruction_ref
ins
,
const
value
&
attrs
)
const
{
const
auto
&
custom_op
=
ins
->
get_operator
();
if
(
attrs
.
at
(
"target"
)
==
"cpu"
)
{
auto
s
=
ins
->
get_shape
();
std
::
vector
<
instruction_ref
>
cpu_inputs
;
auto
inputs
=
ins
->
inputs
();
auto
output
=
inputs
.
back
();
std
::
transform
(
inputs
.
begin
(),
inputs
.
end
(),
std
::
back_inserter
(
cpu_inputs
),
[
&
](
auto
in
)
{
return
mod
->
insert_instruction
(
ins
,
make_op
(
"hip::copy_from_gpu"
),
in
);
});
cpu_inputs
.
front
()
=
mod
->
insert_instruction
(
ins
,
make_op
(
"hip::sync_stream"
),
cpu_inputs
);
auto
cpu_out
=
mod
->
insert_instruction
(
ins
,
custom_op
,
cpu_inputs
);
auto
gpu_out
=
mod
->
insert_instruction
(
ins
,
make_op
(
"hip::copy_to_gpu"
),
cpu_out
,
output
);
return
mod
->
replace_instruction
(
ins
,
gpu_out
);
}
return
ins
;
}
instruction_ref
insert_precompile_op
(
instruction_ref
ins
)
const
instruction_ref
insert_precompile_op
(
instruction_ref
ins
)
const
{
{
auto
output
=
insert_allocation
(
ins
,
ins
->
get_shape
());
auto
output
=
insert_allocation
(
ins
,
ins
->
get_shape
());
...
...
src/targets/gpu/rocblas.cpp
View file @
180dc7a0
...
@@ -21,7 +21,13 @@
...
@@ -21,7 +21,13 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* THE SOFTWARE.
*/
*/
#include <unordered_set>
#include <migraphx/ranges.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/gpu/device_name.hpp>
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/context.hpp>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
...
@@ -41,6 +47,33 @@ rocblas_handle_ptr create_rocblas_handle_ptr(hipStream_t s)
...
@@ -41,6 +47,33 @@ rocblas_handle_ptr create_rocblas_handle_ptr(hipStream_t s)
return
rb
;
return
rb
;
}
}
const
std
::
unordered_set
<
std
::
string
>&
get_rocblas_fp32_archs
()
{
static
std
::
unordered_set
<
std
::
string
>
supported_archs
{
"gfx908"
,
"gfx90a"
};
return
supported_archs
;
}
bool
get_compute_fp32_flag
()
{
bool
compute_fp32
=
false
;
#if ROCBLAS_VERSION_MAJOR >= 2 && ROCBLAS_VERSION_MINOR >= 38
const
auto
device_name
=
trim
(
split_string
(
get_device_name
(),
':'
).
front
());
if
(
contains
(
get_rocblas_fp32_archs
(),
device_name
))
compute_fp32
=
true
;
#endif
return
compute_fp32
;
}
bool
get_int8_x4_format
(
context
&
ctx
)
{
bool
int8_x4_format
=
true
;
#if ROCBLAS_VERSION_MAJOR >= 2 && ROCBLAS_VERSION_MINOR >= 38
rocblas_gemm_flags
flag
;
rocblas_query_int8_layout_flag
(
ctx
.
get_stream
().
get_rocblas
(),
&
flag
);
int8_x4_format
=
(
flag
==
rocblas_gemm_flags_pack_int8x4
);
#endif
return
int8_x4_format
;
}
}
// namespace gpu
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
}
// namespace migraphx
src/value.cpp
View file @
180dc7a0
...
@@ -511,14 +511,7 @@ void print_value(std::ostream& os, const std::vector<value>& x)
...
@@ -511,14 +511,7 @@ void print_value(std::ostream& os, const std::vector<value>& x)
os
<<
"}"
;
os
<<
"}"
;
}
}
void
print_value
(
std
::
ostream
&
os
,
const
value
::
binary
&
x
)
void
print_value
(
std
::
ostream
&
os
,
const
value
::
binary
&
x
)
{
os
<<
x
;
}
{
// Convert binary to integers
std
::
vector
<
int
>
v
(
x
.
begin
(),
x
.
end
());
os
<<
"{"
;
os
<<
to_string_range
(
v
);
os
<<
"}"
;
}
std
::
ostream
&
operator
<<
(
std
::
ostream
&
os
,
const
value
&
d
)
std
::
ostream
&
operator
<<
(
std
::
ostream
&
os
,
const
value
&
d
)
{
{
...
...
test/api/test_custom_op.cpp
View file @
180dc7a0
...
@@ -43,6 +43,8 @@ struct sigmoid_custom_op final : migraphx::experimental_custom_op_base
...
@@ -43,6 +43,8 @@ struct sigmoid_custom_op final : migraphx::experimental_custom_op_base
return
inputs
[
1
];
return
inputs
[
1
];
}
}
virtual
bool
runs_on_offload_target
()
const
override
{
return
true
;
}
virtual
migraphx
::
shape
compute_shape
(
migraphx
::
shapes
inputs
)
const
override
virtual
migraphx
::
shape
compute_shape
(
migraphx
::
shapes
inputs
)
const
override
{
{
if
(
inputs
.
size
()
!=
2
)
if
(
inputs
.
size
()
!=
2
)
...
@@ -111,4 +113,45 @@ TEST_CASE(run_sigmoid_with_incorrect_shape)
...
@@ -111,4 +113,45 @@ TEST_CASE(run_sigmoid_with_incorrect_shape)
"Error in compute_shape of: sigmoid_custom_op: op must have two inputs"
));
"Error in compute_shape of: sigmoid_custom_op: op must have two inputs"
));
}
}
struct
identity_custom_op
final
:
migraphx
::
experimental_custom_op_base
{
virtual
std
::
string
name
()
const
override
{
return
"identity_custom_op"
;
}
virtual
migraphx
::
argument
compute
(
migraphx
::
context
,
migraphx
::
shape
,
migraphx
::
arguments
inputs
)
const
override
{
return
inputs
[
0
];
}
virtual
bool
runs_on_offload_target
()
const
override
{
return
true
;
}
virtual
migraphx
::
shape
compute_shape
(
migraphx
::
shapes
inputs
)
const
override
{
if
(
inputs
.
size
()
!=
1
)
{
throw
std
::
runtime_error
(
"Identity op must have only one input"
);
}
return
inputs
.
back
();
}
virtual
std
::
vector
<
size_t
>
output_alias
(
migraphx
::
shapes
)
const
override
{
return
{
0
,
1
};
}
};
TEST_CASE
(
run_custom_op_with_invalid_output_alias
)
{
identity_custom_op
i_op
;
migraphx
::
register_experimental_custom_op
(
i_op
);
auto
op
=
migraphx
::
operation
(
"identity_custom_op"
);
EXPECT
(
op
.
name
()
==
"identity_custom_op"
);
migraphx
::
program
p
;
migraphx
::
shape
s
{
migraphx_shape_float_type
,
{
12
}};
migraphx
::
module
m
=
p
.
get_main_module
();
auto
x
=
m
.
add_parameter
(
"x"
,
s
);
auto
i_ins
=
m
.
add_instruction
(
migraphx
::
operation
(
"identity_custom_op"
),
{
x
});
migraphx_test_private_disable_exception_catch
(
true
);
EXPECT
(
test
::
throws
<
std
::
exception
>
(
[
&
]
{
p
.
compile
(
migraphx
::
target
(
"ref"
));
},
"Currently, CustomOps in MIGraphX only supports one output_alias"
));
}
int
main
(
int
argc
,
const
char
*
argv
[])
{
test
::
run
(
argc
,
argv
);
}
int
main
(
int
argc
,
const
char
*
argv
[])
{
test
::
run
(
argc
,
argv
);
}
test/api/test_custom_op_gpu.cpp
View file @
180dc7a0
...
@@ -24,40 +24,89 @@
...
@@ -24,40 +24,89 @@
#include <hip/hip_runtime_api.h>
#include <hip/hip_runtime_api.h>
#include <migraphx/migraphx.h>
#include <migraphx/migraphx.h>
#include <migraphx/migraphx.hpp>
#include <migraphx/migraphx.hpp>
#include <numeric>
#include <stdexcept>
#include <stdexcept>
#include "test.hpp"
#include "test.hpp"
#define MIGRAPHX_HIP_ASSERT(x) (EXPECT(x == hipSuccess))
#define MIGRAPHX_HIP_ASSERT(x) (EXPECT(x == hipSuccess))
struct
simple_custom_op
final
:
migraphx
::
experimental_custom_op_base
struct
half_copy_host
final
:
migraphx
::
experimental_custom_op_base
{
{
virtual
std
::
string
name
()
const
override
{
return
"simple_custom_op"
;
}
virtual
std
::
string
name
()
const
override
{
return
"half_copy_host"
;
}
virtual
bool
runs_on_offload_target
()
const
override
{
return
false
;
}
virtual
migraphx
::
argument
virtual
migraphx
::
argument
compute
(
migraphx
::
context
ctx
,
migraphx
::
shape
,
migraphx
::
arguments
inputs
)
const
override
compute
(
migraphx
::
context
ctx
,
migraphx
::
shape
,
migraphx
::
arguments
inputs
)
const
override
{
{
// sets first half size_bytes of the input 0, and rest of the half bytes are copied.
// This custom op simply sets first half size_bytes of the input to 0, and rest of the half
int
*
h_output
=
nullptr
;
// bytes are copied. for this custom_op, it does its computation on the host. Therefore,
auto
*
d_output
=
reinterpret_cast
<
int
*>
(
inputs
[
0
].
data
());
// `runs_on_offload_target()` is set to false. MIGraphX would inject necessary buffer copies
// to and from GPU to Host based on `runs_on_offload_targe()` flag for input buffers as well
// as the output buffers
auto
*
input_buffer_ptr
=
inputs
[
0
].
data
();
auto
*
output_buffer_ptr
=
inputs
[
1
].
data
();
auto
input_bytes
=
inputs
[
0
].
get_shape
().
bytes
();
auto
input_bytes
=
inputs
[
0
].
get_shape
().
bytes
();
auto
*
output_ptr
=
inputs
[
1
].
data
();
auto
copy_bytes
=
input_bytes
/
2
;
auto
copy_bytes
=
input_bytes
/
2
;
MIGRAPHX_HIP_ASSERT
(
hipSetDevice
(
0
));
MIGRAPHX_HIP_ASSERT
(
hipSetDevice
(
0
));
MIGRAPHX_HIP_ASSERT
(
hipHostMalloc
(
&
h_output
,
input_bytes
));
MIGRAPHX_HIP_ASSERT
(
hipMemcpyAsync
(
output_buffer_ptr
,
MIGRAPHX_HIP_ASSERT
(
hipMemcpyAsync
(
input_buffer_ptr
,
h_output
,
d_output
,
input_bytes
,
hipMemcpyDeviceToHost
,
ctx
.
get_queue
<
hipStream_t
>
()));
input_bytes
,
hipMemcpyHostToHost
,
ctx
.
get_queue
<
hipStream_t
>
()));
MIGRAPHX_HIP_ASSERT
(
hipDeviceSynchronize
());
MIGRAPHX_HIP_ASSERT
(
hipDeviceSynchronize
());
MIGRAPHX_HIP_ASSERT
(
hipMemset
(
h_
output
,
0
,
copy_bytes
));
MIGRAPHX_HIP_ASSERT
(
hipMemset
(
output
_buffer_ptr
,
0
,
copy_bytes
));
MIGRAPHX_HIP_ASSERT
(
hipDeviceSynchronize
());
MIGRAPHX_HIP_ASSERT
(
hipDeviceSynchronize
());
MIGRAPHX_HIP_ASSERT
(
hipMemcpy
(
output_ptr
,
h_output
,
input_bytes
,
hipMemcpyHostToDevice
));
return
inputs
[
1
];
}
virtual
migraphx
::
shape
compute_shape
(
migraphx
::
shapes
inputs
)
const
override
{
if
(
not
inputs
[
0
].
standard
()
or
not
inputs
[
1
].
standard
())
{
throw
std
::
runtime_error
(
"Input args must be standard shaped"
);
}
if
(
inputs
.
size
()
!=
2
)
{
throw
std
::
runtime_error
(
"number of inputs must be 2"
);
}
return
inputs
.
back
();
}
};
struct
half_copy_device
final
:
migraphx
::
experimental_custom_op_base
{
virtual
std
::
string
name
()
const
override
{
return
"half_copy_device"
;
}
virtual
bool
runs_on_offload_target
()
const
override
{
return
true
;
}
virtual
migraphx
::
argument
compute
(
migraphx
::
context
ctx
,
migraphx
::
shape
,
migraphx
::
arguments
inputs
)
const
override
{
// This custom op simply sets first half size_bytes of the input to 0, and rest of the half
// bytes are copied. for this custom_op, it does its computation on the "GPU". Therefore,
// `runs_on_offload_target()` is set to "true".
auto
*
input_buffer_ptr
=
inputs
[
0
].
data
();
auto
*
output_buffer_ptr
=
inputs
[
1
].
data
();
auto
input_bytes
=
inputs
[
0
].
get_shape
().
bytes
();
auto
copy_bytes
=
input_bytes
/
2
;
MIGRAPHX_HIP_ASSERT
(
hipSetDevice
(
0
));
MIGRAPHX_HIP_ASSERT
(
hipMemcpyAsync
(
output_buffer_ptr
,
input_buffer_ptr
,
input_bytes
,
hipMemcpyDeviceToDevice
,
ctx
.
get_queue
<
hipStream_t
>
()));
MIGRAPHX_HIP_ASSERT
(
hipDeviceSynchronize
());
MIGRAPHX_HIP_ASSERT
(
hipMemset
(
output_buffer_ptr
,
0
,
copy_bytes
));
MIGRAPHX_HIP_ASSERT
(
hipDeviceSynchronize
());
MIGRAPHX_HIP_ASSERT
(
hipDeviceSynchronize
());
MIGRAPHX_HIP_ASSERT
(
hipHostFree
(
h_output
));
return
inputs
[
1
];
return
inputs
[
1
];
}
}
virtual
migraphx
::
shape
compute_shape
(
migraphx
::
shapes
inputs
)
const
override
virtual
migraphx
::
shape
compute_shape
(
migraphx
::
shapes
inputs
)
const
override
{
{
if
(
not
inputs
[
0
].
standard
())
if
(
not
inputs
[
0
].
standard
()
or
not
inputs
[
1
].
standard
()
)
{
{
throw
std
::
runtime_error
(
"
firs
t arg must be standard shaped"
);
throw
std
::
runtime_error
(
"
Inpu
t arg
s
must be standard shaped"
);
}
}
if
(
inputs
.
size
()
!=
2
)
if
(
inputs
.
size
()
!=
2
)
{
{
...
@@ -67,36 +116,208 @@ struct simple_custom_op final : migraphx::experimental_custom_op_base
...
@@ -67,36 +116,208 @@ struct simple_custom_op final : migraphx::experimental_custom_op_base
}
}
};
};
TEST_CASE
(
run_simple_custom_op
)
// overwrites input buffer
struct
half_copy_device_same_buffer
final
:
migraphx
::
experimental_custom_op_base
{
virtual
std
::
string
name
()
const
override
{
return
"half_copy_device_same_buffer"
;
}
virtual
bool
runs_on_offload_target
()
const
override
{
return
true
;
}
virtual
migraphx
::
argument
compute
(
migraphx
::
context
,
migraphx
::
shape
,
migraphx
::
arguments
inputs
)
const
override
{
// This custom op simply sets first half size_bytes of the input 0, and rest of the half
// bytes are copied. for this custom_op, it does its computation on the "device". Therefore,
// `runs_on_offload_target()` is set to "true"
auto
*
buffer_ptr
=
inputs
[
0
].
data
();
auto
input_bytes
=
inputs
[
0
].
get_shape
().
bytes
();
auto
copy_bytes
=
input_bytes
/
2
;
MIGRAPHX_HIP_ASSERT
(
hipSetDevice
(
0
));
MIGRAPHX_HIP_ASSERT
(
hipMemset
(
buffer_ptr
,
0
,
copy_bytes
));
MIGRAPHX_HIP_ASSERT
(
hipDeviceSynchronize
());
return
inputs
[
0
];
}
virtual
migraphx
::
shape
compute_shape
(
migraphx
::
shapes
inputs
)
const
override
{
if
(
not
inputs
[
0
].
standard
())
{
throw
std
::
runtime_error
(
"Input arg must be standard shaped"
);
}
return
inputs
.
front
();
}
};
TEST_CASE
(
register_half_copy_op
)
{
half_copy_host
hch
;
migraphx
::
register_experimental_custom_op
(
hch
);
auto
op
=
migraphx
::
operation
(
"half_copy_host"
);
EXPECT
(
op
.
name
()
==
"half_copy_host"
);
half_copy_device
hcd
;
migraphx
::
register_experimental_custom_op
(
hcd
);
op
=
migraphx
::
operation
(
"half_copy_device"
);
EXPECT
(
op
.
name
()
==
"half_copy_device"
);
half_copy_device_same_buffer
hcdsb
;
migraphx
::
register_experimental_custom_op
(
hcdsb
);
op
=
migraphx
::
operation
(
"half_copy_device_same_buffer"
);
EXPECT
(
op
.
name
()
==
"half_copy_device_same_buffer"
);
}
// Runs each half_copy_* custom op inside a one-instruction GPU program and
// checks the result: for the input 0..11, the output must hold zeros in its
// first six elements and the values 6..11 in its last six.
TEST_CASE(half_copy_custom_op_test)
{
    // Builds, compiles and evaluates a program whose only instruction is the
    // custom op `op_name`.
    //   op_name      - registered name of the custom op under test.
    //   buffer_alloc - when true, an allocation of the input shape is passed
    //                  to the op as a second argument.
    // The lambda captures nothing: everything it needs is created locally.
    auto run_test_prog = [](const std::string& op_name, bool buffer_alloc) {
        migraphx::program p;
        migraphx::module m = p.get_main_module();
        migraphx::shape s{migraphx_shape_float_type, {4, 3}};
        auto x                        = m.add_parameter("x", s);
        migraphx::instructions inputs = {x};
        if(buffer_alloc)
        {
            auto alloc = m.add_allocation(s);
            inputs     = {x, alloc};
        }
        auto half_copy_ins = m.add_instruction(migraphx::operation(op_name.c_str()), inputs);
        m.add_return({half_copy_ins});

        migraphx::compile_options options;
        options.set_offload_copy();
        p.compile(migraphx::target("gpu"), options);

        migraphx::program_parameters pp;
        std::vector<float> x_data(12);
        std::iota(x_data.begin(), x_data.end(), 0);
        pp.add("x", migraphx::argument(s, x_data.data()));

        auto results = p.eval(pp);
        auto result  = results[0];

        // First half stays zero, second half mirrors the input (6..11).
        std::vector<float> expected_result(12, 0);
        std::iota(expected_result.begin() + 6, expected_result.end(), 6);
        EXPECT(bool{result == migraphx::argument(s, expected_result.data())});
    };

    // register all the ops
    half_copy_host hch;
    migraphx::register_experimental_custom_op(hch);
    half_copy_device hcd;
    migraphx::register_experimental_custom_op(hcd);
    half_copy_device_same_buffer hcdsb;
    migraphx::register_experimental_custom_op(hcdsb);

    // {op name, whether the op receives an extra output allocation}
    std::vector<std::pair<std::string, bool>> tests_config = {
        {"half_copy_host", true},
        {"half_copy_device", true},
        {"half_copy_device_same_buffer", false}};
    for(const auto& i : tests_config)
    {
        run_test_prog(i.first, i.second);
    }
}
// Custom op that exposes every second element of its input along each
// dimension: the output shape halves each length and doubles each stride,
// and compute() wraps the first input's data pointer in that shape.
struct stride_two final : migraphx::experimental_custom_op_base
{
    // Name under which the op is registered and looked up.
    virtual std::string name() const override { return "stride_two"; }

    // Returns an argument of shape `out_shape` over the first input's data.
    virtual migraphx::argument
    compute(migraphx::context, migraphx::shape out_shape, migraphx::arguments inputs) const override
    {
        return migraphx::argument(out_shape, inputs[0].data());
    }

    // Derives the output shape from the single, standard-shaped input.
    // Throws std::runtime_error when there is not exactly one input or when
    // that input is not standard.
    virtual migraphx::shape compute_shape(migraphx::shapes inputs) const override
    {
        if(inputs.size() != 1)
        {
            throw std::runtime_error("stride_two op must have only one input argument");
        }
        if(not inputs[0].standard())
        {
            throw std::runtime_error("stride_two op only works on the standard input shapes");
        }

        migraphx::shape in_shape = inputs[0];

        // Halve every length (integer division).
        std::vector<size_t> halved_dims;
        for(auto len : in_shape.lengths())
        {
            halved_dims.push_back(len / 2);
        }

        // Double every stride so consecutive output elements skip one input element.
        std::vector<size_t> doubled_strides;
        for(auto stride : in_shape.strides())
        {
            doubled_strides.push_back(stride * 2);
        }

        return migraphx::shape{in_shape.type(), halved_dims, doubled_strides};
    }

    virtual bool runs_on_offload_target() const override { return true; }

    // Reports input index 0 as the output alias.
    virtual std::vector<size_t> output_alias(migraphx::shapes) const override { return {0}; }
};
// Runs the stride_two custom op on a 4x4x4 float input holding 0..63 and
// checks the eight values read back from the returned result.
TEST_CASE(stride_two_custom_op_test)
{
    stride_two strided_op;
    migraphx::register_experimental_custom_op(strided_op);

    // Program: x -> stride_two -> return
    migraphx::program prog;
    migraphx::module main_mod = prog.get_main_module();
    migraphx::shape in_shape{migraphx_shape_float_type, {4, 4, 4}};
    auto param   = main_mod.add_parameter("x", in_shape);
    auto strided = main_mod.add_instruction(migraphx::operation("stride_two"), {param});
    main_mod.add_return({strided});

    migraphx::compile_options copts;
    copts.set_offload_copy();
    prog.compile(migraphx::target("gpu"), copts);

    // Input values are simply their own flat index.
    std::vector<float> input_values(64);
    std::iota(input_values.begin(), input_values.end(), 0);
    migraphx::program_parameters params;
    params.add("x", migraphx::argument(in_shape, input_values.data()));

    auto outputs      = prog.eval(params);
    auto first_output = outputs[0];
    auto actual       = first_output.as_vector<float>();

    // Flat offsets reached with lengths {2, 2, 2} and strides {32, 8, 2}.
    std::vector<float> expected = {0, 2, 8, 10, 32, 34, 40, 42};
    EXPECT(actual == expected);
}
TEST_CASE
(
custom_op_with_pre_and_post_subgraph_test
)
{
half_copy_host
hco
;
migraphx
::
register_experimental_custom_op
(
hco
);
stride_two
st
;
migraphx
::
register_experimental_custom_op
(
st
);
migraphx
::
program
p
;
migraphx
::
program
p
;
migraphx
::
shape
s
{
migraphx_shape_int32_type
,
{
4
,
3
}};
migraphx
::
shape
s
{
migraphx_shape_float_type
,
{
4
,
6
}};
migraphx
::
shape
trans_shape
{
migraphx_shape_int32_type
,
{
3
,
4
}};
migraphx
::
module
m
=
p
.
get_main_module
();
migraphx
::
module
m
=
p
.
get_main_module
();
auto
x
=
m
.
add_parameter
(
"x"
,
s
);
auto
x
=
m
.
add_parameter
(
"x"
,
s
);
auto
neg
=
m
.
add_instruction
(
migraphx
::
operation
(
"neg"
),
x
);
// pre-subgraph
auto
neg_ins
=
m
.
add_instruction
(
migraphx
::
operation
(
"neg"
),
x
);
auto
trans_ins
=
m
.
add_instruction
(
migraphx
::
operation
(
"transpose"
,
"{permutation: [1, 0]}"
),
{
neg_ins
});
auto
cont_ins
=
m
.
add_instruction
(
migraphx
::
operation
(
"contiguous"
),
{
trans_ins
});
// custom_op
migraphx
::
shape
trans_shape
{
migraphx_shape_float_type
,
{
6
,
4
}};
auto
alloc
=
m
.
add_allocation
(
trans_shape
);
auto
alloc
=
m
.
add_allocation
(
trans_shape
);
auto
neg_trans
=
auto
half_copy_ins
=
m
.
add_instruction
(
migraphx
::
operation
(
"transpose"
,
"{permutation: [1, 0]}"
),
{
neg
});
m
.
add_instruction
(
migraphx
::
operation
(
"half_copy_host"
),
{
cont_ins
,
alloc
});
auto
neg_cont
=
m
.
add_instruction
(
migraphx
::
operation
(
"contiguous"
),
{
neg_trans
});
// post-subgraph
auto
custom_kernel
=
auto
abs_ins
=
m
.
add_instruction
(
migraphx
::
operation
(
"abs"
),
{
half_copy_ins
});
m
.
add_instruction
(
migraphx
::
operation
(
"simple_custom_op"
),
{
neg_cont
,
alloc
});
// another custom_op
auto
relu
=
m
.
add_instruction
(
migraphx
::
operation
(
"relu"
),
custom_kernel
);
auto
stride_two_ins
=
m
.
add_instruction
(
migraphx
::
operation
(
"stride_two"
),
{
abs_ins
});
m
.
add_return
({
relu
});
// post-subgraph
auto
relu_ins
=
m
.
add_instruction
(
migraphx
::
operation
(
"relu"
),
{
stride_two_ins
});
m
.
add_return
({
relu_ins
});
migraphx
::
compile_options
options
;
migraphx
::
compile_options
options
;
options
.
set_offload_copy
();
options
.
set_offload_copy
();
p
.
compile
(
migraphx
::
target
(
"gpu"
),
options
);
p
.
compile
(
migraphx
::
target
(
"gpu"
),
options
);
migraphx
::
program_parameters
pp
;
migraphx
::
program_parameters
pp
;
std
::
vector
<
int
>
x_data
(
12
,
-
3
);
std
::
vector
<
float
>
x_data
(
s
.
elements
());
std
::
iota
(
x_data
.
begin
(),
x_data
.
end
(),
0
);
pp
.
add
(
"x"
,
migraphx
::
argument
(
s
,
x_data
.
data
()));
pp
.
add
(
"x"
,
migraphx
::
argument
(
s
,
x_data
.
data
()));
auto
results
=
p
.
eval
(
pp
);
auto
results
=
p
.
eval
(
pp
);
auto
result
=
results
[
0
];
auto
result
=
results
[
0
];
auto
result_vec
=
result
.
as_vector
<
in
t
>
();
auto
result_vec
=
result
.
as_vector
<
floa
t
>
();
std
::
vector
<
in
t
>
expected_result
(
12
,
0
)
;
std
::
vector
<
floa
t
>
expected_result
=
{
0
,
0
,
0
,
0
,
4
,
16
}
;
std
::
fill
(
expected_result
.
begin
()
+
6
,
expected_result
.
end
(),
3
);
EXPECT
(
bool
{
result
==
migraphx
::
argument
(
migraphx
::
shape
{
migraphx_shape_float_type
,
{
3
,
2
}},
EXPECT
(
bool
{
result
==
migraphx
::
argument
(
trans_shape
,
expected_result
.
data
())});
expected_result
.
data
())});
}
}
int
main
(
int
argc
,
const
char
*
argv
[])
{
test
::
run
(
argc
,
argv
);
}
int
main
(
int
argc
,
const
char
*
argv
[])
{
test
::
run
(
argc
,
argv
);
}
test/api/test_gpu.cpp
View file @
180dc7a0
...
@@ -25,6 +25,8 @@
...
@@ -25,6 +25,8 @@
#include <hip/hip_runtime_api.h>
#include <hip/hip_runtime_api.h>
#include <migraphx/migraphx.h>
#include <migraphx/migraphx.h>
#include <migraphx/migraphx.hpp>
#include <migraphx/migraphx.hpp>
#include <migraphx/manage_ptr.hpp>
#include "test.hpp"
#include "test.hpp"
TEST_CASE
(
load_and_run
)
TEST_CASE
(
load_and_run
)
...
@@ -44,11 +46,67 @@ TEST_CASE(load_and_run)
...
@@ -44,11 +46,67 @@ TEST_CASE(load_and_run)
{
{
pp
.
add
(
name
,
migraphx
::
argument
::
generate
(
param_shapes
[
name
]));
pp
.
add
(
name
,
migraphx
::
argument
::
generate
(
param_shapes
[
name
]));
}
}
auto
outputs
=
p
.
eval
(
pp
);
auto
outputs
=
p
.
eval
(
pp
);
CHECK
(
shapes_before
.
size
()
==
outputs
.
size
());
CHECK
(
shapes_before
.
size
()
==
outputs
.
size
());
CHECK
(
bool
{
shapes_before
.
front
()
==
outputs
.
front
().
get_shape
()});
CHECK
(
bool
{
shapes_before
.
front
()
==
outputs
.
front
().
get_shape
()});
}
}
// Handle aliases produced by MIGRAPHX_MANAGE_PTR. The second macro argument
// names the HIP release function (hipFree / hipStreamDestroy); presumably the
// macro yields an owning smart pointer that calls it on destruction -- confirm
// against migraphx/manage_ptr.hpp.
using hip_ptr    = MIGRAPHX_MANAGE_PTR(void, hipFree);
using stream_ptr = MIGRAPHX_MANAGE_PTR(hipStream_t, hipStreamDestroy);
// Creates a HIP stream with flags 0, expects the call to succeed, and hands
// the stream back wrapped in a stream_ptr.
stream_ptr get_stream()
{
    hipStream_t created;
    auto status = hipStreamCreateWithFlags(&created, 0);
    EXPECT(status == hipSuccess);
    return stream_ptr{created};
}
// Allocates `size` bytes with hipMalloc, expects the call to succeed, and
// returns the pointer wrapped in a hip_ptr.
hip_ptr get_hip_buffer(size_t size)
{
    void* device_mem;
    auto status = hipMalloc(&device_mem, size);
    EXPECT(status == hipSuccess);
    return hip_ptr{device_mem};
}
// Compiles conv_relu_maxpool_test.onnx for the GPU with offload_copy disabled,
// copies generated parameters into hipMalloc'ed buffers, runs the program with
// run_async on a stream created by get_stream(), and checks that the number
// and shape of the outputs match what the program reported before compiling.
TEST_CASE(load_and_run_async)
{
    auto p             = migraphx::parse_onnx("conv_relu_maxpool_test.onnx");
    auto shapes_before = p.get_output_shapes();

    migraphx::compile_options options;
    options.set_offload_copy(false);
    p.compile(migraphx::target("gpu"), options);

    // Compilation must not change the output shapes.
    auto shapes_after = p.get_output_shapes();
    CHECK(shapes_before.size() == 1);
    CHECK(shapes_before.size() == shapes_after.size());
    CHECK(bool{shapes_before.front() == shapes_after.front()});

    migraphx::program_parameters pp;
    auto param_shapes = p.get_parameter_shapes();

    stream_ptr stream = get_stream();

    // Host-side arguments and their device copies; both are kept in vectors
    // declared at test scope so they remain alive through run_async below.
    std::vector<hip_ptr> buffs;
    std::vector<migraphx::argument> args;
    for(auto&& name : param_shapes.names())
    {
        args.push_back(migraphx::argument::generate(param_shapes[name]));
        auto& host_arg = args.back();

        buffs.push_back(get_hip_buffer(host_arg.get_shape().bytes()));
        auto& device_buf = buffs.back();

        auto err = hipMemcpy(
            device_buf.get(), host_arg.data(), host_arg.get_shape().bytes(), hipMemcpyHostToDevice);
        EXPECT(err == hipSuccess);

        // The program parameter is an argument over the device buffer.
        pp.add(name, migraphx::argument(host_arg.get_shape(), device_buf.get()));
    }

    auto outputs = p.run_async(pp, stream.get());
    CHECK(shapes_before.size() == outputs.size());
    CHECK(bool{shapes_before.front() == outputs.front().get_shape()});
}
TEST_CASE
(
load_and_run_ctx
)
TEST_CASE
(
load_and_run_ctx
)
{
{
auto
p
=
migraphx
::
parse_onnx
(
"conv_relu_maxpool_test.onnx"
);
auto
p
=
migraphx
::
parse_onnx
(
"conv_relu_maxpool_test.onnx"
);
...
@@ -82,10 +140,10 @@ TEST_CASE(if_pl_test)
...
@@ -82,10 +140,10 @@ TEST_CASE(if_pl_test)
migraphx
::
program_parameters
pp
;
migraphx
::
program_parameters
pp
;
auto
param_shapes
=
p
.
get_parameter_shapes
();
auto
param_shapes
=
p
.
get_parameter_shapes
();
auto
xs
=
param_shapes
[
"x"
];
auto
xs
=
param_shapes
[
"x"
];
std
::
vector
<
float
>
xd
(
xs
.
bytes
()
/
sizeof
(
float
),
1.0
);
std
::
vector
<
float
>
xd
(
xs
.
elements
(
),
1.0
);
pp
.
add
(
"x"
,
migraphx
::
argument
(
xs
,
xd
.
data
()));
pp
.
add
(
"x"
,
migraphx
::
argument
(
xs
,
xd
.
data
()));
auto
ys
=
param_shapes
[
"y"
];
auto
ys
=
param_shapes
[
"y"
];
std
::
vector
<
float
>
yd
(
ys
.
bytes
()
/
sizeof
(
float
),
2.0
);
std
::
vector
<
float
>
yd
(
ys
.
elements
(
),
2.0
);
pp
.
add
(
"y"
,
migraphx
::
argument
(
ys
,
yd
.
data
()));
pp
.
add
(
"y"
,
migraphx
::
argument
(
ys
,
yd
.
data
()));
char
ccond
=
cond
;
char
ccond
=
cond
;
pp
.
add
(
"cond"
,
migraphx
::
argument
(
param_shapes
[
"cond"
],
&
ccond
));
pp
.
add
(
"cond"
,
migraphx
::
argument
(
param_shapes
[
"cond"
],
&
ccond
));
...
...
test/gpu/mlir.cpp
View file @
180dc7a0
...
@@ -37,10 +37,6 @@
...
@@ -37,10 +37,6 @@
#include <migraphx/functional.hpp>
#include <migraphx/functional.hpp>
#include <test.hpp>
#include <test.hpp>
using
migraphx
::
trim
;
// m test_gpu_mlir && ./bin/test_gpu_mlir
struct
mlir_gpu_target
:
migraphx
::
gpu
::
target
struct
mlir_gpu_target
:
migraphx
::
gpu
::
target
{
{
std
::
string
name
()
const
{
return
"mlir"
;
}
std
::
string
name
()
const
{
return
"mlir"
;
}
...
...
test/gpu/pack_int8_args.cpp
View file @
180dc7a0
...
@@ -30,6 +30,7 @@
...
@@ -30,6 +30,7 @@
#include <migraphx/adjust_allocation.hpp>
#include <migraphx/adjust_allocation.hpp>
#include <migraphx/gpu/pack_int8_args.hpp>
#include <migraphx/gpu/pack_int8_args.hpp>
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/device_name.hpp>
#include <migraphx/auto_contiguous.hpp>
#include <migraphx/auto_contiguous.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/replace_allocate.hpp>
#include <migraphx/replace_allocate.hpp>
...
@@ -43,9 +44,8 @@
...
@@ -43,9 +44,8 @@
// Treat some operators as compilable to enable lowering
// Treat some operators as compilable to enable lowering
MIGRAPHX_GPU_TEST_PRECOMPILE
(
"add"
,
"mul"
,
"convert"
)
MIGRAPHX_GPU_TEST_PRECOMPILE
(
"add"
,
"mul"
,
"convert"
)
void
run_passes
(
migraphx
::
module
&
m
)
void
run_passes
(
migraphx
::
module
&
m
,
migraphx
::
gpu
::
context
&
ctx
)
{
{
auto
ctx
=
migraphx
::
gpu
::
context
{};
migraphx
::
run_passes
(
m
,
migraphx
::
run_passes
(
m
,
{
migraphx
::
auto_contiguous
{},
{
migraphx
::
auto_contiguous
{},
migraphx
::
gpu
::
lowering
{
&
ctx
,
false
},
migraphx
::
gpu
::
lowering
{
&
ctx
,
false
},
...
@@ -56,18 +56,6 @@ void run_passes(migraphx::module& m)
...
@@ -56,18 +56,6 @@ void run_passes(migraphx::module& m)
migraphx
::
dead_code_elimination
{}});
migraphx
::
dead_code_elimination
{}});
}
}
bool
get_int8_x4_format
()
{
bool
int8_x4_format
=
true
;
#if ROCBLAS_VERSION_MAJOR >= 2 && ROCBLAS_VERSION_MINOR >= 38
auto
ctx
=
migraphx
::
gpu
::
context
{};
rocblas_gemm_flags
flag
;
rocblas_query_int8_layout_flag
(
ctx
.
get_stream
().
get_rocblas
(),
&
flag
);
int8_x4_format
=
(
flag
==
rocblas_gemm_flags_pack_int8x4
);
#endif
return
int8_x4_format
;
}
TEST_CASE
(
quant_dot
)
TEST_CASE
(
quant_dot
)
{
{
auto
create_module
=
[]
{
auto
create_module
=
[]
{
...
@@ -106,8 +94,10 @@ TEST_CASE(quant_dot)
...
@@ -106,8 +94,10 @@ TEST_CASE(quant_dot)
migraphx
::
make_op
(
"hip::allocate"
,
{{
"shape"
,
migraphx
::
to_value
(
m2_shape
)}}));
migraphx
::
make_op
(
"hip::allocate"
,
{{
"shape"
,
migraphx
::
to_value
(
m2_shape
)}}));
packa
=
m
.
add_instruction
(
migraphx
::
make_op
(
"gpu::int8_gemm_pack_a"
),
l2
,
alloc
);
packa
=
m
.
add_instruction
(
migraphx
::
make_op
(
"gpu::int8_gemm_pack_a"
),
l2
,
alloc
);
}
}
auto
gemm
=
auto
gemm
=
m
.
add_instruction
(
m
.
add_instruction
(
migraphx
::
make_op
(
"gpu::quant_gemm"
,
{{
"int8_x4_format"
,
int8_x4
}}),
migraphx
::
make_op
(
"gpu::quant_gemm"
,
{{
"int8_x4_format"
,
int8_x4
},
{
"compute_fp32"
,
migraphx
::
gpu
::
get_compute_fp32_flag
()}}),
l1
,
l1
,
packa
,
packa
,
gemm_alloc
);
gemm_alloc
);
...
@@ -128,10 +118,11 @@ TEST_CASE(quant_dot)
...
@@ -128,10 +118,11 @@ TEST_CASE(quant_dot)
};
};
auto
m1
=
create_module
();
auto
m1
=
create_module
();
run_passes
(
m1
);
auto
ctx
=
migraphx
::
gpu
::
context
{};
run_passes
(
m1
,
ctx
);
bool
flag
=
get_int8_x4_format
();
bool
int8_x4
=
migraphx
::
gpu
::
get_int8_x4_format
(
ctx
);
auto
m2
=
create_optimized_int8_x4
(
flag
);
auto
m2
=
create_optimized_int8_x4
(
int8_x4
);
EXPECT
(
m1
==
m2
);
EXPECT
(
m1
==
m2
);
}
}
...
@@ -216,8 +207,10 @@ TEST_CASE(quant_dot_trans)
...
@@ -216,8 +207,10 @@ TEST_CASE(quant_dot_trans)
packb
=
m
.
add_instruction
(
migraphx
::
make_op
(
"gpu::int8_gemm_pack_a"
),
contb
,
allocpb
);
packb
=
m
.
add_instruction
(
migraphx
::
make_op
(
"gpu::int8_gemm_pack_a"
),
contb
,
allocpb
);
}
}
auto
gemm
=
auto
gemm
=
m
.
add_instruction
(
m
.
add_instruction
(
migraphx
::
make_op
(
"gpu::quant_gemm"
,
{{
"int8_x4_format"
,
int8_x4
}}),
migraphx
::
make_op
(
"gpu::quant_gemm"
,
{{
"int8_x4_format"
,
int8_x4
},
{
"compute_fp32"
,
migraphx
::
gpu
::
get_compute_fp32_flag
()}}),
tl1_alpha_int8
,
tl1_alpha_int8
,
packb
,
packb
,
output
);
output
);
...
@@ -227,10 +220,11 @@ TEST_CASE(quant_dot_trans)
...
@@ -227,10 +220,11 @@ TEST_CASE(quant_dot_trans)
};
};
auto
m1
=
create_module
();
auto
m1
=
create_module
();
bool
flag
=
get_int8_x4_format
()
;
auto
ctx
=
migraphx
::
gpu
::
context
{}
;
auto
m2
=
create_optimized_int8_x4
(
flag
);
run_passes
(
m1
,
ctx
);
run_passes
(
m1
);
bool
int8_x4
=
migraphx
::
gpu
::
get_int8_x4_format
(
ctx
);
auto
m2
=
create_optimized_int8_x4
(
int8_x4
);
EXPECT
(
m1
==
m2
);
EXPECT
(
m1
==
m2
);
}
}
...
@@ -297,8 +291,10 @@ TEST_CASE(quant_dot_pad)
...
@@ -297,8 +291,10 @@ TEST_CASE(quant_dot_pad)
packa
=
m
.
add_instruction
(
migraphx
::
make_op
(
"gpu::int8_gemm_pack_a"
),
pl2
,
alloc
);
packa
=
m
.
add_instruction
(
migraphx
::
make_op
(
"gpu::int8_gemm_pack_a"
),
pl2
,
alloc
);
}
}
auto
gemm
=
auto
gemm
=
m
.
add_instruction
(
m
.
add_instruction
(
migraphx
::
make_op
(
"gpu::quant_gemm"
,
{{
"int8_x4_format"
,
int8_x4
}}),
migraphx
::
make_op
(
"gpu::quant_gemm"
,
{{
"int8_x4_format"
,
int8_x4
},
{
"compute_fp32"
,
migraphx
::
gpu
::
get_compute_fp32_flag
()}}),
pl1
,
pl1
,
packa
,
packa
,
gemm_alloc
);
gemm_alloc
);
...
@@ -318,10 +314,11 @@ TEST_CASE(quant_dot_pad)
...
@@ -318,10 +314,11 @@ TEST_CASE(quant_dot_pad)
};
};
auto
m1
=
create_module
();
auto
m1
=
create_module
();
bool
flag
=
get_int8_x4_format
()
;
auto
ctx
=
migraphx
::
gpu
::
context
{}
;
auto
m2
=
create_optimized_int8_x4
(
flag
);
run_passes
(
m1
,
ctx
);
run_passes
(
m1
);
bool
int8_x4
=
migraphx
::
gpu
::
get_int8_x4_format
(
ctx
);
auto
m2
=
create_optimized_int8_x4
(
int8_x4
);
EXPECT
(
m1
==
m2
);
EXPECT
(
m1
==
m2
);
}
}
...
@@ -444,17 +441,23 @@ TEST_CASE(quant_dot_trans_pad)
...
@@ -444,17 +441,23 @@ TEST_CASE(quant_dot_trans_pad)
}
}
auto
gemm
=
m
.
add_instruction
(
auto
gemm
=
m
.
add_instruction
(
migraphx
::
make_op
(
"gpu::quant_gemm"
,
{{
"int8_x4_format"
,
int8_x4
}}),
pa
,
packb
,
output
);
migraphx
::
make_op
(
"gpu::quant_gemm"
,
{{
"int8_x4_format"
,
int8_x4
},
{
"compute_fp32"
,
migraphx
::
gpu
::
get_compute_fp32_flag
()}}),
pa
,
packb
,
output
);
m
.
add_return
({
gemm
});
m
.
add_return
({
gemm
});
return
m
;
return
m
;
};
};
auto
m1
=
create_module
();
auto
m1
=
create_module
();
bool
flag
=
get_int8_x4_format
()
;
auto
ctx
=
migraphx
::
gpu
::
context
{}
;
auto
m2
=
create_optimized_int8_x4
(
flag
);
run_passes
(
m1
,
ctx
);
run_passes
(
m1
);
bool
int8_x4
=
migraphx
::
gpu
::
get_int8_x4_format
(
ctx
);
auto
m2
=
create_optimized_int8_x4
(
int8_x4
);
EXPECT
(
m1
==
m2
);
EXPECT
(
m1
==
m2
);
}
}
...
...
test/gpu/stream_sync.cpp
0 → 100644
View file @
180dc7a0
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <iostream>
#include <vector>
#include <migraphx/gpu/context.hpp>
#include <migraphx/context.hpp>
#include <migraphx/gpu/compile_hip.hpp>
#include <migraphx/gpu/kernel.hpp>
#include <migraphx/gpu/device_name.hpp>
#include <migraphx/par_for.hpp>
#include <migraphx/program.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/module.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/gpu/target.hpp>
#include "test.hpp"
// Handle alias for a HIP stream; hipStreamDestroy is named as the release
// function (presumably invoked when the handle is destroyed -- confirm against
// MIGRAPHX_MANAGE_PTR).
using hip_stream_ptr = MIGRAPHX_MANAGE_PTR(hipStream_t, hipStreamDestroy);

// Value the tests fill buffers with; it must stay in sync with the literal
// 1337 hard-coded in the kernel source below.
constexpr uint32_t stream_sync_test_val = 1337;

// HIP source compiled at test time. The `compare` kernel reads the element at
// its global thread index and calls abort() when it is not 1337.
// NOLINTNEXTLINE
const std::string compare_numbers = R"__migraphx__(
#include <hip/hip_runtime.h>
extern "C" {
__global__ void compare(float* data)
{
int i = threadIdx.x + blockDim.x * blockIdx.x;
if (data[i] != 1337)
{
abort();
}
}
}
int main() {}
)__migraphx__";
// Builds a migraphx::src_file from a file name and the [begin, end) character
// range of `content`.
// NOTE(review): the range is passed as raw pointers into `content`, so
// `content` presumably has to outlive the returned object -- confirm against
// the src_file definition.
migraphx::src_file make_src_file(const std::string& name, const std::string& content)
{
    const char* first = content.data();
    const char* last  = first + content.size();
    return {name, std::make_pair(first, last)};
}
// Creates a HIP stream with hipStreamCreate and returns it wrapped in a
// hip_stream_ptr; raises via MIGRAPHX_THROW when creation does not return
// hipSuccess.
hip_stream_ptr get_stream()
{
    hipStream_t raw_stream;
    if(hipStreamCreate(&raw_stream) != hipSuccess)
    {
        MIGRAPHX_THROW("Failed to get stream");
    }
    return hip_stream_ptr{raw_stream};
}
// Sanity check for the `compare` kernel itself: a buffer already filled with
// stream_sync_test_val is pushed to the GPU, the kernel is launched on a
// freshly created stream, and the buffer read back must equal the original.
TEST_CASE(test_stream_sync_compare_kernel)
{
    // Compile the kernel source for the device reported by get_device_name().
    auto binaries = migraphx::gpu::compile_hip_src(
        {make_src_file("check_stuff.cpp", compare_numbers)}, "", migraphx::gpu::get_device_name());
    EXPECT(binaries.size() == 1);

    migraphx::gpu::kernel k1{binaries.front(), "compare"};

    // 128 floats, all set to the value the kernel compares against.
    auto input =
        migraphx::fill_argument({migraphx::shape::float_type, {128}}, stream_sync_test_val);

    auto ginput = migraphx::gpu::to_gpu(input);

    hip_stream_ptr pstream = get_stream();
    // NOTE(review): the 2nd/3rd launch arguments are presumably the global and
    // local work sizes (128 and 1024 here) -- confirm against kernel::launch.
    k1.launch(pstream.get(), input.get_shape().elements(), 1024)(ginput.cast<float>());

    // The kernel only reads the buffer, so the round trip must be unchanged.
    auto output = migraphx::gpu::from_gpu(ginput);
    EXPECT(output == input);
}
// Evaluates a program computing dot(x, y) + test_val on a user-created stream
// and then launches the `compare` kernel on that same stream over the output
// buffer. test_val is a literal filled with stream_sync_test_val and x is
// filled with 0, so the kernel's 1337 check is expected to hold once the
// program's work has completed.
TEST_CASE(test_stream_sync)
{
    // Compile the kernel source for the device reported by get_device_name().
    auto binaries = migraphx::gpu::compile_hip_src(
        {make_src_file("check_stuff.cpp", compare_numbers)}, "", migraphx::gpu::get_device_name());
    EXPECT(binaries.size() == 1);

    migraphx::gpu::kernel k1{binaries.front(), "compare"};

    // Matrix sizes: x is m x k, y is k x m, the result is m x m.
    const unsigned int m = 128;
    const unsigned int k = 8192;

    // Setup empty GPU memory buffer
    migraphx::shape input_shape{migraphx::shape::float_type, {m, k}};
    migraphx::shape output_shape{migraphx::shape::float_type, {m, m}};
    auto input   = migraphx::fill_argument(input_shape, 0);
    auto ginput  = migraphx::gpu::to_gpu(input);
    auto output  = migraphx::fill_argument(output_shape, 0);
    auto goutput = migraphx::gpu::to_gpu(output);

    hip_stream_ptr pstream = get_stream();

    migraphx::program p;
    auto* mm = p.get_main_module();
    auto x   = mm->add_parameter("x", migraphx::shape{migraphx::shape::float_type, {m, k}});
    auto y   = mm->add_literal(
        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {k, m}}));

    // m x m literal holding the value the compare kernel looks for.
    std::vector<float> data(m * m, stream_sync_test_val);
    auto test_val = mm->add_literal(output_shape, data);

    auto mult_out = mm->add_instruction(migraphx::make_op("dot"), x, y);
    mm->add_instruction(migraphx::make_op("add"), mult_out, test_val);
    p.compile(migraphx::gpu::target{});

    // Run network and then verify with kernel
    // NOTE(review): `args` is never read afterwards; the second eval argument
    // presumably selects the stream and asynchronous execution -- confirm
    // against program::eval.
    auto args = p.eval({{"x", ginput}, {"output", goutput}}, {pstream.get(), true});
    k1.launch(pstream.get(), m * m, 1024)(goutput.cast<float>());

    output = migraphx::gpu::from_gpu(goutput);
    // NOTE(review): `output` is m x m while `input` is m x k, so this
    // inequality may hold regardless of the computed values -- confirm this is
    // the intended host-side check.
    EXPECT(output != input);
}
// Test executable entry point: forwards the command-line arguments to
// test::run.
int main(int argc, const char* argv[]) { test::run(argc, argv); }
test/py/CMakeLists.txt
View file @
180dc7a0
...
@@ -56,4 +56,5 @@ add_py_test(gpu_offload test_gpu_offload.py WORKING_DIRECTORY ${TEST_ONNX_DIR})
...
@@ -56,4 +56,5 @@ add_py_test(gpu_offload test_gpu_offload.py WORKING_DIRECTORY ${TEST_ONNX_DIR})
add_py_test
(
gpu test_gpu.py WORKING_DIRECTORY
${
TEST_ONNX_DIR
}
)
add_py_test
(
gpu test_gpu.py WORKING_DIRECTORY
${
TEST_ONNX_DIR
}
)
add_py_test
(
array test_array.py WORKING_DIRECTORY
${
TEST_ONNX_DIR
}
)
add_py_test
(
array test_array.py WORKING_DIRECTORY
${
TEST_ONNX_DIR
}
)
add_py_test
(
backend onnx_backend_test.py WORKING_DIRECTORY
${
TEST_ONNX_DIR
}
)
add_py_test
(
backend onnx_backend_test.py WORKING_DIRECTORY
${
TEST_ONNX_DIR
}
)
add_py_test
(
gpu_async test_gpu_async.py WORKING_DIRECTORY
${
TEST_ONNX_DIR
}
)
endif
()
endif
()
test/py/test_gpu_async.py
0 → 100644
View file @
180dc7a0
#####################################################################################
# The MIT License (MIT)
#
# Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#####################################################################################
import
migraphx
import
ctypes
def test_conv_relu():
    """Run conv_relu_maxpool_test.onnx with run_async on a user-created HIP stream.

    The model is compiled for the GPU with offload_copy disabled, its
    parameters are generated and moved to the GPU, and the last output of
    run_async is copied back and printed after the stream has been
    synchronized.

    Raises:
        RuntimeError: if creating, synchronizing or destroying the HIP stream
            returns a status other than hipSuccess. Failures used to be
            printed and returned, which the module-level call ignored, so the
            test could never fail on them.
    """
    hip = ctypes.cdll.LoadLibrary("libamdhip64.so")

    p = migraphx.parse_onnx("conv_relu_maxpool_test.onnx")
    print(p)
    print("Compiling ...")
    # Need to have offload_copy = False to avoid syncs() back to the host device
    p.compile(migraphx.get_target("gpu"), offload_copy=False)
    print(p)
    params = {}

    # Using default value in api for hipSuccess which is always 0
    hip_success = 0

    def check(status, call_name):
        # Turn a non-success HIP status into a test failure.
        if status != hip_success:
            raise RuntimeError("FAILED: {} (status {})".format(
                call_name, status))

    # Alloc a stream
    stream = ctypes.c_void_p()
    check(
        hip.hipStreamCreateWithFlags(ctypes.byref(stream), ctypes.c_uint(0)),
        "hipStreamCreateWithFlags")

    try:
        # Use to_gpu to push generated argument to the GPU before we perform a run
        for key, value in p.get_parameter_shapes().items():
            params[key] = migraphx.to_gpu(migraphx.generate_argument(value))

        result = migraphx.from_gpu(
            p.run_async(params, stream.value, "ihipStream_t")[-1])

        # Wait for all commands in stream to complete
        check(hip.hipStreamSynchronize(stream), "hipStreamSynchronize")
    finally:
        # Cleanup Stream, even when the run or the synchronize failed
        destroy_status = hip.hipStreamDestroy(stream)

    check(destroy_status, "hipStreamDestroy")

    print(result)
test_conv_relu
()
test/verify/test_softmax_large3.cpp
0 → 100644
View file @
180dc7a0
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "verify_program.hpp"
#include <migraphx/program.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/common.hpp>
// Verification program: a 2x4 float parameter is multiplied by the scalar
// literal 100 (combined through add_common_op) and the product is fed to
// softmax over the last axis.
struct test_softmax_large3 : verify_program<test_softmax_large3>
{
    migraphx::program create_program() const
    {
        migraphx::program p;
        auto* mm = p.get_main_module();

        migraphx::shape input_shape{migraphx::shape::float_type, {2, 4}};
        auto x     = mm->add_parameter("x", input_shape);
        auto large = mm->add_literal({migraphx::shape{migraphx::shape::float_type}, {100}});

        // Scale the input by the large constant before the softmax.
        auto scaled = migraphx::add_common_op(*mm, migraphx::make_op("mul"), {x, large});
        mm->add_instruction(migraphx::make_op("softmax", {{"axis", -1}}), scaled);
        return p;
    }
};
tools/accuracy/accuracy_checker.py
View file @
180dc7a0
...
@@ -116,6 +116,9 @@ def main():
...
@@ -116,6 +116,9 @@ def main():
model
=
migraphx
.
parse_onnx
(
model_name
,
default_dim_value
=
batch
)
model
=
migraphx
.
parse_onnx
(
model_name
,
default_dim_value
=
batch
)
if
args
.
verbose
:
print
(
model
)
model
.
compile
(
migraphx
.
get_target
(
'gpu'
),
offload_copy
=
False
)
model
.
compile
(
migraphx
.
get_target
(
'gpu'
),
offload_copy
=
False
)
params
=
{}
params
=
{}
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment