gaoqiong / MIGraphX · Commits

Commit 7f97b8ef (unverified)

    Merge branch 'simplify_1_mul_div_ops' into divide_by_zero_check

Authored Oct 07, 2022 by Ted Themistokleous; committed by GitHub on Oct 07, 2022.
Parents: 2ba401f0, d1fed367

Showing 20 changed files with 625 additions and 113 deletions.
test/check_shapes_test.cpp                            +1   −1
test/eval_test.cpp                                    +1   −1
test/fpga/get_target_assignments.cpp                  +12  −9
test/fuse_pointwise.cpp                               +1   −1
test/gpu/adjust_allocation.cpp                        +5   −1
test/gpu/jit.cpp                                      +2   −0
test/gpu/make_precompile_op.hpp                       +66  −0
test/gpu/manage_host_buffer.cpp                       +99  −0
test/gpu/mlir.cpp                                     +4   −8
test/gpu/pack_int8_args.cpp                           +84  −75
test/gpu/stream_sync.cpp                              +146 −0
test/include/basic_ops.hpp                            +5   −4
test/include/test.hpp                                 +13  −10
test/literal_test.cpp                                 +2   −2
test/memory_coloring_test.cpp                         +1   −1
test/onnx/batch_norm_1d_test.onnx                     +35  −0
test/onnx/batch_norm_2d_test.onnx                     +35  −0
test/onnx/batch_norm_3d_test.onnx                     +44  −0
test/onnx/batch_norm_flat_test.onnx                   +34  −0
test/onnx/batch_norm_invalid_bias_rank_test.onnx      +35  −0
test/check_shapes_test.cpp

@@ -49,6 +49,6 @@ bool create_shapes(bool dynamic_allowed)
 TEST_CASE(allow_dynamic_shape) { EXPECT(create_shapes(true)); }

-TEST_CASE(fail_dynamic_shape) { EXPECT(!create_shapes(false)); }
+TEST_CASE(fail_dynamic_shape) { EXPECT(not create_shapes(false)); }

 int main(int argc, const char* argv[]) { test::run(argc, argv); }
test/eval_test.cpp

@@ -187,7 +187,7 @@ TEST_CASE(print_test)
     std::stringstream ss;
     ss << p;
     std::string s = ss.str();
-    EXPECT(!s.empty());
+    EXPECT(not s.empty());
 }

 TEST_CASE(param_test)
test/get_target_assignments.cpp → test/fpga/get_target_assignments.cpp

@@ -26,8 +26,9 @@
 #include <migraphx/make_op.hpp>
 #include <migraphx/program.hpp>
 #include <migraphx/register_target.hpp>
-#include <migraphx/ref/target.hpp>
+#include <migraphx/fpga/target.hpp>
 #include <migraphx/target_assignments.hpp>
+#include <migraphx/iterator_for.hpp>

 migraphx::program create_program()
 {

@@ -37,8 +38,8 @@ migraphx::program create_program()
     auto x = mm->add_parameter("x", s);
     auto y = mm->add_parameter("y", s);
     auto z = mm->add_parameter("z", s);
-    auto diff = mm->add_instruction(migraphx::make_op("div"), x, y);
-    mm->add_instruction(migraphx::make_op("div"), diff, z);
+    auto diff = mm->add_instruction(migraphx::make_op("add"), x, y);
+    mm->add_instruction(migraphx::make_op("add"), diff, z);
     return p;
 }

@@ -46,15 +47,17 @@ TEST_CASE(is_supported)
 {
     auto p       = create_program();
     auto targets = migraphx::get_targets();
-    EXPECT(!targets.empty());
-    auto first_target = targets[0];
-    auto t            = migraphx::make_target(first_target);
+    EXPECT(not targets.empty());
+    auto t = migraphx::make_target("fpga");
     const auto assignments = p.get_target_assignments({t});
-    for(const auto& [ins, target] : assignments)
+    const auto* mod = p.get_main_module();
+    EXPECT(mod->size() == assignments.size());
+    for(const auto ins : iterator_for(*mod))
     {
-        (void)ins;
-        EXPECT(target == first_target);
+        const auto& target = assignments.at(ins);
+        EXPECT(target == "fpga");
     }
 }
test/fuse_pointwise.cpp

@@ -21,7 +21,7 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
-#include "migraphx/dead_code_elimination.hpp"
+#include <migraphx/dead_code_elimination.hpp>
 #include <migraphx/fuse_pointwise.hpp>
 #include <migraphx/instruction.hpp>
 #include <migraphx/pass_manager.hpp>
test/gpu/adjust_allocation.cpp

@@ -40,6 +40,10 @@
 #include <migraphx/make_op.hpp>
 #include <basic_ops.hpp>
 #include <test.hpp>
+#include "make_precompile_op.hpp"
+
+// Treat some operators as compilable to enable lowering
+MIGRAPHX_GPU_TEST_PRECOMPILE("add", "mul", "convert")

 void run_lowering(migraphx::program& p, bool offload_copy = false)
 {

@@ -118,7 +122,7 @@ TEST_CASE(no_copy_dead_param)
     auto xb  = mm->add_instruction(migraphx::make_op("hip::allocate", {{"shape", to_value(s)}}));
     auto gx  = mm->add_instruction(migraphx::make_op("hip::copy_to_gpu"), x, xb);
     auto ab  = mm->add_instruction(migraphx::make_op("hip::allocate", {{"shape", to_value(s)}}));
-    auto sum = mm->add_instruction(migraphx::make_op("gpu::add"), gx, gx, ab);
+    auto sum = mm->add_instruction(make_precompile_op("add"), gx, gx, ab);
     auto r   = mm->add_instruction(migraphx::make_op("hip::copy_from_gpu"), sum);
     mm->add_return({r});
test/gpu/jit.cpp

@@ -307,12 +307,14 @@ TEST_CASE(compile_math)
     "erf(x)",
     "exp(x)",
     "floor(x)",
+    "fmod(x, x)",
     "isnan(x)",
     "log(x)",
     "max(x, x)",
     "min(x, x)",
     "pow(x, 0)",
     "pow(x, x)",
+    "remainder(x,x)",
     "round(x)",
     "rsqrt(x)",
     "sin(x)",
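The two math calls added to this coverage list are easy to conflate: both compute a floating-point remainder, but fmod truncates the implied quotient toward zero while remainder rounds it to nearest (standard C99/C++ semantics, nothing MIGraphX-specific). A minimal host-side illustration:

    #include <cassert>
    #include <cmath>

    int main()
    {
        assert(std::fmod(5.0, 3.0) == 2.0);       // 5 - trunc(5/3)*3 = 5 - 3 =  2
        assert(std::remainder(5.0, 3.0) == -1.0); // 5 - round(5/3)*3 = 5 - 6 = -1
    }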
src/targets/gpu/device/gelu.cpp → test/gpu/make_precompile_op.hpp

@@ -21,63 +21,46 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
-#include <migraphx/gpu/device/gelu.hpp>
-#include <migraphx/gpu/device/nary.hpp>
-#include <migraphx/gpu/device/types.hpp>
-#include <cmath>
-
-namespace migraphx {
-inline namespace MIGRAPHX_INLINE_NS {
-namespace gpu {
-namespace device {
-
-// x * 0.5 * (1.0 + erf(x / sqrt(2.0)))
-template <class T>
-auto gelu_fn(T x) __device__
-{
-    return x * 0.5 * (1 + ::erf(x * M_SQRT1_2));
-}
-
-// 0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * pow(x, 3))))
-template <class T>
-auto gelu_fn_new(T x) __device__
-{
-    return 0.5 * x * (1 + tanh(sqrt(M_2_PI) * (x + 0.044715 * x * x * x)));
-}
-
-void gelu(hipStream_t stream, const argument& result, const argument& arg)
-{
-    nary(stream, result, arg)([](auto x) __device__ { return gelu_fn(to_hip_type(x)); });
-}
-
-void gelu_new(hipStream_t stream, const argument& result, const argument& arg)
-{
-    nary(stream, result, arg)([](auto x) __device__ { return gelu_fn_new(to_hip_type(x)); });
-}
-
-void add_gelu(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2)
-{
-    nary(stream, result, arg1, arg2)([](auto x, auto y) __device__ {
-        auto sum = to_hip_type(x + y);
-        return gelu_fn(sum);
-    });
-}
-
-void add_gelu_new(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2)
-{
-    nary(stream, result, arg1, arg2)([](auto x, auto y) __device__ {
-        auto sum = to_hip_type(x + y);
-        return gelu_fn(sum);
-    });
-}
-
-} // namespace device
-} // namespace gpu
-} // namespace MIGRAPHX_INLINE_NS
-} // namespace migraphx
+#ifndef MIGRAPHX_GUARD_TEST_GPU_MAKE_PRECOMPILE_OP_HPP
+#define MIGRAPHX_GUARD_TEST_GPU_MAKE_PRECOMPILE_OP_HPP
+
+#include <migraphx/operation.hpp>
+#include <migraphx/gpu/compiler.hpp>
+#include <migraphx/make_op.hpp>
+
+// NOLINTNEXTLINE
+#define MIGRAPHX_GPU_TEST_PRECOMPILE(...)                                \
+    struct test_compiler : migraphx::gpu::compiler<test_compiler>        \
+    {                                                                    \
+        std::vector<std::string> names() const { return {__VA_ARGS__}; } \
+                                                                         \
+        template <class... Ts>                                           \
+        migraphx::operation compile_op(Ts&&...) const                    \
+        {                                                                \
+            MIGRAPHX_THROW("Not compilable");                            \
+        }                                                                \
+                                                                         \
+        template <class... Ts>                                           \
+        migraphx::gpu::compiler_replace compile(Ts&&...) const           \
+        {                                                                \
+            MIGRAPHX_THROW("Not compilable");                            \
+        }                                                                \
+    };
+
+inline migraphx::operation make_precompile_op(migraphx::rank<0>, const migraphx::operation& op)
+{
+    return migraphx::make_op("gpu::precompile_op", {{"op", migraphx::to_value(op)}});
+}
+
+inline migraphx::operation make_precompile_op(migraphx::rank<1>, const std::string& name)
+{
+    return make_precompile_op(migraphx::rank<0>{}, migraphx::make_op(name));
+}
+
+template <class T>
+auto make_precompile_op(const T& x)
+{
+    return make_precompile_op(migraphx::rank<1>{}, x);
+}
+
+#endif // MIGRAPHX_GUARD_TEST_GPU_MAKE_PRECOMPILE_OP_HPP
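Taken together, the macro and the rank-dispatched helpers let a GPU test declare selected operators "precompilable" without a real JIT backend: MIGRAPHX_GPU_TEST_PRECOMPILE registers a test_compiler that claims the listed names but throws if it is ever asked to compile, and make_precompile_op wraps either an operator name or an already-built operation into gpu::precompile_op. A condensed usage sketch, mirroring the call sites added elsewhere in this commit:

    #include "make_precompile_op.hpp"

    // Claim these names for the dummy compiler, as the updated tests do:
    MIGRAPHX_GPU_TEST_PRECOMPILE("add", "mul", "convert")

    // Inside a test module, either overload resolves via the rank dispatch:
    //   mm->add_instruction(make_precompile_op("add"), gx, gx, ab);
    //   make_precompile_op(migraphx::make_op("convert", {{"target_type", t}}));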
test/gpu/manage_host_buffer.cpp (new file, mode 100644)
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <iostream>
#include <vector>
#include <hip/hip_runtime_api.h>
#include <migraphx/gpu/target.hpp>
#include <migraphx/verify.hpp>
#include <test.hpp>
#include <basic_ops.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/make_op.hpp>
#define MIGRAPHX_HIP_ASSERT(x) (EXPECT(x == hipSuccess))
TEST_CASE(host_same_buffer_copy)
{
    migraphx::program p;
    auto* mm = p.get_main_module();
    migraphx::shape ss{migraphx::shape::float_type, {4, 2}};
    auto a           = mm->add_parameter("a", ss);
    auto b           = mm->add_parameter("b", ss);
    auto aa          = mm->add_instruction(migraphx::make_op("add"), a, a);
    auto gpu_out     = mm->add_instruction(migraphx::make_op("hip::copy_from_gpu"), aa);
    auto stream_sync = mm->add_instruction(migraphx::make_op("hip::sync_stream"), gpu_out);
    auto pass        = mm->add_instruction(unary_pass_op{}, stream_sync);
    auto alloc       = mm->add_instruction(
        migraphx::make_op("hip::allocate", {{"shape", migraphx::to_value(ss)}}));
    auto gpu_in = mm->add_instruction(migraphx::make_op("hip::copy_to_gpu"), pass, alloc);
    auto aab    = mm->add_instruction(migraphx::make_op("add"), gpu_in, b);
    mm->add_return({aab});

    migraphx::parameter_map pp;
    std::vector<float> a_vec(ss.elements(), -1);
    std::vector<float> b_vec(ss.elements(), 2);
    std::vector<float> c_vec(ss.elements(), 0);
    pp["a"] = migraphx::argument(ss, a_vec.data());
    pp["b"] = migraphx::argument(ss, b_vec.data());

    std::vector<float> gpu_result;
    migraphx::target gpu_t = migraphx::gpu::target{};
    migraphx::compile_options options;
    options.offload_copy = true;
    p.compile(gpu_t, options);
    auto result = p.eval(pp).back();
    std::vector<float> results_vector(ss.elements(), -1);
    result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
    EXPECT(migraphx::verify_range(c_vec, results_vector));
}

TEST_CASE(arguments_lifetime)
{
    auto use_on_gpu = [](const migraphx::argument& arg, int c) {
        auto* arg_ptr = arg.data();
        MIGRAPHX_HIP_ASSERT(hipSetDevice(0));
        MIGRAPHX_HIP_ASSERT(hipMemset(arg_ptr, c, arg.get_shape().bytes()));
        MIGRAPHX_HIP_ASSERT(hipDeviceSynchronize());
        return;
    };

    auto f = [use_on_gpu](const migraphx::argument& input) {
        auto a = migraphx::gpu::register_on_gpu(input);
        auto s = a.get_shape();
        {
            auto b = migraphx::gpu::register_on_gpu(input);
            use_on_gpu(b, 0);
            std::vector<float> expected_b(s.elements(), 0);
            auto gold = migraphx::argument(s, expected_b.data());
        }
        use_on_gpu(a, 1);
        return true;
    };

    migraphx::shape ss{migraphx::shape::float_type, {4, 2}};
    std::vector<float> x_data(ss.elements(), -1);
    migraphx::argument x{ss, x_data.data()};
    EXPECT(f(x));
}

int main(int argc, const char* argv[]) { test::run(argc, argv); }
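The all-zero expectation in host_same_buffer_copy follows directly from the chosen fill values; a quick host-side check of the same arithmetic (a sketch, not part of the commit):

    #include <cassert>

    int main()
    {
        const float a   = -1.0f;  // fill value for parameter "a"
        const float b   = 2.0f;   // fill value for parameter "b"
        const float aa  = a + a;  // GPU add before the host round-trip: -2
        const float aab = aa + b; // add after copying back to the GPU: -2 + 2
        assert(aab == 0.0f);      // hence c_vec, the expected output, is all zeros
    }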
test/gpu/mlir.cpp

@@ -37,10 +37,6 @@
 #include <migraphx/functional.hpp>
 #include <test.hpp>

-using migraphx::trim;
-
-// m test_gpu_mlir && ./bin/test_gpu_mlir
-
 struct mlir_gpu_target : migraphx::gpu::target
 {
     std::string name() const { return "mlir"; }

@@ -144,8 +140,8 @@ TEST_CASE(conv)
 {
     const std::string mlir_output = R"__migraphx__(
 module {
-  func @main(%arg0: tensor<2x8x3x3xf32>, %arg1: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {kernel = "mixr"} {
-    %0 = migraphx.convolution(%arg1, %arg0) {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : (tensor<1x8x4x4xf32>, tensor<2x8x3x3xf32>) -> tensor<1x2x2x2xf32>
+  func.func @main(%arg0: tensor<2x8x3x3xf32>, %arg1: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {kernel = "mixr"} {
+    %0 = migraphx.convolution(%arg1, %arg0) {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1], use_dynamic_same_auto_pad = 0 : i64} : (tensor<1x8x4x4xf32>, tensor<2x8x3x3xf32>) -> tensor<1x2x2x2xf32>
     return %0 : tensor<1x2x2x2xf32>
   }
 }

@@ -167,8 +163,8 @@ TEST_CASE(conv_add_relu)
 {
     const std::string mlir_output = R"__migraphx__(
 module {
-  func @main(%arg0: tensor<1x2x2x2xf32>, %arg1: tensor<2x8x3x3xf32>, %arg2: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {kernel = "mixr"} {
-    %0 = migraphx.convolution(%arg2, %arg1) {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : (tensor<1x8x4x4xf32>, tensor<2x8x3x3xf32>) -> tensor<1x2x2x2xf32>
+  func.func @main(%arg0: tensor<1x2x2x2xf32>, %arg1: tensor<2x8x3x3xf32>, %arg2: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {kernel = "mixr"} {
+    %0 = migraphx.convolution(%arg2, %arg1) {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1], use_dynamic_same_auto_pad = 0 : i64} : (tensor<1x8x4x4xf32>, tensor<2x8x3x3xf32>) -> tensor<1x2x2x2xf32>
     %1 = migraphx.add(%0, %arg0) : (tensor<1x2x2x2xf32>, tensor<1x2x2x2xf32>) -> tensor<1x2x2x2xf32>
     %2 = migraphx.relu(%1) : (tensor<1x2x2x2xf32>) -> tensor<1x2x2x2xf32>
     return %2 : tensor<1x2x2x2xf32>
test/gpu/pack_int8_args.cpp

@@ -21,7 +21,7 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
-#include "migraphx/instruction_ref.hpp"
+#include <migraphx/instruction_ref.hpp>
 #include <migraphx/gpu/context.hpp>
 #include <migraphx/gpu/lowering.hpp>
 #include <migraphx/gpu/target.hpp>

@@ -30,6 +30,7 @@
 #include <migraphx/adjust_allocation.hpp>
 #include <migraphx/gpu/pack_int8_args.hpp>
 #include <migraphx/gpu/rocblas.hpp>
+#include <migraphx/gpu/device_name.hpp>
 #include <migraphx/auto_contiguous.hpp>
 #include <migraphx/dead_code_elimination.hpp>
 #include <migraphx/replace_allocate.hpp>

@@ -38,10 +39,13 @@
 #include <migraphx/pass_manager.hpp>
 #include <migraphx/make_op.hpp>
 #include <test.hpp>
+#include "make_precompile_op.hpp"
+
+// Treat some operators as compilable to enable lowering
+MIGRAPHX_GPU_TEST_PRECOMPILE("add", "mul", "convert")

-void run_passes(migraphx::module& m)
+void run_passes(migraphx::module& m, migraphx::gpu::context& ctx)
 {
-    auto ctx = migraphx::gpu::context{};
     migraphx::run_passes(m,
                         {migraphx::auto_contiguous{},
                          migraphx::gpu::lowering{&ctx, false},

@@ -52,18 +56,6 @@ void run_passes(migraphx::module& m)
                          migraphx::dead_code_elimination{}});
 }

-bool get_int8_x4_format()
-{
-    bool int8_x4_format = true;
-#if ROCBLAS_VERSION_MAJOR >= 2 && ROCBLAS_VERSION_MINOR >= 38
-    auto ctx = migraphx::gpu::context{};
-    rocblas_gemm_flags flag;
-    rocblas_query_int8_layout_flag(ctx.get_stream().get_rocblas(), &flag);
-    int8_x4_format = (flag == rocblas_gemm_flags_pack_int8x4);
-#endif
-    return int8_x4_format;
-}
-
 TEST_CASE(quant_dot)
 {
     auto create_module = [] {

@@ -102,11 +94,13 @@ TEST_CASE(quant_dot)
             migraphx::make_op("hip::allocate", {{"shape", migraphx::to_value(m2_shape)}}));
         packa = m.add_instruction(migraphx::make_op("gpu::int8_gemm_pack_a"), l2, alloc);
     }
     auto gemm =
-        m.add_instruction(migraphx::make_op("gpu::quant_gemm", {{"int8_x4_format", int8_x4}}),
-                          l1,
-                          packa,
-                          gemm_alloc);
+        m.add_instruction(migraphx::make_op("gpu::quant_gemm",
+                                            {{"int8_x4_format", int8_x4},
+                                             {"compute_fp32", migraphx::gpu::get_compute_fp32_flag()}}),
+                          l1,
+                          packa,
+                          gemm_alloc);
     auto beta_broadcast = m.add_instruction(
         migraphx::make_op("multibroadcast", {{"out_lens", m3_shape.lens()}}), beta);

@@ -116,19 +110,19 @@ TEST_CASE(quant_dot)
     m.add_instruction(migraphx::make_op("gpu::contiguous"), beta_broadcast, beta_alloc);
     auto mul_alloc = m.add_instruction(
         migraphx::make_op("hip::allocate", {{"shape", migraphx::to_value(m3_shape)}}));
-    auto m3_beta =
-        m.add_instruction(migraphx::make_op("gpu::mul"), l3, beta_contiguous, mul_alloc);
-    auto gemm_add = m.add_instruction(migraphx::make_op("gpu::add"), gemm, m3_beta, output);
+    auto m3_beta =
+        m.add_instruction(make_precompile_op("mul"), l3, beta_contiguous, mul_alloc);
+    auto gemm_add = m.add_instruction(make_precompile_op("add"), gemm, m3_beta, output);
     m.add_return({gemm_add});
     return m;
     };
     auto m1 = create_module();
-    run_passes(m1);
-    bool flag = get_int8_x4_format();
-    auto m2   = create_optimized_int8_x4(flag);
+    auto ctx = migraphx::gpu::context{};
+    run_passes(m1, ctx);
+    bool int8_x4 = migraphx::gpu::get_int8_x4_format(ctx);
+    auto m2      = create_optimized_int8_x4(int8_x4);
     EXPECT(m1 == m2);
 }

@@ -187,21 +181,23 @@ TEST_CASE(quant_dot_trans)
     // back result to int8
     auto tl1_convert_alloc = m.add_instruction(migraphx::make_op(
         "hip::allocate", {{"shape", migraphx::to_value(alpha_contiguous->get_shape())}}));
-    auto tl1_convert = m.add_instruction(
-        migraphx::make_op("gpu::convert", {{"target_type", alpha->get_shape().type()}}),
-        conta,
-        tl1_convert_alloc);
+    auto tl1_convert = m.add_instruction(
+        make_precompile_op(
+            migraphx::make_op("convert", {{"target_type", alpha->get_shape().type()}})),
+        conta,
+        tl1_convert_alloc);
     auto mul_alloc = m.add_instruction(migraphx::make_op(
         "hip::allocate", {{"shape", migraphx::to_value(tl1_convert->get_shape())}}));
-    auto tl1_alpha_int32 = m.add_instruction(
-        migraphx::make_op("gpu::mul"), alpha_contiguous, tl1_convert, mul_alloc);
+    auto tl1_alpha_int32 = m.add_instruction(
+        make_precompile_op("mul"), alpha_contiguous, tl1_convert, mul_alloc);
     // convert mul_res to int8
     auto tl1_alpha_int8_alloc = m.add_instruction(migraphx::make_op(
         "hip::allocate", {{"shape", migraphx::to_value(conta->get_shape())}}));
-    auto tl1_alpha_int8 = m.add_instruction(
-        migraphx::make_op("gpu::convert", {{"target_type", conta->get_shape().type()}}),
-        tl1_alpha_int32,
-        tl1_alpha_int8_alloc);
+    auto tl1_alpha_int8 = m.add_instruction(
+        make_precompile_op(
+            migraphx::make_op("convert", {{"target_type", conta->get_shape().type()}})),
+        tl1_alpha_int32,
+        tl1_alpha_int8_alloc);
     auto packb = contb;
     if(int8_x4)

@@ -211,21 +207,24 @@ TEST_CASE(quant_dot_trans)
         packb = m.add_instruction(migraphx::make_op("gpu::int8_gemm_pack_a"), contb, allocpb);
     }
     auto gemm =
-        m.add_instruction(migraphx::make_op("gpu::quant_gemm", {{"int8_x4_format", int8_x4}}),
-                          tl1_alpha_int8,
-                          packb,
-                          output);
+        m.add_instruction(migraphx::make_op("gpu::quant_gemm",
+                                            {{"int8_x4_format", int8_x4},
+                                             {"compute_fp32", migraphx::gpu::get_compute_fp32_flag()}}),
+                          tl1_alpha_int8,
+                          packb,
+                          output);
     m.add_return({gemm});
     return m;
     };
     auto m1 = create_module();
-    bool flag = get_int8_x4_format();
-    auto m2   = create_optimized_int8_x4(flag);
-    run_passes(m1);
+    auto ctx = migraphx::gpu::context{};
+    run_passes(m1, ctx);
+    bool int8_x4 = migraphx::gpu::get_int8_x4_format(ctx);
+    auto m2      = create_optimized_int8_x4(int8_x4);
     EXPECT(m1 == m2);
 }

@@ -292,11 +291,13 @@ TEST_CASE(quant_dot_pad)
         packa = m.add_instruction(migraphx::make_op("gpu::int8_gemm_pack_a"), pl2, alloc);
     }
     auto gemm =
-        m.add_instruction(migraphx::make_op("gpu::quant_gemm", {{"int8_x4_format", int8_x4}}),
-                          pl1,
-                          packa,
-                          gemm_alloc);
+        m.add_instruction(migraphx::make_op("gpu::quant_gemm",
+                                            {{"int8_x4_format", int8_x4},
+                                             {"compute_fp32", migraphx::gpu::get_compute_fp32_flag()}}),
+                          pl1,
+                          packa,
+                          gemm_alloc);
     auto beta_broadcast =
         m.add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", s3.lens()}}), beta);

@@ -306,18 +307,18 @@ TEST_CASE(quant_dot_pad)
     m.add_instruction(migraphx::make_op("gpu::contiguous"), beta_broadcast, beta_alloc);
     auto mul_alloc = m.add_instruction(
         migraphx::make_op("hip::allocate", {{"shape", migraphx::to_value(s3)}}));
-    auto m3_beta =
-        m.add_instruction(migraphx::make_op("gpu::mul"), l3, beta_contiguous, mul_alloc);
-    auto gemm_add = m.add_instruction(migraphx::make_op("gpu::add"), gemm, m3_beta, output);
+    auto m3_beta =
+        m.add_instruction(make_precompile_op("mul"), l3, beta_contiguous, mul_alloc);
+    auto gemm_add = m.add_instruction(make_precompile_op("add"), gemm, m3_beta, output);
     m.add_return({gemm_add});
     return m;
     };
     auto m1 = create_module();
-    bool flag = get_int8_x4_format();
-    auto m2   = create_optimized_int8_x4(flag);
-    run_passes(m1);
+    auto ctx = migraphx::gpu::context{};
+    run_passes(m1, ctx);
+    bool int8_x4 = migraphx::gpu::get_int8_x4_format(ctx);
+    auto m2      = create_optimized_int8_x4(int8_x4);
     EXPECT(m1 == m2);
 }

@@ -396,14 +397,15 @@ TEST_CASE(quant_dot_trans_pad)
     // back result to int8
     auto tl1_convert_alloc = m.add_instruction(migraphx::make_op(
         "hip::allocate", {{"shape", migraphx::to_value(alpha_contiguous->get_shape())}}));
-    auto tl1_convert = m.add_instruction(
-        migraphx::make_op("gpu::convert", {{"target_type", alpha->get_shape().type()}}),
-        conta,
-        tl1_convert_alloc);
+    auto tl1_convert = m.add_instruction(
+        make_precompile_op(
+            migraphx::make_op("convert", {{"target_type", alpha->get_shape().type()}})),
+        conta,
+        tl1_convert_alloc);
     auto mul_alloc = m.add_instruction(migraphx::make_op(
         "hip::allocate", {{"shape", migraphx::to_value(tl1_convert->get_shape())}}));
-    auto tl1_alpha_int32 = m.add_instruction(
-        migraphx::make_op("gpu::mul"), alpha_contiguous, tl1_convert, mul_alloc);
+    auto tl1_alpha_int32 = m.add_instruction(
+        make_precompile_op("mul"), alpha_contiguous, tl1_convert, mul_alloc);
     // convert mul_res to int8
     auto tl1_alpha_int8_alloc = m.add_instruction(migraphx::make_op(
         "hip::allocate", {{"shape", migraphx::to_value(conta->get_shape())}}));

@@ -415,10 +417,11 @@ TEST_CASE(quant_dot_trans_pad)
             migraphx::make_op("hip::allocate", {{"shape", migraphx::to_value(ps1)}}));
     }
-    auto tl1_alpha_int8 = m.add_instruction(
-        migraphx::make_op("gpu::convert", {{"target_type", conta->get_shape().type()}}),
-        tl1_alpha_int32,
-        tl1_alpha_int8_alloc);
+    auto tl1_alpha_int8 = m.add_instruction(
+        make_precompile_op(
+            migraphx::make_op("convert", {{"target_type", conta->get_shape().type()}})),
+        tl1_alpha_int32,
+        tl1_alpha_int8_alloc);
     auto pa = tl1_alpha_int8;
     if(int8_x4)

@@ -438,17 +441,23 @@ TEST_CASE(quant_dot_trans_pad)
     }
     auto gemm = m.add_instruction(
-        migraphx::make_op("gpu::quant_gemm", {{"int8_x4_format", int8_x4}}),
-        pa,
-        packb,
-        output);
+        migraphx::make_op("gpu::quant_gemm",
+                          {{"int8_x4_format", int8_x4},
+                           {"compute_fp32", migraphx::gpu::get_compute_fp32_flag()}}),
+        pa,
+        packb,
+        output);
     m.add_return({gemm});
     return m;
     };
     auto m1 = create_module();
-    bool flag = get_int8_x4_format();
-    auto m2   = create_optimized_int8_x4(flag);
-    run_passes(m1);
+    auto ctx = migraphx::gpu::context{};
+    run_passes(m1, ctx);
+    bool int8_x4 = migraphx::gpu::get_int8_x4_format(ctx);
+    auto m2      = create_optimized_int8_x4(int8_x4);
     EXPECT(m1 == m2);
 }
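The recurring pattern in the hunks above: run_passes no longer constructs its own migraphx::gpu::context, and the file-local get_int8_x4_format helper, which built yet another context just to query rocBLAS, is deleted in favor of the library's migraphx::gpu::get_int8_x4_format. Each test now creates one context and threads it through both calls, presumably so the lowering passes and the layout query observe the same stream and rocBLAS handle. The resulting call sequence, condensed from the diff:

    auto m1 = create_module();
    auto ctx = migraphx::gpu::context{};                   // one shared context
    run_passes(m1, ctx);                                   // lowering uses it...
    bool int8_x4 = migraphx::gpu::get_int8_x4_format(ctx); // ...and so does the query
    auto m2 = create_optimized_int8_x4(int8_x4);
    EXPECT(m1 == m2);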
test/gpu/stream_sync.cpp (new file, mode 100644)
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <iostream>
#include <vector>
#include <migraphx/gpu/context.hpp>
#include <migraphx/context.hpp>
#include <migraphx/gpu/compile_hip.hpp>
#include <migraphx/gpu/kernel.hpp>
#include <migraphx/gpu/device_name.hpp>
#include <migraphx/par_for.hpp>
#include <migraphx/program.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/module.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/gpu/target.hpp>
#include "test.hpp"
using hip_stream_ptr = MIGRAPHX_MANAGE_PTR(hipStream_t, hipStreamDestroy);

constexpr uint32_t stream_sync_test_val = 1337;

// NOLINTNEXTLINE
const std::string compare_numbers = R"__migraphx__(
#include <hip/hip_runtime.h>

extern "C" {
__global__ void compare(float* data)
{
    int i = threadIdx.x + blockDim.x * blockIdx.x;
    if (data[i] != 1337)
    {
        abort();
    }
}
}

int main() {}

)__migraphx__";

migraphx::src_file make_src_file(const std::string& name, const std::string& content)
{
    return {name, std::make_pair(content.data(), content.data() + content.size())};
}

hip_stream_ptr get_stream()
{
    hipStream_t stream;
    auto status = hipStreamCreate(&stream);
    if(status != hipSuccess)
    {
        MIGRAPHX_THROW("Failed to get stream");
    }
    return hip_stream_ptr{stream};
}

TEST_CASE(test_stream_sync_compare_kernel)
{
    auto binaries = migraphx::gpu::compile_hip_src(
        {make_src_file("check_stuff.cpp", compare_numbers)}, "", migraphx::gpu::get_device_name());
    EXPECT(binaries.size() == 1);

    migraphx::gpu::kernel k1{binaries.front(), "compare"};
    auto input =
        migraphx::fill_argument({migraphx::shape::float_type, {128}}, stream_sync_test_val);
    auto ginput            = migraphx::gpu::to_gpu(input);
    hip_stream_ptr pstream = get_stream();
    k1.launch(pstream.get(), input.get_shape().elements(), 1024)(ginput.cast<float>());
    auto output = migraphx::gpu::from_gpu(ginput);
    EXPECT(output == input);
}

TEST_CASE(test_stream_sync)
{
    auto binaries = migraphx::gpu::compile_hip_src(
        {make_src_file("check_stuff.cpp", compare_numbers)}, "", migraphx::gpu::get_device_name());
    EXPECT(binaries.size() == 1);
    migraphx::gpu::kernel k1{binaries.front(), "compare"};

    const unsigned int m = 128;
    const unsigned int k = 8192;

    // Setup empty GPU memory buffer
    migraphx::shape input_shape{migraphx::shape::float_type, {m, k}};
    migraphx::shape output_shape{migraphx::shape::float_type, {m, m}};
    auto input             = migraphx::fill_argument(input_shape, 0);
    auto ginput            = migraphx::gpu::to_gpu(input);
    auto output            = migraphx::fill_argument(output_shape, 0);
    auto goutput           = migraphx::gpu::to_gpu(output);
    hip_stream_ptr pstream = get_stream();

    migraphx::program p;
    auto* mm = p.get_main_module();
    auto x   = mm->add_parameter("x", migraphx::shape{migraphx::shape::float_type, {m, k}});
    auto y   = mm->add_literal(
        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {k, m}}));
    std::vector<float> data(m * m, stream_sync_test_val);
    auto test_val = mm->add_literal(output_shape, data);
    auto mult_out = mm->add_instruction(migraphx::make_op("dot"), x, y);
    mm->add_instruction(migraphx::make_op("add"), mult_out, test_val);
    p.compile(migraphx::gpu::target{});

    // Run network and then verify with kernel
    auto args = p.eval({{"x", ginput}, {"output", goutput}}, {pstream.get(), true});
    k1.launch(pstream.get(), m * m, 1024)(goutput.cast<float>());
    output = migraphx::gpu::from_gpu(goutput);
    EXPECT(output != input);
}

int main(int argc, const char* argv[]) { test::run(argc, argv); }
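The hip_stream_ptr alias at the top of this file comes from MIGRAPHX_MANAGE_PTR. Assuming that macro expands to a std::unique_ptr with a custom deleter (an assumption; check the macro's definition in the repo), a hand-written equivalent would be:

    #include <hip/hip_runtime_api.h>
    #include <memory>
    #include <type_traits>

    // Deleter that releases the stream when the owner goes out of scope.
    struct hip_stream_deleter
    {
        void operator()(hipStream_t s) const { hipStreamDestroy(s); }
    };

    // hipStream_t is itself a pointer type, so unwrap it for unique_ptr.
    using hip_stream_ptr_sketch =
        std::unique_ptr<std::remove_pointer_t<hipStream_t>, hip_stream_deleter>;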
test/include/basic_ops.hpp

@@ -112,12 +112,12 @@ struct mod_pass_op
     migraphx::shape compute_shape(std::vector<migraphx::shape> inputs,
                                   std::vector<migraphx::module_ref> mods) const
     {
-        if(!mods.empty())
+        if(not mods.empty())
         {
             auto out_shapes = mods[0]->get_output_shapes();
             return out_shapes[0];
         }
-        if(!inputs.empty())
+        if(not inputs.empty())
         {
             return inputs.front();
         }

@@ -186,9 +186,10 @@ struct nop
     migraphx::shape compute_shape(const std::vector<migraphx::shape>&) const { return {}; }
 };

-inline migraphx::literal get_2x2()
+inline migraphx::literal get_2x2(int base = 0)
 {
-    return migraphx::literal{{migraphx::shape::float_type, {2, 2}}, {1, 2, 3, 4}};
+    return migraphx::literal{{migraphx::shape::float_type, {2, 2}},
+                             {base + 1, base + 2, base + 3, base + 4}};
 }

 inline migraphx::literal get_2x2_transposed()
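The new base parameter lets a test grab two distinct 2x2 literals, while the default of 0 keeps every existing get_2x2() call site unchanged. A minimal usage sketch (the main() here is illustrative only):

    #include <basic_ops.hpp> // test helper from this repo

    int main()
    {
        auto l0 = get_2x2();  // elements {1, 2, 3, 4}, exactly as before
        auto l4 = get_2x2(4); // elements {5, 6, 7, 8}, a second distinct literal
    }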
test/include/test.hpp

@@ -108,15 +108,7 @@ struct function
 };

 template <class Stream, class Iterator>
-inline Stream& stream_range(Stream& s, Iterator start, Iterator last)
-{
-    if(start != last)
-    {
-        s << *start;
-        std::for_each(std::next(start), last, [&](auto&& x) { s << ", " << x; });
-    }
-    return s;
-}
+Stream& stream_range(Stream& s, Iterator start, Iterator last);

 template <class Stream>
 inline Stream& operator<<(Stream& s, std::nullptr_t)

@@ -136,6 +128,17 @@ inline auto operator<<(Stream& s, const Range& v) -> decltype(stream_range(s, v.
     return s;
 }

+template <class Stream, class Iterator>
+inline Stream& stream_range(Stream& s, Iterator start, Iterator last)
+{
+    if(start != last)
+    {
+        s << *start;
+        std::for_each(std::next(start), last, [&](auto&& x) { s << ", " << x; });
+    }
+    return s;
+}
+
 template <class T>
 const T& get_value(const T& x)
 {

@@ -342,7 +345,7 @@ inline std::ostream& operator<<(std::ostream& os, const color& c)
 template <class T, class F>
 void failed(T x, const char* msg, const char* func, const char* file, int line, F f)
 {
-    if(!bool(x.value()))
+    if(not bool(x.value()))
     {
         std::cout << func << std::endl;
         std::cout << file << ":" << line << ":" << std::endl;
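A plausible reading of the stream_range reordering (an assumption; the commit itself does not state a motivation): with only a declaration before the generic operator<<, and the definition moved after it, the unqualified s << x inside stream_range's body can find that operator during instantiation, so ranges of ranges become streamable. A standalone reduction of the pattern:

    #include <algorithm>
    #include <iostream>
    #include <iterator>
    #include <vector>

    template <class Stream, class Iterator>
    Stream& stream_range(Stream& s, Iterator start, Iterator last); // declaration only

    template <class Stream, class Range>
    auto operator<<(Stream& s, const Range& v) -> decltype(stream_range(s, v.begin(), v.end()))
    {
        return stream_range(s, v.begin(), v.end());
    }

    template <class Stream, class Iterator>
    Stream& stream_range(Stream& s, Iterator start, Iterator last)
    {
        if(start != last)
        {
            s << *start;
            std::for_each(std::next(start), last, [&](auto&& x) { s << ", " << x; });
        }
        return s;
    }

    int main()
    {
        std::vector<std::vector<int>> v{{1, 2}, {3, 4}};
        std::cout << v << "\n"; // prints "1, 2, 3, 4" via the nested overload
    }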
test/literal_test.cpp

@@ -39,8 +39,8 @@ TEST_CASE(literal_test)
     migraphx::literal l2 = l1; // NOLINT
     EXPECT(l1 == l2);
     EXPECT(l1.at<int>(0) == 1);
-    EXPECT(!l1.empty());
-    EXPECT(!l2.empty());
+    EXPECT(not l1.empty());
+    EXPECT(not l2.empty());
     migraphx::literal l3{};
     migraphx::literal l4{};
test/memory_coloring_test.cpp

@@ -724,7 +724,7 @@ TEST_CASE(test39)
     auto sub_modules = p.get_modules();
     std::reverse(sub_modules.begin(), sub_modules.end());
-    for(auto& smod : sub_modules)
+    for(const auto& smod : sub_modules)
     {
         run_pass(*smod);
     }
test/onnx/batch_norm_1d_test.onnx (new file, mode 100644, binary)
    ONNX graph batch_norm_1d_test: a BatchNormalization node with inputs x, scale, bias, mean, variance and output y.

test/onnx/batch_norm_2d_test.onnx (new file, mode 100644, binary)
    ONNX graph batch_norm_2d_test: same BatchNormalization node layout as the 1-D test.

test/onnx/batch_norm_3d_test.onnx (new file, mode 100644, binary)
    ONNX graph batch_norm_3d_test: BatchNormalization with an explicit epsilon attribute.

test/onnx/batch_norm_flat_test.onnx (new file, mode 100644, binary)
    ONNX graph batch_norm_flat_test: BatchNormalization with an explicit epsilon attribute.

test/onnx/batch_norm_invalid_bias_rank_test.onnx (new file, mode 100644, binary)
    ONNX graph batch_norm_invalid_bias_rank_test: BatchNormalization whose bias input, per the file name, has an invalid rank.