Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
84725d72
Commit
84725d72
authored
Feb 16, 2023
by
charlie
Browse files
Merge branch 'develop' of github.com:ROCmSoftwarePlatform/AMDMIGraphX into dyn_batch_pass
parents
7f1e8443
bfd77388
Changes
113
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
227 additions
and
130 deletions
+227
-130
src/targets/gpu/fuse_ops.cpp
src/targets/gpu/fuse_ops.cpp
+53
-28
src/targets/gpu/include/migraphx/gpu/context.hpp
src/targets/gpu/include/migraphx/gpu/context.hpp
+9
-2
src/targets/gpu/include/migraphx/gpu/convolution.hpp
src/targets/gpu/include/migraphx/gpu/convolution.hpp
+4
-3
src/targets/gpu/include/migraphx/gpu/miopen.hpp
src/targets/gpu/include/migraphx/gpu/miopen.hpp
+11
-4
src/targets/gpu/include/migraphx/gpu/prefuse_ops.hpp
src/targets/gpu/include/migraphx/gpu/prefuse_ops.hpp
+2
-2
src/targets/gpu/kernels/include/migraphx/kernels/array.hpp
src/targets/gpu/kernels/include/migraphx/kernels/array.hpp
+1
-1
src/targets/gpu/kernels/include/migraphx/kernels/dpp.hpp
src/targets/gpu/kernels/include/migraphx/kernels/dpp.hpp
+1
-1
src/targets/gpu/kernels/include/migraphx/kernels/gathernd.hpp
...targets/gpu/kernels/include/migraphx/kernels/gathernd.hpp
+13
-19
src/targets/gpu/kernels/include/migraphx/kernels/hip.hpp
src/targets/gpu/kernels/include/migraphx/kernels/hip.hpp
+8
-5
src/targets/gpu/kernels/include/migraphx/kernels/math.hpp
src/targets/gpu/kernels/include/migraphx/kernels/math.hpp
+2
-3
src/targets/gpu/kernels/include/migraphx/kernels/shape.hpp
src/targets/gpu/kernels/include/migraphx/kernels/shape.hpp
+0
-8
src/targets/gpu/kernels/include/migraphx/kernels/types.hpp
src/targets/gpu/kernels/include/migraphx/kernels/types.hpp
+39
-2
src/targets/gpu/lowering.cpp
src/targets/gpu/lowering.cpp
+1
-2
src/targets/gpu/prefuse_ops.cpp
src/targets/gpu/prefuse_ops.cpp
+13
-7
src/targets/gpu/target.cpp
src/targets/gpu/target.cpp
+4
-10
test/CMakeLists.txt
test/CMakeLists.txt
+38
-26
test/api/test_gpu.cpp
test/api/test_gpu.cpp
+1
-0
test/api/test_save_load.cpp
test/api/test_save_load.cpp
+0
-1
test/gpu/jit.cpp
test/gpu/jit.cpp
+5
-3
test/memory_coloring_test.cpp
test/memory_coloring_test.cpp
+22
-3
No files found.
src/targets/gpu/fuse_ops.cpp
View file @
84725d72
...
...
@@ -553,11 +553,13 @@ struct find_gemm_pointwise
{
auto
matcher
()
const
{
return
precompile_name
(
"pointwise"
)(
auto
gemm_op
=
match
::
name
(
"gpu::gemm"
)(
match
::
nargs
(
3
),
match
::
used_once
()).
bind
(
"gemm"
);
auto
binary_op
=
match
::
all_of
(
match
::
nargs
(
3
),
match
::
either_arg
(
0
,
1
)(
match
::
any_of
(
match
::
standard_shape
(),
match
::
is_constant
()).
bind
(
"c"
),
match
::
name
(
"gpu::gemm"
)(
match
::
nargs
(
3
),
match
::
used_once
()).
bind
(
"gemm"
)));
match
::
any_of
(
match
::
standard_shape
(),
match
::
is_constant
()).
bind
(
"c"
),
gemm_op
));
auto
unary_op
=
match
::
all_of
(
match
::
nargs
(
2
),
match
::
arg
(
0
)(
gemm_op
));
return
precompile_name
(
"pointwise"
)(
match
::
any_of
(
binary_op
,
unary_op
));
}
// TODO: Move to matcher.hpp
...
...
@@ -589,61 +591,84 @@ struct find_gemm_pointwise
return
match
::
name
(
"@return"
)(
match
::
args
(
match
::
any_of
(
add
,
mul_add
,
add_mul
)));
}
static
auto
match_mul
(
const
std
::
string
&
input
)
{
auto
mul
=
match_mul_const
(
match_param
(
input
),
"alpha"
);
return
match
::
name
(
"@return"
)(
match
::
args
(
mul
));
}
static
float
get_float
(
instruction_ref
ins
)
{
return
ins
->
get_literal
().
at
<
float
>
();
}
template
<
class
Gemm
>
static
bool
update_gemm
(
Gemm
&
gemm
,
module_ref
pm
,
unsigned
input
)
{
auto
names
=
pm
->
get_parameter_names
();
if
(
names
.
size
()
!=
2
)
return
false
;
std
::
sort
(
names
.
begin
(),
names
.
end
());
unsigned
output
=
input
==
0
?
1
:
0
;
auto
mr
=
match
::
match_instruction
(
*
pm
,
std
::
prev
(
pm
->
end
()),
match_add
(
names
[
input
],
names
[
output
]));
if
(
mr
.
result
==
pm
->
end
())
return
false
;
if
(
contains
(
mr
.
instructions
,
"alpha_mul"
))
if
(
names
.
size
()
==
1
)
{
auto
mr
=
match
::
match_instruction
(
*
pm
,
std
::
prev
(
pm
->
end
()),
match_mul
(
names
[
input
]));
if
(
mr
.
result
==
pm
->
end
())
return
false
;
gemm
.
alpha
*=
get_float
(
mr
.
instructions
[
"alpha"
]);
else
if
(
contains
(
mr
.
instructions
,
"beta_mul"
))
gemm
.
beta
*=
get_float
(
mr
.
instructions
[
"beta"
]);
else
if
(
contains
(
mr
.
instructions
,
"gamma_mul"
)
)
return
true
;
}
else
if
(
names
.
size
()
==
2
)
{
gemm
.
alpha
*=
get_float
(
mr
.
instructions
[
"gamma"
]);
gemm
.
beta
*=
get_float
(
mr
.
instructions
[
"gamma"
]);
unsigned
output
=
input
==
0
?
1
:
0
;
auto
mr
=
match
::
match_instruction
(
*
pm
,
std
::
prev
(
pm
->
end
()),
match_add
(
names
[
input
],
names
[
output
]));
if
(
mr
.
result
==
pm
->
end
())
return
false
;
if
(
contains
(
mr
.
instructions
,
"alpha_mul"
))
gemm
.
alpha
*=
get_float
(
mr
.
instructions
[
"alpha"
]);
else
if
(
contains
(
mr
.
instructions
,
"beta_mul"
))
gemm
.
beta
*=
get_float
(
mr
.
instructions
[
"beta"
]);
else
if
(
contains
(
mr
.
instructions
,
"gamma_mul"
))
{
gemm
.
alpha
*=
get_float
(
mr
.
instructions
[
"gamma"
]);
gemm
.
beta
*=
get_float
(
mr
.
instructions
[
"gamma"
]);
}
return
true
;
}
else
{
return
false
;
}
return
true
;
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
ins
=
r
.
result
;
auto
gemm_ins
=
r
.
instructions
[
"gemm"
];
auto
c_ins
=
r
.
instructions
[
"c"
];
auto
gemm
=
any_cast
<
rocblas_gemm
<
op
::
dot
>>
(
gemm_ins
->
get_operator
());
// Already fused gemm
if
(
not
float_equal
(
gemm
.
beta
,
0
))
return
;
gemm
.
beta
=
1
;
if
(
ins
->
inputs
().
size
()
==
3
)
gemm
.
beta
=
1
;
if
(
not
update_gemm
(
gemm
,
ins
->
module_inputs
().
front
(),
ins
->
inputs
().
front
()
==
gemm_ins
?
0
:
1
))
return
;
// const-fold input if not standard shape since rocblas can't handle it
if
(
not
c_ins
->
get_shape
().
standard
())
{
auto
c
=
make_op
(
"contiguous"
);
auto
l
=
c
.
compute
(
c
.
compute_shape
({
c_ins
->
get_shape
()}),
{
c_ins
->
eval
()});
c_ins
=
m
.
add_literal
(
l
.
get_shape
(),
l
.
data
());
}
auto
inputs
=
gemm_ins
->
inputs
();
inputs
.
pop_back
();
inputs
.
push_back
(
c_ins
);
if
(
ins
->
inputs
().
size
()
==
3
)
{
auto
c_ins
=
r
.
instructions
[
"c"
];
// const-fold input if not standard shape since rocblas can't handle it
if
(
not
c_ins
->
get_shape
().
standard
())
{
auto
c
=
make_op
(
"contiguous"
);
auto
l
=
c
.
compute
(
c
.
compute_shape
({
c_ins
->
get_shape
()}),
{
c_ins
->
eval
()});
c_ins
=
m
.
add_literal
(
l
.
get_shape
(),
l
.
data
());
}
inputs
.
push_back
(
c_ins
);
}
inputs
.
push_back
(
ins
->
inputs
().
back
());
m
.
replace_instruction
(
ins
,
gemm
,
inputs
);
...
...
src/targets/gpu/include/migraphx/gpu/context.hpp
View file @
84725d72
...
...
@@ -30,6 +30,7 @@
#include <migraphx/gpu/hip.hpp>
#include <migraphx/env.hpp>
#include <migraphx/config.hpp>
#include <migraphx/gpu/device_name.hpp>
#include <unordered_map>
#include <memory>
...
...
@@ -215,6 +216,10 @@ struct context
return
*
current_device
;
}
bool
get_exhaustive_tune_flag
()
const
{
return
exhaustive_tune
;
}
void
set_exhaustive_tune_flag
(
bool
t
)
{
exhaustive_tune
=
t
;
}
hip_device
::
stream
&
get_stream
()
{
return
get_current_device
().
get_stream
();
}
hip_device
::
stream
&
get_stream
(
std
::
size_t
n
)
{
return
get_current_device
().
get_stream
(
n
);
}
...
...
@@ -273,7 +278,8 @@ struct context
auto
v_streams
=
v
.
at
(
"streams"
);
std
::
size_t
n_streams
=
v_streams
.
without_key
().
to
<
std
::
size_t
>
();
this
->
current_device
=
std
::
make_shared
<
hip_device
>
(
0
,
n_streams
);
auto
device
=
get_device_id
();
this
->
current_device
=
std
::
make_shared
<
hip_device
>
(
device
,
n_streams
);
}
void
wait_for
(
any_ptr
queue
)
...
...
@@ -336,7 +342,8 @@ struct context
// TODO: Make this a vector to support multiple devices
std
::
shared_ptr
<
hip_device
>
current_device
;
std
::
vector
<
shared
<
hip_event_ptr
>>
events
;
bool
measure_perf
=
false
;
bool
exhaustive_tune
=
false
;
bool
measure_perf
=
false
;
// for event perf timing
shared
<
hip_event_ptr
>
start_event
=
nullptr
;
shared
<
hip_event_ptr
>
stop_event
=
nullptr
;
...
...
src/targets/gpu/include/migraphx/gpu/convolution.hpp
View file @
84725d72
...
...
@@ -175,8 +175,9 @@ struct miopen_convolution
auto
*
miopen_stream_handle
=
ctx
.
get_stream
().
get_miopen
();
solution_ptr
=
find_solution
(
miopen_stream_handle
,
conv_problem
.
get
());
auto
status
=
miopenGetSolutionWorkspaceSize
(
solution_ptr
.
get
(),
&
workspace_size
);
solution_ptr
=
find_solution
(
miopen_stream_handle
,
conv_problem
.
get
(),
ctx
.
get_exhaustive_tune_flag
());
auto
status
=
miopenGetSolutionWorkspaceSize
(
solution_ptr
.
get
(),
&
workspace_size
);
if
(
status
!=
miopenStatusSuccess
)
MIGRAPHX_THROW
(
"MIOpen"
+
op
.
name
()
+
" : failed to get solution's workspace size"
);
...
...
@@ -233,7 +234,7 @@ struct miopen_convolution
&
perf
,
workspace
.
implicit
(),
workspace_size
,
false
);
ctx
.
get_exhaustive_tune_flag
()
);
if
(
status
!=
miopenStatusSuccess
)
MIGRAPHX_THROW
(
"MIOpen "
+
op
.
name
()
+
" : find convolution failed"
);
algo
=
perf
.
fwd_algo
;
...
...
src/targets/gpu/include/migraphx/gpu/miopen.hpp
View file @
84725d72
...
...
@@ -75,12 +75,19 @@ using miopen_find_options = MIGRAPHX_MANAGE_PTR(miopenFindOptions_t, miopenDestr
using
miopen_problem
=
MIGRAPHX_MANAGE_PTR
(
miopenProblem_t
,
miopenDestroyProblem
);
using
miopen_solution
=
MIGRAPHX_MANAGE_PTR
(
miopenSolution_t
,
miopenDestroySolution
);
inline
miopen_solution
find_solution
(
miopenHandle_t
handle
,
miopenProblem_t
problem
)
inline
miopen_solution
find_solution
(
miopenHandle_t
handle
,
miopenProblem_t
problem
,
bool
tune
=
false
)
{
miopenSolution_t
solution
;
size_t
found
=
0
;
auto
status
=
miopenFindSolutions
(
handle
,
problem
,
nullptr
,
&
solution
,
&
found
,
1
);
auto
result
=
miopen_solution
{
solution
};
size_t
found
=
0
;
miopen_find_options
fo
=
nullptr
;
if
(
tune
)
{
fo
=
make_obj
<
miopen_find_options
>
(
&
miopenCreateFindOptions
);
miopenSetFindOptionTuning
(
fo
.
get
(),
1
);
}
auto
status
=
miopenFindSolutions
(
handle
,
problem
,
fo
.
get
(),
&
solution
,
&
found
,
1
);
auto
result
=
miopen_solution
{
solution
};
if
(
status
!=
miopenStatusSuccess
or
found
==
0
)
MIGRAPHX_THROW
(
"MIOpen miopenFindSolutions failed"
);
return
result
;
...
...
src/targets/gpu/include/migraphx/gpu/prefuse_ops.hpp
View file @
84725d72
...
...
@@ -30,14 +30,14 @@
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
struct
module
;
struct
module
_pass_manager
;
namespace
gpu
{
struct
prefuse_ops
{
std
::
string
name
()
const
{
return
"gpu::prefuse_ops"
;
}
void
apply
(
module
&
m
)
const
;
void
apply
(
module
_pass_manager
&
mp
m
)
const
;
};
}
// namespace gpu
...
...
src/targets/gpu/kernels/include/migraphx/kernels/array.hpp
View file @
84725d72
...
...
@@ -105,7 +105,7 @@ constexpr auto array_for_each(T& x, Ts&... xs)
}
else
{
using
vec_type
=
std
::
remove_reference_t
<
decltype
(
array2vec
(
x
))
>
;
using
vec_type
=
remove_reference_t
<
decltype
(
array2vec
(
x
))
>
;
f
(
array2vec
(
x
),
__builtin_convertvector
(
array2vec
(
xs
),
vec_type
)...);
}
}
...
...
src/targets/gpu/kernels/include/migraphx/kernels/dpp.hpp
View file @
84725d72
...
...
@@ -72,7 +72,7 @@ __device__ T dpp_mov(T& x)
}
return
output
.
data
;
}
#endif
#endif
// MIGRAPHX_HAS_DPP
}
// namespace migraphx
#endif // MIGRAPHX_GUARD_KERNELS_DPP_HPP
src/targets/gpu/kernels/include/migraphx/kernels/gathernd.hpp
View file @
84725d72
...
...
@@ -26,7 +26,7 @@
#include <migraphx/kernels/index.hpp>
#include <migraphx/kernels/algorithm.hpp>
#include <migraphx/kernels/ops.hpp>
namespace
migraphx
{
template
<
class
T
>
...
...
@@ -53,23 +53,17 @@ __device__ void gathernd(const T& data_t, const U& indices_t, const V& output_t,
auto
indices_shape_lens
=
indices_shape
.
lens
;
auto
data_shape_lens
=
data_shape
.
lens
;
auto
num_slice_dims
=
indices_shape_lens
.
back
();
std
::
size_t
num_slices
=
accumulate
(
indices_shape_lens
.
begin
(),
indices_shape_lens
.
end
()
-
1
,
1
,
std
::
multiplies
<
std
::
size_t
>
());
std
::
size_t
slice_size
=
accumulate
(
data_shape_lens
.
begin
()
+
num_slice_dims
+
batch_dims
,
std
::
size_t
num_slices
=
accumulate
(
indices_shape_lens
.
begin
(),
indices_shape_lens
.
end
()
-
1
,
1
,
op
::
product
{});
std
::
size_t
slice_size
=
accumulate
(
data_shape_lens
.
begin
()
+
num_slice_dims
+
batch_dims
,
data_shape_lens
.
end
(),
1
,
std
::
multiplies
<
std
::
size_t
>
());
const
std
::
size_t
num_batches
=
accumulate
(
data_shape_lens
.
begin
(),
data_shape_lens
.
begin
()
+
batch_dims
,
1
,
std
::
multiplies
<
std
::
size_t
>
());
const
std
::
size_t
data_batch_stride
=
accumulate
(
data_shape_lens
.
begin
()
+
batch_dims
,
data_shape_lens
.
end
(),
1
,
std
::
multiplies
<
std
::
size_t
>
());
const
auto
num_slices_per_batch
=
num_slices
/
num_batches
;
op
::
product
{});
const
std
::
size_t
num_batches
=
accumulate
(
data_shape_lens
.
begin
(),
data_shape_lens
.
begin
()
+
batch_dims
,
1
,
op
::
product
{});
const
std
::
size_t
data_batch_stride
=
accumulate
(
data_shape_lens
.
begin
()
+
batch_dims
,
data_shape_lens
.
end
(),
1
,
op
::
product
{});
const
auto
num_slices_per_batch
=
num_slices
/
num_batches
;
ind
.
global_stride
(
output_shape
.
elements
(),
[
&
](
auto
i
)
{
const
auto
*
indices_ptr
=
indices_t
.
data
();
...
...
@@ -83,15 +77,15 @@ __device__ void gathernd(const T& data_t, const U& indices_t, const V& output_t,
int64_t
index
=
slice_indices
[
idx
];
const
std
::
size_t
input_dim_idx
=
batch_dims
+
idx
;
const
auto
input_dim
=
data_shape_lens
[
input_dim_idx
];
assert
(
index
>=
-
static_cast
<
int64_t
>
(
input_dim
)
and
index
<
static_cast
<
int64_t
>
(
input_dim
));
MIGRAPHX_ASSERT
(
index
>=
-
static_cast
<
int64_t
>
(
input_dim
)
and
index
<
static_cast
<
int64_t
>
(
input_dim
));
if
(
index
<
0
)
index
+=
input_dim
;
std
::
size_t
size_from_slice_dims
=
accumulate
(
data_shape_lens
.
begin
()
+
batch_dims
+
idx
+
1
,
data_shape_lens
.
begin
()
+
batch_dims
+
num_slice_dims
,
slice_size
,
std
::
multiplies
<
std
::
size_t
>
()
);
op
::
product
{}
);
relative_slice_offset
+=
index
*
size_from_slice_dims
;
}
...
...
src/targets/gpu/kernels/include/migraphx/kernels/hip.hpp
View file @
84725d72
...
...
@@ -24,11 +24,14 @@
#ifndef MIGRAPHX_GUARD_KERNELS_HIP_HPP
#define MIGRAPHX_GUARD_KERNELS_HIP_HPP
// Workaround macro redefinition issue with clang tidy
#if defined(__HIP_PLATFORM_HCC__) && defined(MIGRAPHX_USE_CLANG_TIDY)
#undef __HIP_PLATFORM_HCC__ // NOLINT
#endif
#ifndef MIGRAPHX_USE_HIPRTC
#include <hip/hip_runtime.h>
#include <hip/hip_fp16.h>
#include <hip/math_functions.h>
#include <hip/hip_math_constants.h>
#elif defined(MIGRAPHX_ENABLE_HIPRTC_WORKAROUNDS)
#include <hip/hip_common.h>
#include <hip/hip_math_constants.h>
#endif
#endif // MIGRAPHX_GUARD_KERNELS_HIP_HPP
src/targets/gpu/kernels/include/migraphx/kernels/math.hpp
View file @
84725d72
...
...
@@ -28,8 +28,7 @@
#include <migraphx/kernels/vec.hpp>
#include <migraphx/kernels/functional.hpp>
#include <migraphx/kernels/type_traits.hpp>
#include <hip/hip_fp16.h>
#include <hip/math_functions.h>
#include <migraphx/kernels/hip.hpp>
namespace
migraphx
{
...
...
@@ -222,7 +221,7 @@ constexpr auto min(const T& a, const U& b)
template
<
class
T
,
MIGRAPHX_REQUIRES
(
is_same
<
vec_type
<
T
>,
half
>
{})
>
constexpr
T
sin
(
T
x
)
{
constexpr
const
T
shift
=
M_PI_2
;
constexpr
const
T
shift
=
HIP_PIO2_F
;
return
migraphx
::
cos
(
shift
-
x
);
}
...
...
src/targets/gpu/kernels/include/migraphx/kernels/shape.hpp
View file @
84725d72
...
...
@@ -76,14 +76,6 @@ struct shape
constexpr
index_int
index
(
index_array
x
)
const
{
return
x
.
dot
(
strides
);
}
constexpr
index_int
index
(
std
::
initializer_list
<
index_int
>
x
)
const
{
index_int
idx
=
0
;
for
(
index_int
i
=
0
;
i
<
x
.
size
();
i
++
)
idx
+=
*
(
x
.
begin
()
+
i
)
*
strides
[
i
];
return
idx
;
}
constexpr
index_int
index
(
index_int
i
)
const
{
if
(
this
->
standard
())
...
...
src/targets/gpu/kernels/include/migraphx/kernels/types.hpp
View file @
84725d72
...
...
@@ -28,8 +28,45 @@
namespace
migraphx
{
using
index_int
=
std
::
uint32_t
;
using
diff_int
=
std
::
int32_t
;
#if defined(MIGRAPHX_ENABLE_HIPRTC_WORKAROUNDS) and defined(MIGRAPHX_USE_HIPRTC)
using
int8_t
=
signed
char
;
using
uint8_t
=
unsigned
char
;
using
int16_t
=
signed
short
;
using
uint16_t
=
unsigned
short
;
using
int32_t
=
signed
int
;
using
uint32_t
=
unsigned
int
;
using
int64_t
=
signed
long
long
;
using
uint64_t
=
unsigned
long
long
;
#elif defined(MIGRAPHX_USE_HIPRTC)
using
int8_t
=
__hip_int8_t
;
using
uint8_t
=
__hip_uint8_t
;
using
int16_t
=
__hip_int16_t
;
using
uint16_t
=
__hip_uint16_t
;
using
int32_t
=
__hip_int32_t
;
using
uint32_t
=
__hip_uint32_t
;
using
int64_t
=
__hip_int64_t
;
using
uint64_t
=
__hip_uint64_t
;
#else
using
int8_t
=
std
::
int8_t
;
using
uint8_t
=
std
::
uint8_t
;
using
int16_t
=
std
::
int16_t
;
using
uint16_t
=
std
::
uint16_t
;
using
int32_t
=
std
::
int32_t
;
using
uint32_t
=
std
::
uint32_t
;
using
int64_t
=
std
::
int64_t
;
using
uint64_t
=
std
::
uint64_t
;
#endif // MIGRAPHX_USE_HIPRTC
using
index_int
=
uint32_t
;
using
diff_int
=
int32_t
;
static_assert
(
sizeof
(
int8_t
)
==
1
,
"int8_t must be 1 bytes"
);
static_assert
(
sizeof
(
uint8_t
)
==
1
,
"uint8_t must be 1 bytes"
);
static_assert
(
sizeof
(
int16_t
)
==
2
,
"int16_t must be 2 bytes"
);
static_assert
(
sizeof
(
uint16_t
)
==
2
,
"uint16_t must be 2 bytes"
);
static_assert
(
sizeof
(
int32_t
)
==
4
,
"int32_t must be 4 bytes"
);
static_assert
(
sizeof
(
uint32_t
)
==
4
,
"uint32_t must be 4 bytes"
);
static_assert
(
sizeof
(
int64_t
)
==
8
,
"int64_t must be 8 bytes"
);
static_assert
(
sizeof
(
uint64_t
)
==
8
,
"uint64_t must be 8 bytes"
);
#define MIGRAPHX_DEVICE_CONSTEXPR constexpr __device__ __host__ // NOLINT
...
...
src/targets/gpu/lowering.cpp
View file @
84725d72
...
...
@@ -83,8 +83,7 @@ struct miopen_apply
auto
&
ctx
=
get_context
();
int8_x4_format
=
get_int8_x4_format
(
ctx
);
compute_fp32
=
get_compute_fp32_flag
();
offload_copy
=
(
mod
->
name
()
==
"main"
)
?
pass
->
offload_copy
:
false
;
offload_copy
=
(
mod
->
name
()
==
"main"
)
?
pass
->
offload_copy
:
false
;
add_generic_op
(
"contiguous"
);
...
...
src/targets/gpu/prefuse_ops.cpp
View file @
84725d72
...
...
@@ -26,6 +26,8 @@
#include <migraphx/check_shapes.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/register_op.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/dead_code_elimination.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
...
...
@@ -90,7 +92,9 @@ struct find_layernorm
{
auto
ins
=
r
.
result
;
auto
x_ins
=
r
.
instructions
[
"x"
];
auto
eps
=
r
.
instructions
[
"eps"
]
->
eval
().
at
<
float
>
();
float
eps
=
0
;
if
(
contains
(
r
.
instructions
,
"eps"
))
eps
=
r
.
instructions
[
"eps"
]
->
eval
().
at
<
float
>
();
m
.
replace_instruction
(
ins
,
layernorm
{
eps
},
x_ins
);
}
...
...
@@ -100,24 +104,26 @@ struct find_add_layernorm
{
auto
matcher
()
const
{
return
match
::
layernorm
(
)(
match
::
v
ar
(
"x"
)
(
match
::
name
(
"add"
)(
match
::
used_once
()).
bind
(
"add"
)));
return
match
::
name
(
"gpu::pre
layernorm
"
)(
match
::
ar
gs
(
match
::
name
(
"add"
)(
match
::
used_once
()).
bind
(
"add"
)));
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
ins
=
r
.
result
;
auto
add_ins
=
r
.
instructions
[
"add"
];
auto
eps
=
r
.
instructions
[
"eps"
]
->
eval
().
at
<
float
>
(
);
auto
op
=
any_cast
<
layernorm
>
(
ins
->
get_operator
()
);
m
.
replace_instruction
(
ins
,
add_layernorm
{
eps
},
add_ins
->
inputs
());
m
.
replace_instruction
(
ins
,
add_layernorm
{
op
.
epsilon
},
add_ins
->
inputs
());
}
};
}
// namespace
void
prefuse_ops
::
apply
(
module
&
m
)
const
void
prefuse_ops
::
apply
(
module
_pass_manager
&
mp
m
)
const
{
match
::
find_matches
(
m
,
find_add_layernorm
{},
find_layernorm
{});
match
::
find_matches
(
mpm
.
get_module
(),
find_layernorm
{});
mpm
.
run_pass
(
dead_code_elimination
{});
match
::
find_matches
(
mpm
.
get_module
(),
find_add_layernorm
{});
}
}
// namespace gpu
...
...
src/targets/gpu/target.cpp
View file @
84725d72
...
...
@@ -38,6 +38,7 @@
#include <migraphx/layout_nhwc.hpp>
#include <migraphx/memory_coloring.hpp>
#include <migraphx/normalize_ops.hpp>
#include <migraphx/optimize_module.hpp>
#include <migraphx/preallocate_param.hpp>
#include <migraphx/propagate_constant.hpp>
#include <migraphx/register_target.hpp>
...
...
@@ -90,6 +91,7 @@ pass enable_pass(bool enabled, pass p)
std
::
vector
<
pass
>
target
::
get_passes
(
migraphx
::
context
&
gctx
,
const
compile_options
&
options
)
const
{
auto
&
ctx
=
any_cast
<
context
>
(
gctx
);
ctx
.
set_exhaustive_tune_flag
(
options
.
exhaustive_tune
);
std
::
set
<
shape
::
type_t
>
unsupported_types
(
shape
::
types
().
begin
(),
shape
::
types
().
end
());
unsupported_types
.
erase
(
shape
::
type_t
::
float_type
);
unsupported_types
.
erase
(
shape
::
type_t
::
half_type
);
...
...
@@ -118,21 +120,13 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
rewrite_pooling
{},
dead_code_elimination
{},
rewrite_gelu
{},
dead_code_elimination
{},
eliminate_common_subexpression
{},
dead_code_elimination
{},
simplify_algebra
{},
simplify_reshapes
{},
optimize_module
{},
enable_pass
(
enabled
(
MIGRAPHX_ENABLE_NHWC
{}),
layout_nhwc
{}),
dead_code_elimination
{},
simplify_reshapes
{},
simplify_algebra
{},
prefuse_ops
{},
dead_code_elimination
{},
auto_contiguous
{},
simplify_reshapes
{},
propagate_constant
{},
dead_code_elimination
{},
optimize_module
{},
enable_pass
(
not
enabled
(
MIGRAPHX_DISABLE_POINTWISE_FUSION
{}),
fuse_pointwise
{}),
dead_code_elimination
{},
fuse_mlir
{
&
ctx
},
...
...
test/CMakeLists.txt
View file @
84725d72
#####################################################################################
#
####################################################################################
# The MIT License (MIT)
#
# Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
...
...
@@ -20,7 +20,7 @@
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#####################################################################################
#
####################################################################################
cmake_policy
(
SET CMP0057 NEW
)
...
...
@@ -49,27 +49,31 @@ function(add_test_command NAME EXE)
set_tests_properties
(
${
NAME
}
PROPERTIES DISABLED On
)
elseif
(
WIN32
)
set
(
WINPATH
)
foreach
(
PATH
${
CMAKE_FIND_ROOT_PATH
}
)
list
(
APPEND WINPATH
${
PATH
}
/bin
)
endforeach
()
file
(
GENERATE OUTPUT
"
${
CMAKE_CURRENT_BINARY_DIR
}
/test_
${
NAME
}
.cmd"
CONTENT
"set PATH=
${
WINPATH
}
;%PATH%
%1
${
ARGN
}
"
)
add_test
(
NAME
${
NAME
}
COMMAND
${
WINE_CMD
}
cmd /c
"
${
CMAKE_CURRENT_BINARY_DIR
}
/test_
${
NAME
}
.cmd"
$<TARGET_FILE:
${
EXE
}
>
)
else
()
if
(
MIGRAPHX_TEST_GDB
)
# add_test(NAME ${NAME} COMMAND ${MIGRAPHX_GDB}
#
--batch
#
--return-child-result
#
-ex "set disable-randomization off"
#
-ex run
#
-ex backtrace
#
--args $<TARGET_FILE:${EXE}> ${ARGN})
# add_test(NAME ${NAME} COMMAND ${MIGRAPHX_GDB}
# --batch
# --return-child-result
# -ex "set disable-randomization off"
# -ex run
# -ex backtrace
# --args $<TARGET_FILE:${EXE}> ${ARGN})
set
(
TEST_DIR
${
CMAKE_CURRENT_BINARY_DIR
}
/gdb/test_
${
NAME
}
)
file
(
MAKE_DIRECTORY
${
TEST_DIR
}
)
if
(
NOT EXISTS
${
TEST_DIR
}
)
if
(
NOT EXISTS
${
TEST_DIR
}
)
message
(
FATAL_ERROR
"Failed to create test directory:
${
TEST_DIR
}
"
)
endif
()
file
(
GENERATE OUTPUT
"
${
TEST_DIR
}
/run.cmake"
CONTENT
"
# Remove previous core dump
...
...
@@ -90,22 +94,27 @@ function(add_test_command NAME EXE)
add_test
(
NAME
${
NAME
}
COMMAND
${
EXE
}
${
ARGN
}
)
endif
()
endif
()
set_tests_properties
(
${
NAME
}
PROPERTIES FAIL_REGULAR_EXPRESSION
"FAILED"
)
endfunction
()
function
(
add_test_executable TEST_NAME
)
add_executable
(
${
TEST_NAME
}
EXCLUDE_FROM_ALL
${
ARGN
}
)
add_executable
(
${
TEST_NAME
}
EXCLUDE_FROM_ALL
${
ARGN
}
)
target_link_libraries
(
${
TEST_NAME
}
${
CMAKE_THREAD_LIBS_INIT
}
)
# Cmake does not add flags correctly for gcc
if
(
CMAKE_CXX_COMPILER_ID MATCHES
"GNU"
)
if
(
CMAKE_CXX_COMPILER_ID MATCHES
"GNU"
)
set_target_properties
(
${
TEST_NAME
}
PROPERTIES COMPILE_FLAGS -pthread LINK_FLAGS -pthread
)
endif
()
separate_arguments
(
MIOPEN_TEST_FLAGS_ARGS UNIX_COMMAND
${
MIOPEN_TEST_FLAGS
}
)
if
(
MIOPEN_TEST_ALL
)
set
(
TEST_COMMAND
${
TEST_NAME
}
${
MIOPEN_TEST_FLOAT_ARG
}
--all
${
MIOPEN_TEST_FLAGS_ARGS
}
)
else
()
set
(
TEST_COMMAND
${
TEST_NAME
}
${
MIOPEN_TEST_FLOAT_ARG
}
${
MIOPEN_TEST_FLAGS_ARGS
}
)
endif
()
add_test_command
(
${
TEST_NAME
}
${
TEST_COMMAND
}
)
add_dependencies
(
tests
${
TEST_NAME
}
)
add_dependencies
(
check
${
TEST_NAME
}
)
...
...
@@ -129,11 +138,11 @@ if(MIGRAPHX_ENABLE_GPU)
get_filename_component
(
BASE_NAME
${
TEST
}
NAME_WE
)
add_test_executable
(
test_gpu_
${
BASE_NAME
}
${
TEST
}
)
rocm_clang_tidy_check
(
test_gpu_
${
BASE_NAME
}
)
set_tests_properties
(
test_gpu_
${
BASE_NAME
}
PROPERTIES
COST 10
set_tests_properties
(
test_gpu_
${
BASE_NAME
}
PROPERTIES
COST 10
RESOURCE_LOCK gpu
)
target_link_libraries
(
test_gpu_
${
BASE_NAME
}
migraphx_gpu
)
target_link_libraries
(
test_gpu_
${
BASE_NAME
}
migraphx_gpu
migraphx_kernels
)
endforeach
()
endif
()
...
...
@@ -145,8 +154,8 @@ if(MIGRAPHX_ENABLE_FPGA)
get_filename_component
(
BASE_NAME
${
TEST
}
NAME_WE
)
add_test_executable
(
test_fpga_
${
BASE_NAME
}
${
TEST
}
)
rocm_clang_tidy_check
(
test_fpga_
${
BASE_NAME
}
)
set_tests_properties
(
test_fpga_
${
BASE_NAME
}
PROPERTIES
COST 10
set_tests_properties
(
test_fpga_
${
BASE_NAME
}
PROPERTIES
COST 10
RESOURCE_LOCK fpga
)
target_link_libraries
(
test_fpga_
${
BASE_NAME
}
migraphx_fpga
)
...
...
@@ -155,7 +164,8 @@ endif()
# Onnx test
set
(
TEST_ONNX_DIR
${
CMAKE_CURRENT_SOURCE_DIR
}
/onnx
)
file
(
GLOB ONNX_TESTS
${
TEST_ONNX_DIR
}
/*.cpp
)
file
(
GLOB ONNX_TESTS
${
TEST_ONNX_DIR
}
/*.cpp
)
foreach
(
ONNX_TEST
${
ONNX_TESTS
}
)
get_filename_component
(
BASE_NAME
${
ONNX_TEST
}
NAME_WE
)
set
(
TEST_NAME test_
${
BASE_NAME
}
)
...
...
@@ -163,7 +173,7 @@ foreach(ONNX_TEST ${ONNX_TESTS})
rocm_clang_tidy_check
(
${
TEST_NAME
}
)
target_link_libraries
(
${
TEST_NAME
}
migraphx_onnx migraphx_ref
)
target_include_directories
(
${
TEST_NAME
}
PUBLIC include
)
add_test
(
NAME
${
TEST_NAME
}
COMMAND $<TARGET_FILE:
${
TEST_NAME
}
> WORKING_DIRECTORY
${
TEST_ONNX_DIR
}
)
add_test
(
NAME
${
TEST_NAME
}
COMMAND $<TARGET_FILE:
${
TEST_NAME
}
> WORKING_DIRECTORY
${
TEST_ONNX_DIR
}
)
add_dependencies
(
tests
${
TEST_NAME
}
)
add_dependencies
(
check
${
TEST_NAME
}
)
endforeach
()
...
...
@@ -174,26 +184,26 @@ add_executable(test_tf tf/tf_test.cpp)
rocm_clang_tidy_check
(
test_tf
)
target_link_libraries
(
test_tf migraphx_tf migraphx_ref
)
target_include_directories
(
test_tf PUBLIC include
)
add_test
(
NAME test_tf COMMAND $<TARGET_FILE:test_tf> WORKING_DIRECTORY
${
TEST_TF_DIR
}
)
add_test
(
NAME test_tf COMMAND $<TARGET_FILE:test_tf> WORKING_DIRECTORY
${
TEST_TF_DIR
}
)
add_dependencies
(
tests test_tf
)
add_dependencies
(
check test_tf
)
add_subdirectory
(
api
)
add_subdirectory
(
verify
)
if
(
MIGRAPHX_ENABLE_PYTHON
)
add_subdirectory
(
py
)
add_subdirectory
(
py
)
endif
()
function
(
test_header NAME HEADER
)
file
(
WRITE
${
CMAKE_CURRENT_BINARY_DIR
}
/header-main-include-
${
NAME
}
.cpp
file
(
WRITE
${
CMAKE_CURRENT_BINARY_DIR
}
/header-main-include-
${
NAME
}
.cpp
"#include <
${
HEADER
}
>
\n
int main() {}
\n
"
)
file
(
WRITE
${
CMAKE_CURRENT_BINARY_DIR
}
/header-static-include-
${
NAME
}
.cpp
file
(
WRITE
${
CMAKE_CURRENT_BINARY_DIR
}
/header-static-include-
${
NAME
}
.cpp
"#include <
${
HEADER
}
>
\n
"
)
add_test_executable
(
${
NAME
}
${
CMAKE_CURRENT_BINARY_DIR
}
/header-main-include-
${
NAME
}
.cpp
${
CMAKE_CURRENT_BINARY_DIR
}
/header-main-include-
${
NAME
}
.cpp
${
CMAKE_CURRENT_BINARY_DIR
}
/header-static-include-
${
NAME
}
.cpp
)
endfunction
()
...
...
@@ -206,6 +216,7 @@ function(test_headers PREFIX)
string
(
MAKE_C_IDENTIFIER
${
HEADER_REL
}
TEST_NAME
)
get_filename_component
(
BASE_NAME
${
HEADER
}
NAME_WE
)
test_header
(
header_
${
TEST_NAME
}
${
PREFIX
}
/
${
BASE_NAME
}
.hpp
)
if
(
MIGRAPHX_ENABLE_GPU
)
target_link_libraries
(
header_
${
TEST_NAME
}
migraphx_gpu
)
endif
()
...
...
@@ -214,6 +225,7 @@ endfunction()
test_headers
(
migraphx
${
CMAKE_SOURCE_DIR
}
/src/include/migraphx/*.hpp
)
test_headers
(
migraphx/ref
${
CMAKE_SOURCE_DIR
}
/src/targets/ref/include/migraphx/ref/*.hpp
)
if
(
MIGRAPHX_ENABLE_GPU
)
test_headers
(
migraphx/gpu
${
CMAKE_SOURCE_DIR
}
/src/targets/gpu/include/migraphx/gpu/*.hpp
)
test_headers
(
migraphx/gpu
${
CMAKE_SOURCE_DIR
}
/src/targets/gpu/include/migraphx/gpu/*.hpp
)
endif
()
test/api/test_gpu.cpp
View file @
84725d72
...
...
@@ -35,6 +35,7 @@ TEST_CASE(load_and_run)
auto
shapes_before
=
p
.
get_output_shapes
();
migraphx
::
compile_options
options
;
options
.
set_offload_copy
();
options
.
set_exhaustive_tune_flag
();
p
.
compile
(
migraphx
::
target
(
"gpu"
),
options
);
auto
shapes_after
=
p
.
get_output_shapes
();
CHECK
(
shapes_before
.
size
()
==
1
);
...
...
test/api/test_save_load.cpp
View file @
84725d72
...
...
@@ -30,7 +30,6 @@ TEST_CASE(load_save_default)
std
::
string
filename
=
"migraphx_api_load_save.mxr"
;
auto
p1
=
migraphx
::
parse_onnx
(
"conv_relu_maxpool_test.onnx"
);
auto
s1
=
p1
.
get_output_shapes
();
migraphx
::
save
(
p1
,
filename
.
c_str
());
auto
p2
=
migraphx
::
load
(
filename
.
c_str
());
auto
s2
=
p2
.
get_output_shapes
();
...
...
test/gpu/jit.cpp
View file @
84725d72
...
...
@@ -35,13 +35,14 @@
#include <migraphx/gpu/compile_hip.hpp>
#include <migraphx/gpu/compile_hip_code_object.hpp>
#include <migraphx/gpu/compiler.hpp>
#include <migraphx_kernels.hpp>
// NOLINTNEXTLINE
const
std
::
string
write_2s
=
R"__migraphx__(
#include <hip/hip_runtime.h>
extern "C" {
__global__ void write(
int8_t
* data)
__global__ void write(
char
* data)
{
int num = threadIdx.x + blockDim.x * blockIdx.x;
data[num] = 2;
...
...
@@ -58,7 +59,7 @@ const std::string add_2s_binary = R"__migraphx__(
#include <hip/hip_runtime.h>
extern "C" {
__global__ void add_2(
std::int8_t* x, std::int8_t
* y)
__global__ void add_2(
char* x, char
* y)
{
int num = threadIdx.x + blockDim.x * blockIdx.x;
y[num] = x[num] + 2;
...
...
@@ -137,7 +138,8 @@ int main() {}
const
std
::
string
math_template
=
R"__migraphx__(
#include <migraphx/kernels/pointwise.hpp>
#include <migraphx/kernels/math.hpp>
#include <migraphx/kernels/types.hpp>
using namespace migraphx;
extern "C" {
__global__ void kernel(${type}* p)
{
...
...
test/memory_coloring_test.cpp
View file @
84725d72
...
...
@@ -691,7 +691,7 @@ TEST_CASE(test38)
auto
p83
=
m
.
add_instruction
(
pass_op
{},
p78
,
p77
);
m
.
add_instruction
(
pass_op
{},
output
,
p83
,
p63
);
run_pass
(
m
);
CHECK
(
m
.
get_parameter_shape
(
"scratch"
).
bytes
()
==
7225344
);
// Optimal solution is
6422528
CHECK
(
m
.
get_parameter_shape
(
"scratch"
).
bytes
()
==
6422528
);
CHECK
(
no_allocate
(
m
));
}
...
...
@@ -729,7 +729,7 @@ TEST_CASE(test39)
run_pass
(
*
smod
);
}
CHECK
(
mm
->
get_parameter_shape
(
"scratch"
).
bytes
()
==
4
);
CHECK
(
mm
->
get_parameter_shape
(
"scratch"
).
bytes
()
==
1
);
CHECK
(
then_mod
->
get_parameter_shape
(
"scratch"
).
bytes
()
==
24
);
CHECK
(
else_mod
->
get_parameter_shape
(
"scratch"
).
bytes
()
==
24
);
CHECK
(
no_allocate
(
*
mm
));
...
...
@@ -3374,7 +3374,7 @@ TEST_CASE(rnn_dom)
m
.
add_instruction
(
pass_op
{},
moutput
,
mx250
,
mx249
,
mx248
);
run_pass
(
m
);
CHECK
(
m
.
get_parameter_shape
(
"scratch"
).
bytes
()
==
1600
);
CHECK
(
m
.
get_parameter_shape
(
"scratch"
).
bytes
()
==
1824
);
// Optimal is
1600
CHECK
(
no_allocate
(
m
));
CHECK
(
is_disjoint
({
mx0
,
mx8
}));
CHECK
(
is_disjoint
({
mx0
,
mx8
}));
...
...
@@ -3790,4 +3790,23 @@ TEST_CASE(literal_test)
CHECK
(
lit
==
result
);
}
TEST_CASE
(
test_tuple
)
{
migraphx
::
module
m
;
auto
s1
=
migraphx
::
shape
{
migraphx
::
shape
::
float_type
,
{
8
}};
auto
s2
=
migraphx
::
shape
{
migraphx
::
shape
::
half_type
,
{
10
}};
auto
s
=
migraphx
::
shape
{{
s1
,
s2
}};
auto
a1
=
add_alloc
(
m
,
s
);
auto
m1
=
m
.
add_instruction
(
pass_op
{},
a1
);
auto
a2
=
add_alloc
(
m
,
{
migraphx
::
shape
::
float_type
,
{
4
}});
m
.
add_instruction
(
pass_op
{},
a2
,
m1
);
run_pass
(
m
);
CHECK
(
m
.
get_parameter_shape
(
"scratch"
).
bytes
()
==
68
);
CHECK
(
no_allocate
(
m
));
CHECK
(
is_disjoint
({
a1
,
a2
}));
}
int
main
(
int
argc
,
const
char
*
argv
[])
{
test
::
run
(
argc
,
argv
);
}
Prev
1
2
3
4
5
6
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment