GitLab project: gaoqiong/MIGraphX

Commit 13d14c66, authored Oct 24, 2023 by Brian Pickrell
Merge branch 'develop' into dyn_resize_gather
Parents: f4e7d9d9, d1abf06f

Showing 20 changed files with 699 additions and 449 deletions.
src/targets/gpu/jit/ck_gemm.cpp                                            +3   -225
src/targets/gpu/jit/ck_gemm_softmax_gemm.cpp                               +236 -0
src/targets/gpu/jit/mlir.cpp                                               +1   -3
src/targets/gpu/jit/roialign.cpp                                           +1   -1
src/targets/gpu/kernels/include/migraphx/kernels/ck.hpp                    +11  -0
src/targets/gpu/kernels/include/migraphx/kernels/ck_gemm.hpp               +0   -11
src/targets/gpu/kernels/include/migraphx/kernels/ck_gemm_softmax_gemm.hpp  +74  -0
src/targets/gpu/kernels/include/migraphx/kernels/index.hpp                 +33  -30
src/targets/gpu/lowering.cpp                                               +29  -2
src/targets/gpu/mlir.cpp                                                   +124 -14
src/targets/gpu/prefuse_ops.cpp                                            +80  -20
src/targets/gpu/target.cpp                                                 +3   -0
src/targets/gpu/time_op.cpp                                                +9   -12
src/tf/parse_reshape.cpp                                                   +1   -2
src/verify_args.cpp                                                        +19  -17
test/CMakeLists.txt                                                        +16  -92
test/auto_contiguous_test.cpp                                              +26  -2
test/check_shapes_test.cpp                                                 +30  -15
test/eliminate_allocation_test.cpp                                         +1   -1
test/eliminate_concat_test.cpp                                             +2   -2
src/targets/gpu/jit/ck_gemm.cpp

@@ -27,6 +27,7 @@
 #include <migraphx/make_op.hpp>
 #include <migraphx/gpu/context.hpp>
 #include <migraphx/gpu/ck.hpp>
+#include <migraphx/env.hpp>
 #include <migraphx/file_buffer.hpp>
 #include <migraphx/gpu/compile_gen.hpp>
@@ -37,8 +38,6 @@
 #include <migraphx/reduce_dims.hpp>
 #include <migraphx/stringutils.hpp>
 #include "ck/host/device_gemm_multiple_d.hpp"

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -46,12 +45,6 @@ namespace gpu {

 using namespace migraphx::gpu::gen; // NOLINT

-MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_LOG_CK_GEMM);
-MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_CK_TUNING);
-MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_CK_TUNING_VALUE);
-MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_CK_DEBUG);
-MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TUNE_CK);

 // NOLINTNEXTLINE
 static const char* const ck_gemm_kernel = R"__migraphx__(
 #include <args.hpp>
@@ -79,220 +72,10 @@ MIGRAPHX_GLOBAL void ${kernel}(${params})
 )__migraphx__";

-// NOLINTNEXTLINE
-static const char* const disable_warning_pragma = R"__migraphx__(
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Weverything"
-${content}
-#pragma clang diagnostic pop
-)__migraphx__";
-
-template <class P>
-static std::string ck_disable_warnings(P p)
-{
-    return interpolate_string(disable_warning_pragma,
-                              {{"content", std::string{p.first, p.second}}});
-}
-
-static std::unordered_map<std::string, std::string> create_ck_header_strings()
-{
-    std::unordered_map<std::string, std::string> result;
-    auto ck_headers = ck::host::GetHeaders();
-    std::transform(
-        ck_headers.begin(), ck_headers.end(), std::inserter(result, result.begin()), [&](auto&& p) {
-            return std::make_pair(p.first, ck_disable_warnings(p.second));
-        });
-    return result;
-}
-
-static std::vector<src_file> create_ck_headers()
-{
-    static const auto& header_strings = create_ck_header_strings();
-    std::vector<src_file> srcs;
-    std::transform(
-        header_strings.begin(), header_strings.end(), std::back_inserter(srcs), [&](auto&& p) {
-            return src_file{fs::path{p.first},
-                            {p.second.data(), p.second.data() + p.second.size()}};
-        });
-    return srcs;
-}
-
-static const std::vector<src_file>& ck_headers()
-{
-    static const auto& headers = create_ck_headers();
-    return headers;
-}
-
-static bool transposed_matrix(const shape& s) { return s.strides().back() != 1; }
-
-using tuning_entry = std::pair<std::vector<shape>, size_t>;
-static std::vector<tuning_entry> read_tuning(const std::string& s)
-{
-    if(not fs::exists(s))
-        return {};
-    return from_value<std::vector<tuning_entry>>(from_json_string(read_string(s)));
-}
-
-static float matrix_distance(const shape& x, const shape& y)
-{
-    if(x.type() != y.type())
-        return std::numeric_limits<float>::max();
-    if(transposed_matrix(x) != transposed_matrix(y))
-        return std::numeric_limits<float>::max();
-    auto sum_squared = std::inner_product(x.lens().rbegin(),
-                                          x.lens().rbegin() + 2,
-                                          y.lens().rbegin(),
-                                          0,
-                                          std::plus<>{},
-                                          [](auto a, auto b) { return (a - b) * (a - b); });
-    return std::sqrt(sum_squared);
-}
-
-static std::size_t get_tuning_for(const std::vector<shape>& inputs)
-{
-    static auto tuning = read_tuning(string_value_of(MIGRAPHX_CK_TUNING{}, ""));
-    if(tuning.empty())
-    {
-        std::cout << "*********** Warning: No CK tuning! for config:" << std::endl;
-        std::cout << "  " << inputs[0] << std::endl;
-        std::cout << "  " << inputs[1] << std::endl;
-        std::cout << "  " << inputs[2] << std::endl;
-    }
-    auto it = std::find_if(
-        tuning.begin(), tuning.end(), [&](const auto& p) { return p.first == inputs; });
-    if(it == tuning.end())
-    {
-        std::cout << "*********** Warning: CK tuning missing for config!" << std::endl;
-        std::cout << "  " << inputs[0] << std::endl;
-        std::cout << "  " << inputs[1] << std::endl;
-        std::cout << "  " << inputs[2] << std::endl;
-        std::vector<std::pair<float, std::size_t>> w;
-        std::transform(tuning.begin(), tuning.end(), std::back_inserter(w), [&](const auto& p) {
-            if(inputs.size() < 3 or p.first.size() < 3)
-                MIGRAPHX_THROW("Invalid CK config");
-            auto avg_distance = std::inner_product(
-                p.first.begin(),
-                p.first.begin() + 3,
-                inputs.begin(),
-                0.0f,
-                std::plus<>{},
-                [](const auto& x, const auto& y) { return matrix_distance(x, y) / 3.0f; });
-            return std::make_pair(avg_distance, p.second);
-        });
-        std::sort(w.begin(), w.end());
-        std::size_t default_value = 4;
-        if(not w.empty())
-            default_value = w.front().second;
-        auto tuning_val = value_of(MIGRAPHX_CK_TUNING_VALUE{}, default_value);
-        std::cout << "*********** Warning: CK try tuning: " << tuning_val << std::endl;
-        return tuning_val;
-    }
-    return it->second;
-}

 struct ck_gemm_compiler : compiler<ck_gemm_compiler>
 {
-    static std::string get_layout(const shape& s)
-    {
-        return transposed_matrix(s) ? "ck::tensor_layout::gemm::ColumnMajor"
-                                    : "ck::tensor_layout::gemm::RowMajor";
-    }
-
-    static ck::host::DataType get_type(const shape& s)
-    {
-        if(s.type() == shape::half_type)
-            return ck::host::DataType::Half;
-        else if(s.type() == shape::float_type)
-            return ck::host::DataType::Float;
-        else if(s.type() == shape::int8_type)
-            return ck::host::DataType::Int8;
-        else if(s.type() == shape::int32_type)
-            return ck::host::DataType::Int32;
-        MIGRAPHX_THROW("Unsupported ck type");
-    }
-
-    template <class Iterator, class F>
-    static std::string ck_tuple(Iterator start, Iterator last, F f)
-    {
-        std::vector<std::string> s;
-        std::transform(start, last, std::back_inserter(s), f);
-        return "ck::Tuple<" + join_strings(s, ",") + ">";
-    }
-
-    static std::vector<shape> adjust_inputs(std::vector<shape> inputs, bool& swap_inputs)
-    {
-        swap_inputs  = false;
-        auto c_shape = inputs.back();
-        if(not transposed_matrix(c_shape))
-            return inputs;
-        std::vector<int64_t> perm(c_shape.lens().size());
-        std::iota(perm.begin(), perm.end(), 0);
-        std::swap(perm[perm.size() - 1], perm[perm.size() - 2]);
-        std::transform(inputs.begin(), inputs.end(), inputs.begin(), [&](shape s) {
-            return reorder_shape(s, perm);
-        });
-        swap_inputs = true;
-        return inputs;
-    }
-
-    static std::size_t get_batch_count(const shape& s)
-    {
-        return std::accumulate(
-            s.lens().rbegin() + 2, s.lens().rend(), std::size_t{1}, std::multiplies<std::size_t>());
-    }
-
-    static void fold_batch_dims(shape& s)
-    {
-        auto lens = s.lens();
-        if(lens.size() <= 2)
-            return;
-        auto batch_count = get_batch_count(s);
-        auto m1          = lens.at(lens.size() - 2);
-        auto m2          = lens.at(lens.size() - 1);
-        if(transposed_matrix(s))
-            s = shape{s.type(), {m1, m2 * batch_count}};
-        else
-            s = shape{s.type(), {m1 * batch_count, m2}};
-    }
-
-    static void remove_batch_dims(shape& s)
-    {
-        auto lens = s.lens();
-        if(lens.size() <= 2)
-            return;
-        auto m1 = lens.at(lens.size() - 2);
-        auto m2 = lens.at(lens.size() - 1);
-        s       = shape{s.type(), {m1, m2}};
-    }
-
     std::vector<std::string> names() const { return {"ck_gemm", "gpu::ck_gemm"}; }

-    static bool standard_batch(const shape& s)
-    {
-        if(s.lens().size() < 3)
-            return true;
-        std::vector<std::size_t> lens(s.lens().begin(), s.lens().end() - 2);
-        std::vector<std::size_t> strides(s.strides().begin(), s.strides().end() - 2);
-        auto base = *(s.lens().end() - 2) * *(s.lens().end() - 1);
-        std::transform(strides.begin(), strides.end(), strides.begin(), [&](auto stride) {
-            return stride / base;
-        });
-        return shape{s.type(), lens, strides}.standard();
-    }
-
-    bool can_fold_batch(const std::vector<shape>& inputs) const
-    {
-        const auto& b_shape = inputs[1];
-        if(std::any_of(inputs.begin() + 2, inputs.end() - 1, [](auto input) {
-               return not standard_batch(input);
-           }))
-            return false;
-        const auto& b_strides = b_shape.strides();
-        return std::all_of(
-            b_strides.begin(), b_strides.end() - 2, [](auto stride) { return stride == 0; });
-    }
-
     ck::host::device_gemm_multiple_d::Problem
     create_problem(const std::vector<shape>& inputs, const value& v) const
     {
@@ -301,8 +84,7 @@ struct ck_gemm_compiler : compiler<ck_gemm_compiler>
         const auto& c_shape = inputs.back();
-        auto rank           = a_shape.ndim();
+        // cppcheck-suppress unreadVariable
+        auto rank        = a_shape.ndim();
         auto batch_count = get_batch_count(c_shape);
         auto m           = c_shape.lens()[rank - 2];
         m                = can_fold_batch(inputs) ? m * batch_count : m;
@@ -352,12 +134,8 @@ struct ck_gemm_compiler : compiler<ck_gemm_compiler>
     operation compile_op(context& ctx, const std::vector<shape>& inputs, const value& v) const
     {
         const auto& a_shape = inputs[0];
         const auto& b_shape = inputs[1];
         const auto& c_shape = inputs.back();
-        auto tuning_value   = v.get("tuning_value", 4);
-        if(not v.contains("tuning_value"))
-            tuning_value = get_tuning_for({a_shape, b_shape, c_shape});
+        auto tuning_value   = v.get("tuning_value", 34);
         auto batch_count    = get_batch_count(c_shape);
         auto problem        = create_problem(inputs, v);
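For context on the removed fallback: matrix_distance treats the trailing two dimensions of a shape as a point and returns their Euclidean distance, and get_tuning_for averaged that distance over the three gemm shapes to pick the nearest tuned entry when no exact match existed. A minimal standalone sketch of that arithmetic (the problem sizes here are invented for illustration, not taken from any tuning database):

#include <cmath>
#include <cstdio>

// Sketch of the removed matrix_distance logic: compare only the trailing
// two dimensions (e.g. M and N of a gemm operand) of two shapes.
static float matrix_distance_sketch(int m1, int n1, int m2, int n2)
{
    float dm = static_cast<float>(m1 - m2);
    float dn = static_cast<float>(n1 - n2);
    return std::sqrt(dm * dm + dn * dn);
}

int main()
{
    // A 384x768 problem vs. a tuned 512x768 entry: distance is |384-512| = 128,
    // so that entry ranks ahead of, say, a 1024x1024 entry (distance ~689.3).
    std::printf("%f\n", matrix_distance_sketch(384, 768, 512, 768));   // 128.0
    std::printf("%f\n", matrix_distance_sketch(384, 768, 1024, 1024)); // ~689.3
}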
src/targets/gpu/jit/ck_gemm_softmax_gemm.cpp (new file, mode 100644)

/*
 * The MIT License (MIT)
 *
 * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include <fstream>
#include <migraphx/filesystem.hpp>
#include <migraphx/gpu/compiler.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/env.hpp>
#include <migraphx/file_buffer.hpp>
#include <migraphx/gpu/ck.hpp>
#include <migraphx/gpu/compile_gen.hpp>
#include <migraphx/gpu/compile_hip.hpp>
#include <migraphx/gpu/compile_hip_code_object.hpp>
#include <migraphx/module.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/reduce_dims.hpp>
#include <migraphx/stringutils.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

using namespace migraphx::gpu::gen; // NOLINT

// NOLINTNEXTLINE
static const char* const ck_gemm_softmax_gemm_kernel = R"__migraphx__(
#include <args.hpp>
#include <migraphx/kernels/ck_gemm_softmax_gemm.hpp>
#include <migraphx/kernels/pointwise.hpp>
#include <migraphx/kernels/ops.hpp>
#include <migraphx/kernels/integral_constant.hpp>
#include <migraphx/kernels/generic_constant.hpp>
#include <${include}>

namespace migraphx {

${preamble}

extern "C" {

MIGRAPHX_GLOBAL void ${kernel}(${params})
{
    transform_args(make_tensors(), rotate_last())(${args})([](auto... xs) {
        auto settings = make_ck_gemm_softmax_gemm_settings(MIGRAPHX_MAKE_CONSTANT(float{SCALE}));
        ck_gemm_softmax_gemm<${solution}, ${blocks_per_batch}>(settings, xs...);
    });
}

}

} // namespace migraphx

)__migraphx__";

struct ck_gemm_softmax_gemm_compiler : compiler<ck_gemm_softmax_gemm_compiler>
{
    std::vector<std::string> names() const
    {
        return {"ck_gemm_softmax_gemm", "gpu::ck_gemm_softmax_gemm"};
    }

    ck::host::device_batched_gemm_softmax_gemm::Problem
    create_problem(const std::vector<shape>& inputs, const value&) const
    {
        const auto& a_shape  = inputs[0];
        const auto& b_shape  = inputs[1];
        const auto& b1_shape = inputs[2];
        const auto& c_shape  = inputs.back();

        // cppcheck-suppress unreadVariable
        auto rank        = a_shape.ndim();
        auto batch_count = get_batch_count(c_shape);
        auto m           = c_shape.lens()[rank - 2];
        m                = can_fold_batch(inputs) ? m * batch_count : m;
        auto n           = c_shape.lens().back();
        auto k           = a_shape.lens().back();
        auto o           = c_shape.lens().back();

        const bool trans_a  = transposed_matrix(a_shape);
        const bool trans_b  = transposed_matrix(b_shape);
        const bool trans_b1 = transposed_matrix(b1_shape);
        const bool trans_c  = transposed_matrix(c_shape);
        const auto a_type   = get_type(a_shape);
        const auto b_type   = get_type(b_shape);
        const auto b1_type  = get_type(b1_shape);
        const auto c_type   = get_type(c_shape);

        std::string ck_passthrough = "ck_passthrough";
        return ck::host::device_batched_gemm_softmax_gemm::Problem{m,
                                                                   n,
                                                                   k,
                                                                   o,
                                                                   trans_a,
                                                                   trans_b,
                                                                   trans_b1,
                                                                   trans_c,
                                                                   a_type,
                                                                   b_type,
                                                                   b1_type,
                                                                   c_type,
                                                                   ck_passthrough,
                                                                   ck_passthrough,
                                                                   ck_passthrough,
                                                                   ck_passthrough};
    }

    operation compile_op(context& ctx, const std::vector<shape>& inputs, const value& v) const
    {
        const auto& c_shape = inputs.back();
        auto tuning_value   = v.get("tuning_value", 5);
        auto batch_count    = get_batch_count(c_shape);
        auto problem        = create_problem(inputs, v);

        const auto include_header   = problem.GetIncludeHeader();
        const auto solutions        = problem.GetSolutions(ctx.get_current_device().get_gfx_name());
        const auto& solution        = solutions.at(tuning_value);
        const auto template_str     = solution.template_str;
        const auto blocks_per_batch = solution.grid_size;
        const auto block_size       = solution.block_size;

        hip_compile_options options;
        options.additional_src_files = ck_headers();
        auto grid_size = can_fold_batch(inputs) ? blocks_per_batch : batch_count * blocks_per_batch;
        options.set_launch_params(v, grid_size * block_size, block_size);
        options.inputs         = inputs;
        options.output         = c_shape;
        options.kernel_name    = v.get("kernel", "ck_gemm_softmax_gemm_kernel");
        options.virtual_inputs = inputs;
        if(can_fold_batch(inputs))
        {
            auto vinputs = inputs;
            fold_batch_dims(vinputs[0]);
            remove_batch_dims(vinputs[1]);
            std::for_each(vinputs.begin() + 2, vinputs.end(), fold_batch_dims);
            options.virtual_inputs = vinputs;
        }

        if(v.get("check", false) or enabled(MIGRAPHX_CK_DEBUG{}))
            options.params += " -DMIGRAPHX_CK_CHECK=1";

        // scale
        assert(v.contains("scale"));
        auto scale = v.at("scale").to<float>();
        options.params += " -DSCALE=" + std::to_string(scale);

        auto src = interpolate_string(ck_gemm_softmax_gemm_kernel,
                                      {{"solution", template_str},
                                       {"include", include_header},
                                       {"params", enum_params(inputs.size(), "void * private_p")},
                                       {"args", enum_params(inputs.size(), "private_p")},
                                       {"blocks_per_batch", to_string(blocks_per_batch)},
                                       {"preamble", v.get("preamble", std::string{})},
                                       {"kernel", options.kernel_name}});

        return compile_hip_code_object(src, options);
    }

    value create_settings(instruction_ref ins, const operation& op) const
    {
        auto v      = op.to_value();
        v["kernel"] = "ck_gemm_softmax_gemm_kernel";
        if(not ins->module_inputs().empty())
        {
            auto* pm      = ins->module_inputs().front();
            v["preamble"] = generate_pointwise(*pm, "post_ck_gemm_softmax_gemm_function") +
                            "\nMIGRAPHX_LIFT_CLASS(post_ck_gemm_softmax_gemm, "
                            "post_ck_gemm_softmax_gemm_function);";
            v["post"]   = "ck_function_adaptor<post_ck_gemm_softmax_gemm>";
            v["kernel"] = "ck_gemm_softmax_gemm_" + generate_name_from_ops(*pm) + "_kernel";
        }
        return v;
    }

    compiler_replace
    compile(context& ctx, instruction_ref ins, const operation& op, const value& solution) const
    {
        auto shapes = to_shapes(ins->inputs());
        auto v      = create_settings(ins, op);
        if(not solution.is_null())
            v["tuning_value"] = solution;
        return {compile_op(ctx, shapes, v),
                [=](module& m, instruction_ref ins2, const operation& code_object) {
                    if(enabled(MIGRAPHX_LOG_CK_GEMM{}))
                    {
                        std::vector<shape> gemm_shapes{
                            shapes[0], shapes[1], shapes.back().with_type(shapes[0].type())};
                        std::cout << "gpu::ck_gemm_softmax_gemm: "
                                  << to_json_string(to_value(gemm_shapes)) << std::endl;
                    }
                    m.replace_instruction(ins2, code_object, ins2->inputs());
                }};
    }

    optional<tuning_config>
    get_tuning_config(context& ctx, instruction_ref ins, const operation& op, bool exhaustive) const
    {
        if(not exhaustive and not enabled(MIGRAPHX_TUNE_CK{}))
            return nullopt;
        tuning_config tc;
        auto shapes    = to_shapes(ins->inputs());
        auto problem   = create_problem(shapes, create_settings(ins, op));
        auto solutions = problem.GetSolutions(ctx.get_current_device().get_gfx_name());
        tc.solutions.resize(solutions.size());
        std::iota(tc.solutions.begin(), tc.solutions.end(), 0);
        std::vector<shape> gemm_shapes{shapes[0], shapes[1], shapes.back()};
        tc.problem = to_value(gemm_shapes);
        return tc;
    }
};

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
src/targets/gpu/jit/mlir.cpp

@@ -57,11 +57,9 @@ struct mlir_compiler : compiler<mlir_compiler>
                                              const operation&,
                                              bool exhaustive) const
     {
-        if(not exhaustive)
-            return nullopt;
         auto shapes = to_shapes(ins->inputs());
         auto* smod  = ins->module_inputs().front();
-        return get_tuning_config_mlir(ctx, *smod, shapes);
+        return get_tuning_config_mlir(ctx, *smod, shapes, exhaustive);
     }
 };
src/targets/gpu/jit/roialign.cpp

@@ -81,7 +81,7 @@ struct roialign_compiler : compiler<roialign_compiler>
         // coord_trans_mode
         auto ctm = v.at("coordinate_transformation_mode").to<std::string>();
-        float rois_offset = (ctm == "output_half_pixel") ? -0.5f : 0.0f;
+        float rois_offset = (ctm == "half_pixel") ? -0.5f : 0.0f;
         options.params += " -DROIS_OFFSET=" + std::to_string(rois_offset);
         // spatial_scale
src/targets/gpu/kernels/include/migraphx/kernels/ck.hpp

@@ -154,6 +154,17 @@ struct ck_add
     }
 };

+// In CK, the B matrix is ordered as N,K instead of K,N
+template <class Dims>
+constexpr auto ck_transposeb_dims(Dims dims)
+{
+    return unpack(dims, [](auto k, auto n) { return make_const_array(n, k); });
+}
+
+template <class Tensor>
+using ck_transposeb = decltype(make_shape(ck_transposeb_dims(get_shape_c<Tensor>{}.lens),
+                                          ck_transposeb_dims(get_shape_c<Tensor>{}.strides)));
+
 #ifdef MIGRAPHX_CK_CHECK
 #define MIGRAPHX_CK_STATIC_ASSERT static_assert
 #else
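The relocated ck_transposeb helper only swaps the two compile-time dimensions and strides, so a row-major K x N B tensor is reinterpreted as an N x K tensor without touching memory, which is the B ordering CK expects. A minimal runnable illustration of the swap, using plain arrays in place of the kernel's compile-time containers (the sizes are invented for illustration):

#include <array>
#include <cstdio>

// Sketch: a row-major B with lens {K, N} = {64, 128} has strides {128, 1}.
// Swapping both arrays describes the same memory as an {N, K} = {128, 64}
// tensor with strides {1, 128}, i.e. the N,K ordering CK wants.
int main()
{
    std::array<int, 2> lens{64, 128};
    std::array<int, 2> strides{128, 1};
    std::array<int, 2> ck_lens{lens[1], lens[0]};
    std::array<int, 2> ck_strides{strides[1], strides[0]};
    std::printf("lens {%d, %d}, strides {%d, %d}\n",
                ck_lens[0], ck_lens[1], ck_strides[0], ck_strides[1]);
}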
src/targets/gpu/kernels/include/migraphx/kernels/ck_gemm.hpp

@@ -33,17 +33,6 @@
 namespace migraphx {

-// In CK, the B matrix is ordered as N,K instead of K,N
-template <class Dims>
-constexpr auto ck_transposeb_dims(Dims dims)
-{
-    return unpack(dims, [](auto k, auto n) { return make_const_array(n, k); });
-}
-
-template <class Tensor>
-using ck_transposeb = decltype(make_shape(ck_transposeb_dims(get_shape_c<Tensor>{}.lens),
-                                          ck_transposeb_dims(get_shape_c<Tensor>{}.strides)));
-
 template <class G, class E, class A, class B, class... Ds>
 __device__ void ck_gemm_matrix(E e, A a, B b, Ds... ds)
 {
src/targets/gpu/kernels/include/migraphx/kernels/ck_gemm_softmax_gemm.hpp (new file, mode 100644)

/*
 * The MIT License (MIT)
 *
 * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#ifndef MIGRAPHX_GUARD_KERNELS_CK_GEMM_SOFTMAX_GEMM_HPP
#define MIGRAPHX_GUARD_KERNELS_CK_GEMM_SOFTMAX_GEMM_HPP

#include <migraphx/kernels/index.hpp>
#include <migraphx/kernels/algorithm.hpp>
#include <migraphx/kernels/integral_constant.hpp>
#include <migraphx/kernels/tensor_view.hpp>
#include <migraphx/kernels/ck.hpp>
#include <migraphx/kernels/gemm_batcher.hpp>

namespace migraphx {

template <class T>
struct ck_gemm_softmax_gemm_settings
{
    T scale{};
};

template <class... Ts>
constexpr ck_gemm_softmax_gemm_settings<Ts...> make_ck_gemm_softmax_gemm_settings(Ts... xs)
{
    return {xs...};
}

template <class G, class C, class A, class B, class B1, class Settings>
__device__ void ck_gemm_softmax_gemm_matrix(C c, A a, B b, B1 b1, Settings s)
{
    constexpr auto desc = G::make_descriptor(to_ck_tensor<A>(),
                                             to_ck_tensor<ck_transposeb<B>>(),
                                             to_ck_tensor<ck_transposeb<B1>>(),
                                             to_ck_tensor<C>());

    static_assert(desc.IsValid(), "Invalid ck gemm.");

    G::Run(desc,
           s.scale,
           to_ck_const_pointer(a.data()),
           to_ck_const_pointer(b.data()),
           to_ck_const_pointer(b1.data()),
           to_ck_pointer(c.data()));
}

template <class G, index_int BlocksPerBatch, class... Ts, class Settings>
__device__ void ck_gemm_softmax_gemm(Settings s, Ts... xs)
{
    gemm_batch_args(make_index(), _c<BlocksPerBatch>, xs...)(
        [&](auto... ys) { ck_gemm_softmax_gemm_matrix<G>(ys..., s); });
}

} // namespace migraphx
#endif
src/targets/gpu/kernels/include/migraphx/kernels/index.hpp

@@ -31,6 +31,14 @@
 #include <migraphx/kernels/debug.hpp>
 #include <migraphx/kernels/functional.hpp>

+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wreserved-identifier"
+extern "C" __device__ size_t __ockl_get_enqueued_local_size(uint); // NOLINT
+extern "C" __device__ size_t __ockl_get_local_size(uint);          // NOLINT
+#pragma clang diagnostic pop
+#endif
+
 namespace migraphx {

 #if defined(MIGRAPHX_NGLOBAL) && defined(MIGRAPHX_NLOCAL)
@@ -45,43 +53,37 @@ inline __device__ __attribute__((const)) index_int compute_global_size()
     // This actually works even when global is not divisible by local size.
     // This doesn't actually do a multiplication. Instead it calls a device
     // function to get the global size, which is why it works.
     return blockDim.x * gridDim.x; // NOLINT
 #endif
 }

 // We can't just use blockDim.x to get the local size since it's broken on hip
 // when global is not divisible by local size. In this case, we calculate the
 // size for the last group.
+#ifdef MIGRAPHX_NGROUP
+// If global is divisible by local then local can be a const
+#if(MIGRAPHX_NGLOBAL % MIGRAPHX_NLOCAL == 0) || (MIGRAPHX_NGROUP == 1)
+#define MIGRAPHX_HAS_CONST_LOCAL 1
+#endif
+#endif
 inline __device__ __attribute__((const)) index_int compute_local_size()
 {
-#ifdef MIGRAPHX_NLOCAL
-    const auto nlocal = MIGRAPHX_NLOCAL;
-#else
-    const auto nlocal = blockDim.x; // NOLINT
-#endif
-#ifdef MIGRAPHX_NGROUP
-    const auto ngroup = MIGRAPHX_NGROUP;
+#ifdef MIGRAPHX_HAS_CONST_LOCAL
+    return MIGRAPHX_NLOCAL;
 #else
-    const auto ngroup = gridDim.x; // NOLINT
+    // Returns block size. For the non-uniform block it returns the size of the non-uniform block.
+    return __ockl_get_local_size(0); // NOLINT
 #endif
-    const auto group_id = blockIdx.x; // NOLINT
-    const auto nglobal  = compute_global_size();
-    if(group_id == ngroup - 1)
-    {
-        return 1 + (nglobal - 1) % nlocal;
-    }
-    else
-    {
-        return nlocal; // NOLINT
-    }
 }
-#ifdef MIGRAPHX_NGROUP
-// If global is divisible by local then local can be a const
-#if(MIGRAPHX_NGLOBAL % MIGRAPHX_NLOCAL == 0) || (MIGRAPHX_NGROUP == 1)
-#define MIGRAPHX_HAS_CONST_LOCAL 1
-#endif
-#endif
+
+inline __device__ __attribute__((const)) index_int compute_max_local_size()
+{
+#ifdef MIGRAPHX_LOCAL
+    return MIGRAPHX_NLOCAL;
+#else
+    // Returns the block size. When workgroup has non-uniform block, this returns size of the
+    // uniform block.
+    return __ockl_get_enqueued_local_size(0); // NOLINT
+#endif
+}

 struct index
 {
@@ -126,8 +128,8 @@ struct index
 #else
     __device__ index_int max_nlocal() const
     {
-        MIGRAPHX_ASSERT(blockDim.x > 0);
-        return blockDim.x;
+        MIGRAPHX_ASSERT(compute_max_local_size() > 0);
+        return compute_max_local_size();
     }
 #endif
@@ -249,7 +251,8 @@ struct index
 #endif
 inline __device__ __attribute__((const)) index make_index()
 {
-    return index{blockIdx.x * blockDim.x + threadIdx.x, threadIdx.x, blockIdx.x}; // NOLINT
+    return index{
+        blockIdx.x * compute_max_local_size() + threadIdx.x, threadIdx.x, blockIdx.x}; // NOLINT
 }
 } // namespace migraphx
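The compute_local_size/compute_max_local_size split matters when the global size is not divisible by the block size: every group but the last runs at the full (enqueued) block size, and the last group gets the remainder, which is exactly what the 1 + (nglobal - 1) % nlocal expression in the removed code computed. A quick standalone check of that arithmetic:

#include <cstdio>

int main()
{
    int nglobal = 70; // total work-items requested
    int nlocal  = 32; // uniform block size
    // Groups are 32, 32, and then one non-uniform tail block.
    int tail = 1 + (nglobal - 1) % nlocal;
    std::printf("tail block size: %d\n", tail); // prints 6 (70 = 32 + 32 + 6)
}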
src/targets/gpu/lowering.cpp

 /*
  * The MIT License (MIT)
  *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -40,6 +40,7 @@
 #include <migraphx/op/if_op.hpp>
 #include <migraphx/op/reshape.hpp>
 #include <migraphx/op/quant_dot.hpp>
+#include <migraphx/op/reshape_lazy.hpp>
 #include <migraphx/gpu/context.hpp>
 #include <migraphx/gpu/lowering.hpp>
@@ -89,7 +90,6 @@ struct miopen_apply
         offload_copy = (mod == mpm->get_root_module()) ? pass->offload_copy : false;

-        add_generic_op("contiguous");
         add_extend_op("argmax");
         add_extend_op("argmin");
         add_extend_op("logsoftmax");
@@ -115,6 +115,7 @@ struct miopen_apply
         add_neg_op();
         add_nms_op();
         add_select_module_op();
+        add_reshape_lazy_op();
     }

     void copy_params() const
@@ -376,6 +377,32 @@ struct miopen_apply
             return mod->replace_instruction(
                 ins, ins->get_operator(), inputs, ins->module_inputs());
         });
     }

+    /**
+     * Adds reshape lazy to reshape ops that can be aliased instead of copied.
+     * `gpu::contiguous` are added before and after the reshape; these contiguous
+     * instructions can be removed by the eliminate_contiguous pass.
+     */
+    void add_reshape_lazy_op()
+    {
+        apply_map.emplace("reshape", [=](instruction_ref ins) {
+            std::vector<instruction_ref> before_contiguous_args = ins->inputs();
+            auto before_alloc = insert_allocation(ins, std::prev(ins)->get_shape());
+            before_contiguous_args.push_back(before_alloc);
+            auto before_contig =
+                mod->insert_instruction(ins, make_op("gpu::contiguous"), {before_contiguous_args});
+
+            auto new_lazy_reshape = mod->insert_instruction(
+                ins,
+                make_op("reshape_lazy", {{"dims", {ins->get_operator().to_value().at("dims")}}}),
+                before_contig);
+
+            std::vector<instruction_ref> after_contiguous_args = {new_lazy_reshape};
+            auto after_alloc = insert_allocation(new_lazy_reshape, new_lazy_reshape->get_shape());
+            after_contiguous_args.push_back(after_alloc);
+            return mod->replace_instruction(
+                ins, make_op("gpu::contiguous"), after_contiguous_args);
+        });
+    }
 };

 void lowering::apply(module_pass_manager& mpm) const
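The net effect of add_reshape_lazy_op is that a plain reshape is lowered to contiguous, then reshape_lazy (which aliases the buffer instead of copying), then contiguous, with the surrounding copies eligible for removal by eliminate_contiguous. A sketch of the equivalent module construction, written in the style of the tests later in this diff; it uses the reference "contiguous" op for brevity, whereas the real pass inserts "gpu::contiguous" with explicit allocation arguments, and the shapes are invented for illustration:

#include <migraphx/module.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/make_op.hpp>

// Sketch only: builds the instruction sequence the lowering produces for a
// reshape of a {2, 3, 4, 5} tensor into {2, 1, 12, 5}.
void build_lowered_reshape(migraphx::module& m)
{
    auto data = m.add_parameter("x", {migraphx::shape::float_type, {2, 3, 4, 5}});
    // Copy into a standard layout so the aliasing reshape is valid.
    auto before = m.add_instruction(migraphx::make_op("contiguous"), data);
    // reshape_lazy aliases the buffer instead of copying it.
    auto lazy = m.add_instruction(
        migraphx::make_op("reshape_lazy", {{"dims", {2, 1, 12, 5}}}), before);
    // Copy out; eliminate_contiguous can later drop whichever copy is redundant.
    m.add_instruction(migraphx::make_op("contiguous"), lazy);
}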
src/targets/gpu/mlir.cpp

@@ -24,6 +24,7 @@
 #include "migraphx/make_op.hpp"
 #include <migraphx/stringutils.hpp>
 #include <migraphx/gpu/mlir.hpp>
+#include <ostream>

 #ifdef MIGRAPHX_MLIR
 #include <mlir-c/IR.h>
@@ -34,6 +35,7 @@
 #include <mlir-c/Dialect/Rock.h>
 #include <mlir-c/IntegerSet.h>
 #include <mlir-c/Pass.h>
+#include <mlir-c/Support.h>
 #include <mutex>

 #if !defined(MLIR_MIGRAPHX_DIALECT_API_VERSION) || MLIR_MIGRAPHX_DIALECT_API_VERSION != 3
 #warning "Incompatible version of rocMLIR library used, disabling"
@@ -180,13 +182,85 @@ std::string mlir_print(F f, T x)
     return ss.str();
 }

+struct mlir_logger
+{
+    std::stringstream ss;
+    mlir_context* ctx;
+    std::optional<MlirDiagnosticHandlerID> id;
+
+    mlir_logger() : ctx(nullptr), id(std::nullopt) {}
+
+    mlir_logger(mlir_context* context) : ctx(context)
+    {
+        id = mlirContextAttachDiagnosticHandler(
+            ctx->get(), mlir_diagnostic_print_cb, this, nullptr);
+    }
+
+    ~mlir_logger()
+    {
+        if(id.has_value())
+            mlirContextDetachDiagnosticHandler(ctx->get(), *id);
+    }
+
+    mlir_logger(const mlir_logger& other) = delete;
+    mlir_logger& operator=(const mlir_logger& other) = delete;
+
+    mlir_logger(mlir_logger&& other) noexcept
+        : ss(std::move(other.ss)), ctx(other.ctx), id(other.id)
+    {
+        other.ctx = nullptr;
+        other.id  = std::nullopt;
+    }
+
+    mlir_logger& operator=(mlir_logger other) noexcept
+    {
+        std::swap(ss, other.ss);
+        std::swap(ctx, other.ctx);
+        std::swap(id, other.id);
+        return *this;
+    }
+
+    std::string str() const { return ss.str(); }
+
+    void clear() { ss = std::stringstream{}; }
+
+    static MlirLogicalResult mlir_diagnostic_print_cb(MlirDiagnostic diag, void* logger);
+
+    MlirLogicalResult handle(MlirDiagnostic diag);
+};
+
+MlirLogicalResult mlir_logger::mlir_diagnostic_print_cb(MlirDiagnostic diag, void* logger)
+{
+    return reinterpret_cast<mlir_logger*>(logger)->handle(diag);
+}
+
+MlirLogicalResult mlir_logger::handle(MlirDiagnostic diag)
+{
+    MlirDiagnosticSeverity sev = mlirDiagnosticGetSeverity(diag);
+    switch(sev)
+    {
+    case MlirDiagnosticSeverity::MlirDiagnosticError: ss << "Error: "; break;
+    case MlirDiagnosticSeverity::MlirDiagnosticWarning: ss << "Warning: "; break;
+    case MlirDiagnosticSeverity::MlirDiagnosticNote: ss << "Note: "; break;
+    case MlirDiagnosticSeverity::MlirDiagnosticRemark: ss << "Remark: "; break;
+    }
+    mlir_print(mlirDiagnosticPrint, diag, [&](auto s) { ss << s; });
+    ss << std::endl;
+    for(intptr_t i = 0, e = mlirDiagnosticGetNumNotes(diag); i < e; ++i)
+    {
+        (void)handle(mlirDiagnosticGetNote(diag, i));
+    }
+    return mlirLogicalResultSuccess();
+}
+
 struct mlir_program
 {
     mlir_program()
         : ctx(mlirContextCreateWithRegistry(get_dialect_registry().get(),
                                             /*threadingEnable=*/false)),
           location(mlirLocationUnknownGet(ctx.get())),
-          mmodule(mlirModuleCreateEmpty(location))
+          mmodule(mlirModuleCreateEmpty(location)),
+          logger(&ctx)
     {
         mlirContextSetThreadPool(ctx.get(), get_thread_pool().get());
         mlirContextLoadAllAvailableDialects(ctx.get());
@@ -246,7 +320,10 @@ struct mlir_program
     MlirType make_tensor(const shape& s) const
     {
-        assert(s.standard());
+        if(not s.standard())
+            MIGRAPHX_THROW("MLIR expects all tensors to be in standard shape");
+        if(s.dynamic())
+            MIGRAPHX_THROW("MLIR does not support dynamic shapes");
         std::vector<int64_t> lens(s.lens().begin(), s.lens().end());
         return mlirRankedTensorTypeGet(
             lens.size(), lens.data(), make_type(s.type()), mlirAttributeGetNull());
@@ -614,21 +691,49 @@ struct mlir_program
     }
     }

-    void run_high_level_pipeline()
+    void run_high_level_pipeline() MIGRAPHX_TIDY_CONST
     {
         mlir_pass_manager pm_front{mlirPassManagerCreate(ctx.get())};
         mlirMIGraphXAddHighLevelPipeline(pm_front.get());
-        mlirPassManagerRunOnOp(pm_front.get(), mlirModuleGetOperation(mmodule.get()));
+        logger.clear();
+        if(mlirLogicalResultIsFailure(
+               mlirPassManagerRunOnOp(pm_front.get(), mlirModuleGetOperation(mmodule.get()))))
+        {
+            std::string error = "Invalid MLIR created: " + logger.str();
+            if(enabled(MIGRAPHX_TRACE_MLIR{}))
+            {
+                std::cout << error << std::endl;
+            }
+            MIGRAPHX_THROW(error);
+        }
     }

-    void run_backend_pipeline()
+    void run_backend_pipeline() MIGRAPHX_TIDY_CONST
     {
         mlir_pass_manager pm_back{mlirPassManagerCreate(ctx.get())};
         mlirMIGraphXAddBackendPipeline(pm_back.get(), target_arch.c_str());
-        mlirPassManagerRunOnOp(pm_back.get(), mlirModuleGetOperation(mmodule.get()));
+        logger.clear();
+        const size_t trace = value_of(MIGRAPHX_TRACE_MLIR{});
+        static std::mutex mutex;
+        auto mod_op = mlirModuleGetOperation(mmodule.get());
+        if(trace >= 2)
+        {
+            const std::lock_guard<std::mutex> lock(mutex);
+            std::cout << mlir_print(&mlirOperationPrint, mod_op) << std::endl;
+        }
+        if(mlirLogicalResultIsFailure(mlirPassManagerRunOnOp(pm_back.get(), mod_op)))
+        {
+            std::string error = "MLIR backend compilation failed: " + logger.str();
+            if(enabled(MIGRAPHX_TRACE_MLIR{}))
+            {
+                std::cout << error << std::endl;
+            }
+            MIGRAPHX_THROW(error);
+        }
     }

-    code_object_op compile(const value& solution)
+    code_object_op compile(const value& solution) MIGRAPHX_TIDY_CONST
     {
         // 1st pipeline to call
         run_high_level_pipeline();
@@ -682,11 +787,12 @@ struct mlir_program
             MIGRAPHX_THROW("Failed setting tuning key: " + *str);
     }

-    tuning_config get_tuning_config()
+    tuning_config get_tuning_config(bool exhaustive) MIGRAPHX_TIDY_CONST
     {
         tuning_config tc;
         run_high_level_pipeline();
-        auto tuning_mode = RocmlirTuningParamSetKindFull;
+        auto tuning_mode =
+            exhaustive ? RocmlirTuningParamSetKindFull : RocmlirTuningParamSetKindQuick;
         if(enabled(MIGRAPHX_MLIR_TUNE_EXHAUSTIVE{}))
             tuning_mode = RocmlirTuningParamSetKindExhaustive;
         mlir_tuning_space params{mlirRockTuningSpaceCreate(mmodule.get(), tuning_mode)};
@@ -701,7 +807,8 @@ struct mlir_program
             if(perf_key_bytes > perf_key.size())
                 MIGRAPHX_THROW("Tuning perf key was " + std::to_string(perf_key_bytes) +
                                " bytes and thus too long");
-            tc.solutions.emplace_back(perf_key.begin(), perf_key.begin() + perf_key_bytes);
+            tc.solutions.emplace_back(
+                std::string(perf_key.begin(), perf_key.begin() + perf_key_bytes));
         }
         std::array<char, ROCMLIR_TUNING_KEY_BUFSZ> tuning_key;
         size_t tuning_key_bytes =
@@ -808,6 +915,7 @@ struct mlir_program
     mlir_context ctx;
     MlirLocation location;
     mlir_module mmodule;
+    mlir_logger logger;
     problem_params pp;
     std::deque<std::string> strings{};
     std::string target_arch = "";
@@ -914,15 +1022,17 @@ instruction_ref insert_mlir(module& m,
     return m.insert_instruction(ins, co, refs);
 }

-tuning_config
-get_tuning_config_mlir(const context& migraphx_ctx, module m, const std::vector<shape>& inputs)
+tuning_config get_tuning_config_mlir(const context& migraphx_ctx,
+                                     module m,
+                                     const std::vector<shape>& inputs,
+                                     bool exhaustive)
 {
     adjust_param_shapes(m, inputs);
     mlir_program mp;
     mp.set_gpu_properties(migraphx_ctx);
     mp.parse(m);
-    return mp.get_tuning_config();
+    return mp.get_tuning_config(exhaustive);
 }

 #else
@@ -951,7 +1061,7 @@ insert_mlir(module& m, instruction_ref, code_object_op co, const std::vector<ins
     return m.end();
 }

-tuning_config get_tuning_config_mlir(const context&, module, const std::vector<shape>&)
+tuning_config get_tuning_config_mlir(const context&, module, const std::vector<shape>&, bool)
 {
     return {};
 }
src/targets/gpu/prefuse_ops.cpp

@@ -21,17 +21,19 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
+#include <migraphx/permutation.hpp>
 #include <migraphx/gpu/prefuse_ops.hpp>
+#include <migraphx/gpu/gemm_softmax_gemm.hpp>
 #include <migraphx/match/layernorm.hpp>
 #include <migraphx/check_shapes.hpp>
 #include <migraphx/make_op.hpp>
 #include <migraphx/register_op.hpp>
 #include <migraphx/pass_manager.hpp>
 #include <migraphx/dead_code_elimination.hpp>
+#include <migraphx/gpu/ck.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {

 namespace {

 template <class Derived, std::size_t N>
@@ -45,40 +47,42 @@ struct layernorm_base
     }
     shape compute_shape(std::vector<shape> inputs, std::vector<module_ref> mods) const
     {
-        std::size_t nargs = 1;
+        std::size_t nargs = N;
         if(not mods.empty())
         {
             auto* pm = mods.front();
-            nargs    = pm->get_parameter_names().size();
+            nargs += pm->get_parameter_names().size() - 1;
         }
-        check_shapes{inputs, static_cast<const Derived&>(*this)}.has(nargs + N);
-        auto s = inputs.at(0);
+        check_shapes{inputs, static_cast<const Derived&>(*this)}.has(nargs);
+        auto s = inputs.front();
         auto t = s.type();
         if(not mods.empty())
             t = mods.front()->get_output_shapes().front().type();
-        if(s.scalar())
-        {
-            return s;
-        }
-        else if(s.broadcasted())
-        {
-            return {t, s.lens()};
-        }
-        else
-        {
-            return s.with_lens(t, s.lens());
-        }
+
+        // Scalar output if all inputs are scalar
+        if(inputs.front().elements() == 1 and
+           all_of(inputs, [](const auto& ss) { return ss.scalar(); }))
+            return inputs.front();
+        auto l_s = shape::from_permutation(
+            t, s.lens(), find_permutation(std::vector<shape>(inputs.begin(), inputs.begin() + N)));
+        // just prelayernorm or preadd_layernorm
+        if(nargs <= N)
+            return l_s;
+        // else, layernorm + pointwise fusion, preserve layout of fused op
+        std::vector<shape> lp_s(inputs.begin() + N, inputs.end());
+        lp_s.insert(lp_s.begin(), l_s);
+        return shape::from_permutation(t, s.lens(), find_permutation(lp_s));
     }
 };

-struct layernorm : layernorm_base<layernorm, 0>
+struct layernorm : layernorm_base<layernorm, 1>
 {
     std::string name() const { return "gpu::prelayernorm"; }
 };
 MIGRAPHX_REGISTER_OP(layernorm);

-struct add_layernorm : layernorm_base<add_layernorm, 1>
+struct add_layernorm : layernorm_base<add_layernorm, 2>
 {
     std::string name() const { return "gpu::preadd_layernorm"; }
 };
@@ -117,6 +121,60 @@ struct find_add_layernorm
         m.replace_instruction(ins, add_layernorm{op.epsilon}, add_ins->inputs());
     }
 };

+struct pre_gemm_softmax_gemm : gemm_softmax_gemm
+{
+    std::string name() const { return "gpu::pre_gemm_softmax_gemm"; }
+};
+MIGRAPHX_REGISTER_OP(pre_gemm_softmax_gemm);
+
+MIGRAPHX_PRED_MATCHER(is_ck_gemm, instruction_ref ins)
+{
+    if(ins->name() != "dot")
+        return false;
+    if(not pre_gemm_softmax_gemm::is_ck_supported_type(ins->get_shape().type()))
+        return false;
+    return true;
+}
+
+struct find_gemm_softmax_gemm
+{
+    auto matcher() const
+    {
+        auto gemm1 =
+            match::skip(match::name("contiguous"))(match::name("dot")(is_ck_gemm().bind("gemm1")));
+        auto mul = match::name("mul")(
+            match::nargs(2), match::either_arg(0, 1)(match::is_constant().bind("scale"), gemm1));
+        auto softmax = match::name("softmax")(match::arg(0)(mul)).bind("softmax");
+
+        return match::name("dot")(is_ck_gemm().bind("gemm2"))(match::arg(0)(softmax));
+    }
+
+    void apply(module_pass_manager& mpm, const match::matcher_result& r) const
+    {
+        auto ins       = r.result;
+        auto gemm2_ins = r.instructions["gemm2"];
+        auto gemm1_ins = r.instructions["gemm1"];
+        auto scale_lit = r.instructions["scale"];
+
+        float scale = 1.0;
+        scale_lit->eval().visit([&](const auto s) {
+            // CK only supports single-valued scale
+            if(std::all_of(
+                   s.begin() + 1, s.end(), [&](auto v) { return float_equal(v, s.front()); }))
+                scale = s.front();
+            else
+                return;
+        });
+
+        auto inputs = gemm1_ins->inputs();            // A, B
+        inputs.push_back(gemm2_ins->inputs().back()); // B1
+
+        mpm.get_module().replace_instruction(
+            ins, pre_gemm_softmax_gemm{gemm2_ins->get_operator(), scale}, inputs);
+    }
+};
+
 } // namespace

 void prefuse_ops::apply(module_pass_manager& mpm) const
@@ -124,6 +182,8 @@ void prefuse_ops::apply(module_pass_manager& mpm) const
     match::find_matches(mpm.get_module(), find_layernorm{});
     mpm.run_pass(dead_code_elimination{});
     match::find_matches(mpm.get_module(), find_add_layernorm{});
+    if(enabled(MIGRAPHX_ENABLE_CK{}))
+        match::find_matches(mpm, find_gemm_softmax_gemm{});
 }

 } // namespace gpu
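find_gemm_softmax_gemm matches the attention-style chain dot, then mul by a constant scale, then softmax, then dot, and collapses it into one gpu::pre_gemm_softmax_gemm op that carries the scale. A sketch of a module the matcher would fire on, written in the style of this repo's tests; the shapes, the 0.125 scale, and the softmax axis are invented for illustration, and whether the fusion actually applies still depends on is_ck_supported_type and MIGRAPHX_ENABLE_CK:

#include <migraphx/module.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/literal.hpp>
#include <migraphx/make_op.hpp>
#include <vector>

// Sketch only: the dot -> mul(scale) -> softmax -> dot chain that
// find_gemm_softmax_gemm rewrites into a single fused op.
void build_attention_chain(migraphx::module& m)
{
    migraphx::shape s{migraphx::shape::half_type, {1, 12, 256, 256}};
    auto q = m.add_parameter("q", s);
    auto k = m.add_parameter("k", s);
    auto v = m.add_parameter("v", s);
    auto scale = m.add_literal(migraphx::literal{
        migraphx::shape{migraphx::shape::half_type, {1}}, std::vector<float>{0.125f}});
    auto scale_b = m.add_instruction(
        migraphx::make_op("multibroadcast", {{"out_lens", s.lens()}}), scale);
    auto gemm1 = m.add_instruction(migraphx::make_op("dot"), q, k); // first gemm
    auto mul   = m.add_instruction(migraphx::make_op("mul"), gemm1, scale_b);
    auto sm    = m.add_instruction(migraphx::make_op("softmax", {{"axis", 3}}), mul);
    m.add_instruction(migraphx::make_op("dot"), sm, v);             // second gemm
}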
src/targets/gpu/target.cpp

@@ -48,6 +48,7 @@
 #include <migraphx/rewrite_quantization.hpp>
 #include <migraphx/rewrite_rnn.hpp>
 #include <migraphx/schedule.hpp>
+#include <migraphx/simplify_dyn_ops.hpp>
 #include <migraphx/simplify_qdq.hpp>
 #include <migraphx/simplify_reshapes.hpp>
 #include <migraphx/split_single_dyn_dim.hpp>
@@ -109,6 +110,8 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
     {split_single_dyn_dim{},
         dead_code_elimination{},
+        simplify_dyn_ops{},
+        dead_code_elimination{},
         normalize_ops{},
         dead_code_elimination{},
         simplify_qdq{},
src/targets/gpu/time_op.cpp

@@ -41,8 +41,7 @@ std::vector<argument> generate_arguments(const std::vector<shape>& shapes, unsig
 }

 using milliseconds = std::chrono::duration<double, std::milli>;
-std::pair<double, double>
-time_op(context& ictx, operation op, const std::vector<shape>& inputs, int n)
+double time_op(context& ictx, operation op, const std::vector<shape>& inputs, int n)
 {
     // TODO: Use std::ref
@@ -51,21 +50,19 @@ time_op(context& ictx, operation op, const std::vector<shape>& inputs, int n)
     auto output = op.compute_shape(inputs);
     op.finalize(ctx, output, inputs);
     auto args = generate_arguments(inputs);
-    auto run  = [&] {
-        op.compute(ctx, output, args);
-        ctx.finish();
-    };
-    gctx.enable_perf_measurement();
+    auto start = context::create_event_for_timing();
+    auto stop  = context::create_event_for_timing();
+    auto run   = [&] { op.compute(ctx, output, args); };
     run();
-    double host_time   = 0.0;
-    double device_time = 0.0;
+    gctx.get_stream().record(start.get());
     for(auto i : range(n))
     {
         (void)i;
-        host_time += time<milliseconds>(run);
-        device_time += gctx.get_elapsed_ms();
+        run();
     }
-    return std::make_pair(host_time / n, device_time / n);
+    gctx.get_stream().record(stop.get());
+    gctx.finish();
+    return context::get_elapsed_ms(start.get(), stop.get()) / n;
 }

 } // namespace gpu
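The rewritten time_op stops mixing host-side chrono timing with per-iteration device syncs; it records one event pair around the whole loop and divides by n, so the n launches stay queued back-to-back on the stream. The underlying HIP pattern, sketched directly against the HIP runtime API (the kernel launch is a placeholder; error checking omitted for brevity):

#include <hip/hip_runtime.h>

// Sketch of event-pair timing: record start, enqueue n launches, record stop,
// then read the elapsed time once instead of synchronizing every iteration.
float time_kernel_ms(hipStream_t stream, int n)
{
    hipEvent_t start;
    hipEvent_t stop;
    hipEventCreate(&start);
    hipEventCreate(&stop);

    hipEventRecord(start, stream);
    for(int i = 0; i < n; i++)
    {
        // launch_my_kernel(stream); // placeholder for the measured work
    }
    hipEventRecord(stop, stream);
    hipEventSynchronize(stop);

    float ms = 0.0f;
    hipEventElapsedTime(&ms, start, stop);
    hipEventDestroy(start);
    hipEventDestroy(stop);
    return ms / n; // average per launch, matching what time_op now returns
}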
src/tf/parse_reshape.cpp

@@ -45,8 +45,7 @@ struct parse_reshape : op_parser<parse_reshape>
         auto s = args[1]->eval();
         std::vector<int64_t> dims;
         s.visit([&](auto v) { copy(v, std::back_inserter(dims)); });

-        return info.add_instruction(make_op("reshape", {{"dims", dims}}),
-                                    info.make_contiguous(args[0]));
+        return info.add_instruction(make_op("reshape", {{"dims", dims}}), args[0]);
     }
 };
src/verify_args.cpp

@@ -28,19 +28,20 @@ namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

 bool verify_args(const std::string& name,
-                 const argument& ref_arg,
                  const argument& target_arg,
-                 double tolerance)
+                 const verify::expected<argument>& ref_arg,
+                 verify::tolerance tols)
 {
     bool passed = true;
-    visit_all(ref_arg, target_arg)([&](auto ref, auto target) {
-        double error;
-        passed = verify::verify_range(ref, target, tolerance, &error);
+    visit_all(ref_arg.data(), target_arg)([&](auto ref, auto target) {
+        double rms_error;
+        passed =
+            verify::verify_range_with_tolerance(target, verify::expected{ref}, tols, &rms_error);
         if(not passed)
         {
             // TODO: Check for nans
             std::cout << "FAILED: " << name << std::endl;
-            std::cout << "error: " << error << std::endl;
+            std::cout << "RMS Error: " << rms_error << std::endl;
             if(ref.size() < 32)
                 std::cout << "ref:" << ref << std::endl;
             if(target.size() < 32)
@@ -78,16 +79,6 @@ bool verify_args(const std::string& name,
             if(verify::range_zero(target))
                 std::cout << "Target data is all zeros" << std::endl;

-            // auto mxdiff = max_diff(ref, target);
-            // std::cout << "Max diff: " << mxdiff << std::endl;
-
-            // auto idx = mismatch_idx(ref, target, float_equal);
-            // if(idx < verify::range_distance(ref))
-            // {
-            //     std::cout << "Mismatch at " << idx << ": " << ref[idx] << " != " << target[idx]
-            //               << std::endl;
-            // }
-
             auto ref_nan_idx = find_idx(ref, verify::not_finite);
             if(ref_nan_idx >= 0)
                 std::cout << "Non finite number found in ref at " << ref_nan_idx << ": "
@@ -97,11 +88,22 @@ bool verify_args(const std::string& name,
             if(target_nan_idx >= 0)
                 std::cout << "Non finite number found in target at " << target_nan_idx << ": "
                           << target[target_nan_idx] << std::endl;
-            // std::cout << std::endl;
+            std::cout << "MIGraphX verification passed successfully." << std::endl;
         }
     });
     return passed;
 }

+bool verify_args_with_tolerance(const std::string& name,
+                                const argument& target_arg,
+                                const verify::expected<argument>& ref_arg,
+                                std::size_t tolerance)
+{
+    double rms_tol = 0.001;
+    target_arg.visit([&](auto ta) { rms_tol = verify::get_rms_tol(ta, tolerance); });
+    verify::tolerance tols{rms_tol};
+    return verify_args(name, target_arg, ref_arg, tols);
+}
+
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
test/CMakeLists.txt

@@ -25,92 +25,19 @@
 cmake_policy(SET CMP0057 NEW)

 find_package(Threads REQUIRED)

-include(ProcessorCount)
-ProcessorCount(N)
-set(CTEST_PARALLEL_LEVEL ${N} CACHE STRING "CTest parallel level")
-add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure -j ${CTEST_PARALLEL_LEVEL} -C ${CMAKE_CFG_INTDIR} --timeout 5000)
-add_custom_target(tests)
-
-find_program(MIGRAPHX_GDB gdb)
-
-if(MIGRAPHX_GDB)
-    set(MIGRAPHX_TEST_GDB On CACHE BOOL "")
-else()
-    set(MIGRAPHX_TEST_GDB Off CACHE BOOL "")
-endif()
-
-set(SKIP_TESTS)
-
-function(add_test_command NAME EXE)
-    if(NAME IN_LIST SKIP_TESTS)
-        add_test(NAME ${NAME} COMMAND echo skipped)
-        set_tests_properties(${NAME} PROPERTIES DISABLED On)
-    elseif(WIN32)
-        set(WINPATH)
-        foreach(PATH ${CMAKE_FIND_ROOT_PATH})
-            list(APPEND WINPATH ${PATH}/bin)
-        endforeach()
-        file(GENERATE OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/test_${NAME}.cmd" CONTENT "set PATH=${WINPATH};%PATH%
-%1 ${ARGN}")
-        add_test(NAME ${NAME} COMMAND ${WINE_CMD} cmd /c "${CMAKE_CURRENT_BINARY_DIR}/test_${NAME}.cmd" $<TARGET_FILE:${EXE}>)
-    else()
-        if(MIGRAPHX_TEST_GDB)
-            # add_test(NAME ${NAME} COMMAND ${MIGRAPHX_GDB}
-            #     --batch
-            #     --return-child-result
-            #     -ex "set disable-randomization off"
-            #     -ex run
-            #     -ex backtrace
-            #     --args $<TARGET_FILE:${EXE}> ${ARGN})
-            set(TEST_DIR ${CMAKE_CURRENT_BINARY_DIR}/gdb/test_${NAME})
-            file(MAKE_DIRECTORY ${TEST_DIR})
-            if(NOT EXISTS ${TEST_DIR})
-                message(FATAL_ERROR "Failed to create test directory: ${TEST_DIR}")
-            endif()
-            file(GENERATE OUTPUT "${TEST_DIR}/run.cmake" CONTENT "
-                # Remove previous core dump
-                file(REMOVE ${TEST_DIR}/core)
-                execute_process(COMMAND $<TARGET_FILE:${EXE}> ${ARGN} WORKING_DIRECTORY ${TEST_DIR} RESULT_VARIABLE RESULT)
-                if(NOT RESULT EQUAL 0)
-                    # TODO: check for core files based on pid when setting /proc/sys/kernel/core_uses_pid
-                    if(EXISTS ${TEST_DIR}/core)
-                        set(\$ENV{UBSAN_OPTIONS} print_stacktrace=1)
-                        set(\$ENV{ASAN_OPTIONS} print_stacktrace=1)
-                        execute_process(COMMAND ${MIGRAPHX_GDB} $<TARGET_FILE:${EXE}> ${TEST_DIR}/core -batch -ex bt)
-                    endif()
-                    message(FATAL_ERROR \"Test failed\")
-                endif()
-            ")
-            add_test(NAME ${NAME} COMMAND ${CMAKE_COMMAND} -P "${TEST_DIR}/run.cmake")
-        else()
-            add_test(NAME ${NAME} COMMAND ${EXE} ${ARGN})
-        endif()
-    endif()
+rocm_test_link_libraries(Threads::Threads migraphx migraphx_ref migraphx_onnx migraphx_tf)
+rocm_test_include_directories(include)
-    set_tests_properties(${NAME} PROPERTIES FAIL_REGULAR_EXPRESSION "FAILED")
-endfunction()
-
-function(add_test_executable TEST_NAME)
-    add_executable(${TEST_NAME} EXCLUDE_FROM_ALL ${ARGN})
-    set(TEST_COMMAND ${TEST_NAME})
-    add_test_command(${TEST_NAME} ${TEST_COMMAND})
-    add_dependencies(tests ${TEST_NAME})
-    add_dependencies(check ${TEST_NAME})
-    target_link_libraries(${TEST_NAME} Threads::Threads migraphx migraphx_onnx migraphx_ref)
-    target_include_directories(${TEST_NAME} PUBLIC include)
-endfunction(add_test_executable)

 set(MIGRAPHX_DISABLE_LARGE_BUFFER_TESTS Off CACHE BOOL "")
 if(MIGRAPHX_DISABLE_LARGE_BUFFER_TESTS)
     add_compile_definitions(MIGRAPHX_DISABLE_LARGE_BUFFER_TESTS)
 endif()

 file(GLOB TESTS CONFIGURE_DEPENDS *.cpp)
 foreach(TEST ${TESTS})
     get_filename_component(BASE_NAME ${TEST} NAME_WE)
-    add_test_executable(test_${BASE_NAME} ${TEST})
+    rocm_add_test_executable(test_${BASE_NAME} ${TEST})
     rocm_clang_tidy_check(test_${BASE_NAME})
 endforeach()
@@ -120,7 +47,7 @@ if(MIGRAPHX_ENABLE_GPU)
     foreach(TEST ${GPU_TESTS})
         get_filename_component(BASE_NAME ${TEST} NAME_WE)
-        add_test_executable(test_gpu_${BASE_NAME} ${TEST})
+        rocm_add_test_executable(test_gpu_${BASE_NAME} ${TEST})
         rocm_clang_tidy_check(test_gpu_${BASE_NAME})
         set_tests_properties(test_gpu_${BASE_NAME} PROPERTIES
             COST 10
@@ -139,7 +66,7 @@ if(MIGRAPHX_ENABLE_FPGA)
     foreach(TEST ${FPGA_TESTS})
         get_filename_component(BASE_NAME ${TEST} NAME_WE)
-        add_test_executable(test_fpga_${BASE_NAME} ${TEST})
+        rocm_add_test_executable(test_fpga_${BASE_NAME} ${TEST})
         rocm_clang_tidy_check(test_fpga_${BASE_NAME})
         set_tests_properties(test_fpga_${BASE_NAME} PROPERTIES
             COST 10
@@ -161,19 +88,17 @@ foreach(ONNX_TEST ${ONNX_TESTS})
     target_link_libraries(${TEST_NAME} migraphx_onnx migraphx_ref)
     target_include_directories(${TEST_NAME} PUBLIC include)
     add_test(NAME ${TEST_NAME} COMMAND $<TARGET_FILE:${TEST_NAME}> WORKING_DIRECTORY ${TEST_ONNX_DIR})
-    add_dependencies(tests ${TEST_NAME})
-    add_dependencies(check ${TEST_NAME})
+    rocm_mark_as_test(${TEST_NAME})
 endforeach()

 # tf test
 set(TEST_TF_DIR ${CMAKE_CURRENT_SOURCE_DIR}/tf)
 add_executable(test_tf tf/tf_test.cpp)
+rocm_mark_as_test(test_tf)
 rocm_clang_tidy_check(test_tf)
 target_link_libraries(test_tf migraphx_tf)
 target_include_directories(test_tf PUBLIC include)
 add_test(NAME test_tf COMMAND $<TARGET_FILE:test_tf> WORKING_DIRECTORY ${TEST_TF_DIR})
-add_dependencies(tests test_tf)
-add_dependencies(check test_tf)

 add_subdirectory(api)
 add_subdirectory(verify)
@@ -196,8 +121,7 @@ if(MIGRAPHX_ENABLE_GPU AND MIGRAPHX_ENABLE_CPU AND MIGRAPHX_ENABLE_FPGA)
         target_link_libraries(${TEST_NAME} migraphx migraphx_onnx migraphx_tf migraphx_all_targets)
         target_include_directories(${TEST_NAME} PUBLIC include)
         add_test(NAME ${TEST_NAME} COMMAND $<TARGET_FILE:${TEST_NAME}> WORKING_DIRECTORY ${TEST_MULTI_TARGET_DIR})
-        add_dependencies(tests ${TEST_NAME})
-        add_dependencies(check ${TEST_NAME})
+        rocm_mark_as_test(${TEST_NAME})
     endforeach()
 endif()
@@ -214,7 +138,7 @@ int main() {}\n"
 #endif\n")
-    add_test_executable(${NAME}
+    rocm_add_test_executable(${NAME}
                         ${CMAKE_CURRENT_BINARY_DIR}/header-main-include-${NAME}.cpp
                         ${CMAKE_CURRENT_BINARY_DIR}/header-static-include-${NAME}.cpp)
@@ -236,13 +160,13 @@ test_headers(migraphx ${CMAKE_SOURCE_DIR}/src/include/migraphx/*.hpp)
 test_headers(migraphx/ref ${CMAKE_SOURCE_DIR}/src/targets/ref/include/migraphx/ref/*.hpp)
 if(MIGRAPHX_ENABLE_GPU)
-    test_headers(migraphx/gpu ${CMAKE_SOURCE_DIR}/src/targets/gpu/include/migraphx/gpu/*.hpp)
+    test_headers(migraphx/gpu HEADERS ${CMAKE_SOURCE_DIR}/src/targets/gpu/include/migraphx/gpu/*.hpp DEPENDS migraphx_gpu)
 endif()
 if(MIGRAPHX_ENABLE_CPU)
-    test_headers(migraphx/cpu ${CMAKE_SOURCE_DIR}/src/targets/cpu/include/migraphx/cpu/*.hpp)
+    test_headers(migraphx/cpu HEADERS ${CMAKE_SOURCE_DIR}/src/targets/cpu/include/migraphx/cpu/*.hpp migraphx_cpu)
 endif()
 if(MIGRAPHX_ENABLE_FPGA)
-    test_headers(migraphx/fpga ${CMAKE_SOURCE_DIR}/src/targets/fpga/include/migraphx/fpga/*.hpp)
+    test_headers(migraphx/fpga HEADERS ${CMAKE_SOURCE_DIR}/src/targets/fpga/include/migraphx/fpga/*.hpp migraphx_fpga)
 endif()
test/auto_contiguous_test.cpp

@@ -158,6 +158,31 @@ TEST_CASE(two_transpose_gather)
     EXPECT(m1 == m2);
 }

+TEST_CASE(standard_reshape_lazy)
+{
+    migraphx::module m1;
+    {
+        auto data = m1.add_parameter("2x2", {migraphx::shape::float_type, {2, 3, 4, 5}});
+        auto add  = m1.add_instruction(migraphx::make_op("add"), data, data);
+        auto r    = m1.add_instruction(
+            migraphx::make_op("reshape_lazy", {{"dims", {2, 1, 12, 5}}}), add);
+        m1.add_return({r});
+    }
+    run_pass(m1);
+
+    migraphx::module m2;
+    {
+        auto data = m2.add_parameter("2x2", {migraphx::shape::float_type, {2, 3, 4, 5}});
+        auto add  = m2.add_instruction(migraphx::make_op("add"), data, data);
+        auto ca   = m2.add_instruction(migraphx::make_op("contiguous"), add);
+        auto r    = m2.add_instruction(
+            migraphx::make_op("reshape_lazy", {{"dims", {2, 1, 12, 5}}}), ca);
+        m2.add_return({r});
+    }
+    EXPECT(m1 == m2);
+}
+
 TEST_CASE(standard_reshape)
 {
     migraphx::module m1;
@@ -173,8 +198,7 @@ TEST_CASE(standard_reshape)
     {
         auto data = m2.add_parameter("2x2", {migraphx::shape::float_type, {2, 3, 4, 5}});
         auto add  = m2.add_instruction(migraphx::make_op("add"), data, data);
-        auto ca   = m2.add_instruction(migraphx::make_op("contiguous"), add);
-        auto r    = m2.add_instruction(migraphx::make_op("reshape", {{"dims", {2, 1, 12, 5}}}), ca);
+        auto r    = m2.add_instruction(migraphx::make_op("reshape", {{"dims", {2, 1, 12, 5}}}), add);
         m2.add_return({r});
     }
test/check_shapes_test.cpp

@@ -31,24 +31,39 @@
 using migraphx::shape;

-bool create_shapes(bool dynamic_allowed)
+void create_shapes(bool dynamic_allowed)
 {
-    try
-    {
-        shape a{shape::int64_type, {3}};
-        shape b{shape::float_type, {{3, 6}, {4, 4}}};
-        auto op = migraphx::make_op("add");
-        migraphx::check_shapes{{a, b}, op, dynamic_allowed}.has(2);
-        return true;
-    }
-    catch(...)
-    {
-        return false;
-    }
+    shape a{shape::int64_type, {3}};
+    shape b{shape::float_type, {{3, 6}, {4, 4}}};
+    migraphx::check_shapes{{a, b}, "", dynamic_allowed}.has(2);
 }

-TEST_CASE(allow_dynamic_shape) { EXPECT(create_shapes(true)); }
+TEST_CASE(allow_dynamic_shape)
+{
+    EXPECT(not test::throws([] { create_shapes(true); }));
+}

-TEST_CASE(fail_dynamic_shape) { EXPECT(not create_shapes(false)); }
+TEST_CASE(fail_dynamic_shape)
+{
+    EXPECT(test::throws([] { create_shapes(false); }));
+}
+
+TEST_CASE(same_layout_fail)
+{
+    EXPECT(test::throws([] {
+        shape a{shape::float_type, {2, 3}};
+        shape b{shape::float_type, {2, 3}, {1, 2}};
+        migraphx::check_shapes{{a, b}, ""}.same_layout();
+    }));
+}
+
+TEST_CASE(same_layout_pass)
+{
+    EXPECT(not test::throws([] {
+        shape a{shape::float_type, {2, 3}, {1, 2}};
+        shape b{shape::float_type, {2, 3}, {1, 2}};
+        migraphx::check_shapes{{a, b}, ""}.same_layout();
+    }));
+}

 int main(int argc, const char* argv[]) { test::run(argc, argv); }
test/eliminate_allocation_test.cpp

@@ -55,7 +55,7 @@ struct allocate
                                      const migraphx::shape& output_shape,
                                      const std::vector<migraphx::argument>&) const
     {
-        return {output_shape};
+        return migraphx::argument{output_shape};
     }
 };
test/eliminate_concat_test.cpp

@@ -60,7 +60,7 @@ struct concat
                                      const migraphx::shape& output_shape,
                                      const std::vector<migraphx::argument>&) const
     {
-        return {output_shape};
+        return migraphx::argument{output_shape};
     }
 };
@@ -104,7 +104,7 @@ struct allocate
                                      const migraphx::shape& output_shape,
                                      const std::vector<migraphx::argument>&) const
     {
-        return {output_shape};
+        return migraphx::argument{output_shape};
    }
 };