gaoqiong / MIGraphX

Commit 70fe454f, authored Oct 20, 2023 by Umang Yadav

Merge branch 'develop' into resnet50_partition

Parents: ea0b8059, f47e0b5b
Changes: 44 files in the merge; showing 20 changed files on this page, with 339 additions and 71 deletions (+339 −71).
CMakeLists.txt                                               +1   −1
docs/.sphinx/requirements.txt                                +3   −3
src/include/migraphx/argument.hpp                            +1   −1
src/include/migraphx/op/allocate.hpp                         +2   −2
src/include/migraphx/op/pooling.hpp                          +2   −2
src/onnx/parse_mean_variance_normalization.cpp               +86  −0
src/onnx/parse_trilu.cpp                                     +4   −4
src/rewrite_quantization.cpp                                 +18  −3
src/targets/gpu/compile_hip_code_object.cpp                  +19  −7
src/targets/gpu/fuse_ck.cpp                                  +18  −3
src/targets/gpu/include/migraphx/gpu/convolution.hpp         +3   −3
src/targets/gpu/kernels/include/migraphx/kernels/index.hpp   +33  −30
test/eliminate_allocation_test.cpp                           +1   −1
test/eliminate_concat_test.cpp                               +2   −2
test/memory_coloring_test.cpp                                +1   −1
test/normalize_ops_test.cpp                                  +1   −1
test/onnx/.onnxrt-commit                                     +1   −1
test/onnx/gen_onnx.py                                        +143 −6
test/onnx/mvn_axes_rank_too_big_test.onnx                    +0   −0
test/onnx/mvn_axes_rank_too_small_test.onnx                  +0   −0
CMakeLists.txt
@@ -76,7 +76,7 @@ include(ROCMSetupVersion)
 option(BUILD_DEV "Build for development purpose only" OFF)
 rocm_setup_version(VERSION 2.8.0)
-set(MIGRAPHX_SO_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH})
+set(MIGRAPHX_SO_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR})
 option(BUILD_SHARED_LIBS "Build as a shared library" ON)
docs/.sphinx/requirements.txt
@@ -35,7 +35,7 @@ fastjsonschema==2.16.3
     # via rocm-docs-core
 gitdb==4.0.10
     # via gitpython
-gitpython==3.1.32
+gitpython==3.1.37
     # via rocm-docs-core
 idna==3.4
     # via requests
@@ -87,7 +87,7 @@ requests==2.28.2
     # via
     #   pygithub
     #   sphinx
-rocm-docs-core==0.24.2
+rocm-docs-core==0.26.0
     # via -r requirements.in
 smmap==5.0.0
     # via gitdb
@@ -130,7 +130,7 @@ sphinxcontrib-serializinghtml==1.1.5
     # via sphinx
 typing-extensions==4.5.0
     # via pydata-sphinx-theme
-urllib3==1.26.15
+urllib3==1.26.18
     # via requests
 wrapt==1.15.0
     # via deprecated
src/include/migraphx/argument.hpp
@@ -46,7 +46,7 @@ struct MIGRAPHX_EXPORT argument : raw_data<argument>
 {
     argument() = default;
-    argument(const shape& s);
+    explicit argument(const shape& s);
     template <class F, MIGRAPHX_REQUIRES(std::is_pointer<decltype(std::declval<F>()())>{})>
     argument(shape s, F d)
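Most of the churn in the remaining C++ files follows from this one-line change: once argument(const shape&) is explicit, implicit shape-to-argument conversions (the bare "return {output_shape};" statements and the mixed-type ternaries below) no longer compile and must name the type. A minimal sketch of the effect, using stripped-down stand-ins for the real classes, not the actual MIGraphX definitions:

#include <cstddef>

struct shape { std::size_t bytes = 0; };

struct argument
{
    argument() = default;
    explicit argument(const shape& s) : size(s.bytes) {} // was implicit before this change
    std::size_t size = 0;
};

argument make_buffer(const shape& out)
{
    // return {out};      // no longer compiles: copy-list-init cannot use an explicit ctor
    return argument{out}; // call sites must now construct the argument explicitly
}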
src/include/migraphx/op/allocate.hpp
@@ -88,13 +88,13 @@ struct allocate
     {
         if(args.empty())
         {
-            return {output_shape};
+            return argument{output_shape};
         }
         else
         {
             std::vector<std::size_t> output_dims(output_shape.ndim());
             args.at(0).visit([&](auto a) { output_dims.assign(a.begin(), a.end()); });
-            return {shape{buf_type, output_dims}};
+            return argument{shape{buf_type, output_dims}};
         }
     }
 };
src/include/migraphx/op/pooling.hpp
@@ -411,7 +411,7 @@ struct pooling
             // for dynamic GlobalPooling, there's no padding
             kernel_dims.insert(kernel_dims.end(), input_lens.begin() + 2, input_lens.end());
             output_shape = dyn_out.computed_shape;
-            result       = dyn_out.computed_shape;
+            result       = argument{dyn_out.computed_shape};
         }
         else if((padding_mode != op::padding_mode_t::default_))
         {
@@ -439,7 +439,7 @@ struct pooling
         {
             kernel_dims  = this->lengths;
             output_shape = dyn_out.computed_shape;
-            result       = dyn_out.computed_shape;
+            result       = argument{dyn_out.computed_shape};
         }
         // Perform the computation and populate result
src/onnx/parse_mean_variance_normalization.cpp (new file, 0 → 100644)
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/onnx/op_parser.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/onnx/checks.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace onnx {

struct parse_mean_variance_normalization : op_parser<parse_mean_variance_normalization>
{
    std::vector<op_desc> operators() const { return {{"MeanVarianceNormalization"}}; }

    instruction_ref parse(const op_desc& /*opd*/,
                          const onnx_parser& /*parser*/,
                          onnx_parser::node_info info,
                          std::vector<instruction_ref> args) const
    {
        auto&& data    = args.front();
        auto data_rank = data->get_shape().ndim();
        std::vector<int64_t> axes{0, 2, 3};

        if(contains(info.attributes, "axes"))
        {
            const auto& axes_attr = info.attributes["axes"].ints();
            axes.assign(axes_attr.begin(), axes_attr.end());
        }
        else if(data_rank != 4)
        {
            MIGRAPHX_THROW(
                "Input tensor needs to be rank 4 when axes is not specified. Instead it is rank " +
                std::to_string(data_rank));
        }

        if(axes.size() != data_rank - 1)
        {
            MIGRAPHX_THROW("Length of axes array needs to be equal to input tensor rank - 1");
        }

        auto data_mean         = info.add_instruction(make_op("reduce_mean", {{"axes", axes}}), data);
        auto data_mean_squared = info.add_common_op("mul", data_mean, data_mean);

        auto data_squared      = info.add_common_op("mul", data, data);
        auto data_squared_mean =
            info.add_instruction(make_op("reduce_mean", {{"axes", axes}}), data_squared);

        auto mean_sub = info.add_common_op("sub", data_squared_mean, data_mean_squared);
        auto std      = info.add_common_op("sqrt", mean_sub);

        auto dividend = info.add_common_op("sub", data, data_mean);
        auto epsilon  = info.add_literal(
            {data->get_shape().type(),
             {data->get_shape().type() == shape::half_type ? 1e-7 : 1e-9}});
        auto divisor  = info.add_common_op("add", std, epsilon);

        return info.add_common_op("div", dividend, divisor);
    }
};

} // namespace onnx
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
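The parser builds MeanVarianceNormalization out of two reduce_mean passes plus elementwise ops, relying on the identity Var(X) = E[X^2] - (E[X])^2 so the data only has to be reduced twice. In math form, with expectations taken over the reduction axes and the epsilon added outside the square root exactly as the code above does:

\text{out} \;=\; \frac{X - \mathbb{E}[X]}{\sqrt{\mathbb{E}[X^{2}] - (\mathbb{E}[X])^{2}} + \varepsilon},
\qquad
\varepsilon =
\begin{cases}
10^{-7} & \text{half precision}\\
10^{-9} & \text{otherwise}
\end{cases}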
src/onnx/parse_trilu.cpp
@@ -56,9 +56,6 @@ struct parse_trilu : op_parser<parse_trilu>
             k = arg_k.at<int>();
         }
-        if(k < 0)
-            MIGRAPHX_THROW("PARSE_TRILU: negative k values not supported");
-
         if(contains(info.attributes, "upper"))
         {
             upper = static_cast<bool>(info.attributes.at("upper").i());
@@ -69,9 +66,12 @@ struct parse_trilu : op_parser<parse_trilu>
         // when creating the mask, if upper == 1,
         // the inner triangle will have values set to 0
         std::vector<bool> mask_mat(num_rows * num_cols, upper);
+        // if upper == 0, kth diagonal must also be masked
+        if(not upper)
+            k++;
         for(size_t i = 0; i < num_rows; i++)
         {
-            for(size_t j = 0; j < std::min(k, static_cast<int>(num_cols)); j++)
+            for(int j = 0; j < std::min(k, static_cast<int>(num_cols)); j++)
             {
                 mask_mat[i * num_cols + j] = not upper;
             }
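The net effect of these hunks is negative-k support for Trilu: the early throw is removed, the inner index becomes a signed int so std::min(k, static_cast<int>(num_cols)) is safe when k < 0, and in the lower (upper == 0) case k is pre-incremented so the kth diagonal itself is kept. A self-contained sketch of the mask construction follows; note that the per-row k++ that turns the band into a triangle lies outside the quoted hunk, so its placement here is an assumption:

#include <algorithm>
#include <cstddef>
#include <vector>

// Sketch of the mask the parser builds: mask[i * num_cols + j] ends up true
// for elements that survive (the kept triangle), false for zeroed ones.
std::vector<bool> trilu_mask(std::size_t num_rows, std::size_t num_cols, int k, bool upper)
{
    std::vector<bool> mask(num_rows * num_cols, upper);
    if(not upper)
        k++; // include the kth diagonal itself for tril, as in the hunk above
    for(std::size_t i = 0; i < num_rows; i++)
    {
        for(int j = 0; j < std::min(k, static_cast<int>(num_cols)); j++)
            mask[i * num_cols + j] = not upper;
        k++; // assumed: each row shifts the boundary one column right
    }
    return mask;
}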
src/rewrite_quantization.cpp
@@ -33,6 +33,8 @@
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 
+MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_CK_WORKAROUNDS);
+
 void apply_quantizelinear(module& m, instruction_ref ins)
 {
     assert(ins->name() == "quantizelinear");
@@ -62,9 +64,22 @@ void apply_quantizelinear(module& m, instruction_ref ins)
         max_quant = qt.max();
         min_quant = qt.min();
     });
     auto s = add_zero_point->get_shape();
-    auto min_arg = m.add_literal(literal{shape{s.type()}, {min_quant}});
-    auto max_arg = m.add_literal(literal{shape{s.type()}, {max_quant}});
+    instruction_ref min_arg;
+    instruction_ref max_arg;
+    if(enabled(MIGRAPHX_ENABLE_CK_WORKAROUNDS{}))
+    {
+        std::vector<int> min_data(s.elements(), min_quant);
+        std::vector<int> max_data(s.elements(), max_quant);
+        min_arg = m.add_literal(literal(s, min_data));
+        max_arg = m.add_literal(literal(s, max_data));
+    }
+    else
+    {
+        min_arg = m.add_literal(literal{shape{s.type()}, {min_quant}});
+        max_arg = m.add_literal(literal{shape{s.type()}, {max_quant}});
+    }
     auto saturate = insert_common_op(m, ins, make_op("clip"), {add_zero_point, min_arg, max_arg});
     m.replace_instruction(
         ins, make_op("convert", {{"target_type", ins->get_shape().type()}}), saturate);
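For orientation: apply_quantizelinear lowers the op into add-zero-point, clip, convert, and this hunk only changes how the clip bounds are materialized (full-shape literals when MIGRAPHX_ENABLE_CK_WORKAROUNDS is set, scalar literals broadcast by insert_common_op otherwise). A rough elementwise model of the resulting computation, assuming standard ONNX QuantizeLinear int8 saturation semantics rather than quoting the pass:

#include <algorithm>
#include <cmath>
#include <cstdint>

// Hypothetical scalar model: y = saturate(round(x / scale) + zero_point).
std::int8_t quantize_int8(float x, float scale, std::int8_t zero_point)
{
    float scaled  = std::nearbyint(x / scale);              // round (ties-to-even by default)
    float shifted = scaled + zero_point;                    // the "add_zero_point" instruction
    float sat     = std::clamp(shifted, -128.0f, 127.0f);   // the inserted "clip" (qt.min/qt.max)
    return static_cast<std::int8_t>(sat);                   // the final "convert"
}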
src/targets/gpu/compile_hip_code_object.cpp
@@ -139,6 +139,12 @@ void hip_compile_options::set_launch_params(
     global = compute_global(local);
 }
 
+static bool hip_accept_non_uniform_wg()
+{
+    static bool non_uniform_wg = hip_has_flags({"-fno-offload-uniform-block"});
+    return non_uniform_wg;
+}
+
 std::function<std::size_t(std::size_t local)>
 compute_global_for(context& ctx, std::size_t n, std::size_t over)
 {
@@ -146,13 +152,14 @@ compute_global_for(context& ctx, std::size_t n, std::size_t over)
     std::size_t max_global = ctx.get_current_device().get_cu_count() *
                              ctx.get_current_device().get_max_workitems_per_cu();
     return [n, over, max_global](std::size_t local) {
         // hip require global workitems multiple of local workitems. It may degrade performance.
-        // [TODO]: consider adding "fno-hip-uniform-block" flag when it becomes available.
-        // https://reviews.llvm.org/D155213
-        std::size_t num_elements = ((n + local - 1) / local) * local;
-        std::size_t max_blocks   = max_global / local;
-        std::size_t groups       = 1 + (num_elements - 1) / local;
+        std::size_t num_elements = n;
+        if(not hip_accept_non_uniform_wg())
+        {
+            num_elements = (1 + (n - 1) / local) * local;
+        }
+        std::size_t groups     = (num_elements + local - 1) / local;
+        std::size_t max_blocks = max_global / local;
         std::size_t nglobal    = std::min(max_blocks * over, groups) * local;
         return std::min(nglobal, num_elements);
     };
@@ -183,6 +190,11 @@ operation compile_hip_code_object(const std::string& content, hip_compile_option
     generate_args_hpp(options.virtual_inputs.empty() ? options.inputs : options.virtual_inputs);
     srcs.emplace_back("args.hpp", args_hpp);
 
+    if(options.global % options.local != 0 and hip_accept_non_uniform_wg())
+        options.params += " -fno-offload-uniform-block";
+    else
+        assert(options.global % options.local == 0);
+
     options.params += " -DMIGRAPHX_NGLOBAL=" + std::to_string(options.global);
     options.params += " -DMIGRAPHX_NLOCAL=" + std::to_string(options.local);
     options.params += " " + join_strings(compiler_warnings(), " ");
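The launch-size arithmetic above is plain ceiling division. With n = 1000 and local = 256: if the compiler cannot accept non-uniform workgroups, num_elements is rounded up to (1 + 999/256) * 256 = 1024, so the last block carries 24 threads past the data and kernels must bounds-check; if -fno-offload-uniform-block is accepted, num_elements stays 1000 and HIP trims the trailing block. A quick standalone check of the formulas (hypothetical main, not part of the source):

#include <cassert>
#include <cstddef>

int main()
{
    std::size_t n = 1000, local = 256;

    // Rounded up to a multiple of local (uniform workgroups required).
    std::size_t rounded = (1 + (n - 1) / local) * local;
    assert(rounded == 1024);

    // The group count is the same either way; only the global size differs.
    std::size_t groups = (rounded + local - 1) / local;
    assert(groups == 4);
    assert((n + local - 1) / local == groups);
    return 0;
}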
src/targets/gpu/fuse_ck.cpp
@@ -26,6 +26,7 @@
 #include <migraphx/matcher.hpp>
 #include <migraphx/pass_manager.hpp>
 #include <migraphx/register_op.hpp>
+#include <migraphx/gpu/device_name.hpp>
 
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -92,6 +93,8 @@ MIGRAPHX_PRED_MATCHER(is_ck_gemm, instruction_ref ins)
     auto m = a.lens()[a.lens().size() - 2];
     auto n = b.lens().back();
     auto k = a.lens().back();
+    auto batch_size = std::accumulate(
+        a.lens().rbegin() + 2, a.lens().rend(), std::size_t{1}, std::multiplies<std::size_t>());
     // Integer gemms must be divisible by 4 in ck
     if(contains({shape::int8_type, shape::int32_type}, ins->get_shape().type()))
     {
@@ -102,9 +105,17 @@ MIGRAPHX_PRED_MATCHER(is_ck_gemm, instruction_ref ins)
         if(k % 4 != 0)
             return false;
     }
-    // Skipping GEMMs with a K dimension greater than 2048 is a course-grained strategy
-    // to avoid poor-performing GEMM kernels from CK
-    // To-do: Investigate a more precise strategy
+    auto device_name = trim(split_string(get_device_name(), ':').front());
+    if(device_name == "gfx940")
+    {
+        if(ins->get_shape().type() == shape::half_type)
+        {
+            if(batch_size >= 64)
+                return m < 2048 or k <= 64 or n <= 384 or n >= 2048;
+            return true;
+        }
+        return true;
+    }
     return k <= 2048;
@@ -140,6 +151,10 @@ struct find_ck_gemm_pointwise
                return not input->inputs().empty() and
                       input->inputs().front()->name() == "capture";
            }))
            return;
+        if(std::any_of(ins->inputs().begin(), ins->inputs().end(), [](auto input) {
+               return not input->inputs().empty() and
+                      input->inputs().front()->name() == "capture";
+           }))
+            return;
         assert(gemm_it != inputs.end());
         if(gemm_idx != 0)
         {
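The new batch_size folds every dimension to the left of the trailing matrix dims into a single count, which the gfx940 heuristic then compares against 64 for half-precision GEMMs. The reverse-iterator std::accumulate is just a product over the leading dims; a self-contained check with an assumed 4-D shape:

#include <cassert>
#include <cstddef>
#include <functional>
#include <numeric>
#include <vector>

int main()
{
    // A GEMM input shaped {batch0, batch1, M, K}: everything left of the
    // last two dims multiplies into the batch count, as in is_ck_gemm.
    std::vector<std::size_t> lens{2, 32, 384, 64};
    auto batch_size = std::accumulate(
        lens.rbegin() + 2, lens.rend(), std::size_t{1}, std::multiplies<std::size_t>());
    assert(batch_size == 64); // 2 * 32; >= 64 triggers the half-precision filter
    return 0;
}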
src/targets/gpu/include/migraphx/gpu/convolution.hpp
@@ -199,9 +199,9 @@ struct miopen_convolution
         // MIOpen has APIs to pass pre-allocated buffers starting from rocm-5.6
         preallocate = true;
 #endif
-        auto x = preallocate ? to_gpu(generate_argument(x_shape)) : inputs[0];
-        auto w = preallocate ? to_gpu(generate_argument(w_shape)) : inputs[1];
-        auto y = preallocate ? allocate_gpu(output_shape) : inputs[2];
+        auto x = preallocate ? to_gpu(generate_argument(x_shape)) : argument{inputs[0]};
+        auto w = preallocate ? to_gpu(generate_argument(w_shape)) : argument{inputs[1]};
+        auto y = preallocate ? allocate_gpu(output_shape) : argument{inputs[2]};
         auto workspace =
             preallocate ? allocate_gpu(workspace_shape) : migraphx::argument(workspace_shape);
src/targets/gpu/kernels/include/migraphx/kernels/index.hpp
@@ -31,6 +31,14 @@
 #include <migraphx/kernels/debug.hpp>
 #include <migraphx/kernels/functional.hpp>
 
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wreserved-identifier"
+extern "C" __device__ size_t __ockl_get_enqueued_local_size(uint); // NOLINT
+extern "C" __device__ size_t __ockl_get_local_size(uint);          // NOLINT
+#pragma clang diagnostic pop
+#endif
+
 namespace migraphx {
 
 #if defined(MIGRAPHX_NGLOBAL) && defined(MIGRAPHX_NLOCAL)
@@ -45,43 +53,37 @@ inline __device__ __attribute__((const)) index_int compute_global_size()
     // This actualy works even when global is not divisible by local size.
     // This doesnt actually do a multiplicatiosn. Instead it calls a device
     // function to get the global size, which is why it works.
     return blockDim.x * gridDim.x; // NOLINT
 #endif
 }
 
-// We cant just use blockDim.x to get the local size since its broken on hip
-// when global is not divisible by local size. In this case, we calulate the
-// size for the last group.
+#ifdef MIGRAPHX_NGROUP
+// If global is divisible by local then local can be a const
+#if(MIGRAPHX_NGLOBAL % MIGRAPHX_NLOCAL == 0) || (MIGRAPHX_NGROUP == 1)
+#define MIGRAPHX_HAS_CONST_LOCAL 1
+#endif
+#endif
+
 inline __device__ __attribute__((const)) index_int compute_local_size()
 {
-#ifdef MIGRAPHX_NLOCAL
-    const auto nlocal = MIGRAPHX_NLOCAL;
-#else
-    const auto nlocal = blockDim.x; // NOLINT
-#endif
-#ifdef MIGRAPHX_NGROUP
-    const auto ngroup = MIGRAPHX_NGROUP;
-#else
-    const auto ngroup = gridDim.x; // NOLINT
-#endif
-    const auto group_id = blockIdx.x; // NOLINT
-    const auto nglobal  = compute_global_size();
-    if(group_id == ngroup - 1)
-    {
-        return 1 + (nglobal - 1) % nlocal;
-    }
-    else
-    {
-        return nlocal; // NOLINT
-    }
+#ifdef MIGRAPHX_HAS_CONST_LOCAL
+    return MIGRAPHX_NLOCAL;
+#else
+    // Returns block size. For the non-uniform block it returns the size of the non-uniform block.
+    return __ockl_get_local_size(0); // NOLINT
+#endif
+}
+
+inline __device__ __attribute__((const)) index_int compute_max_local_size()
+{
+#ifdef MIGRAPHX_HAS_CONST_LOCAL
+    return MIGRAPHX_NLOCAL;
+#else
+    // Returns the block size. When workgrop has non-uniform block, this returns size of the
+    // uniform block.
+    return __ockl_get_enqueued_local_size(0); // NOLINT
+#endif
 }
 
 struct index
 {
@@ -126,8 +128,8 @@ struct index
 #else
     __device__ index_int max_nlocal() const
     {
-        MIGRAPHX_ASSERT(blockDim.x > 0);
-        return blockDim.x;
+        MIGRAPHX_ASSERT(compute_max_local_size() > 0);
+        return compute_max_local_size();
     }
 #endif
@@ -249,7 +251,8 @@ struct index
 #endif
 
 inline __device__ __attribute__((const)) index make_index()
 {
-    return index{blockIdx.x * blockDim.x + threadIdx.x, threadIdx.x, blockIdx.x}; // NOLINT
+    return index{
+        blockIdx.x * compute_max_local_size() + threadIdx.x, threadIdx.x, blockIdx.x}; // NOLINT
 }
 
 } // namespace migraphx
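Background for this rewrite: with non-uniform workgroups the trailing block can be shorter than the launch size, and the two OCKL intrinsics distinguish the cases. __ockl_get_local_size(0) returns the current block's actual size (possibly trimmed), while __ockl_get_enqueued_local_size(0) returns the uniform size requested at launch, which is why max_nlocal() and make_index() switch to compute_max_local_size(). The deleted branch computed the short trailing group by hand as 1 + (nglobal - 1) % nlocal; a quick host-side check of that identity (hypothetical main):

#include <cassert>
#include <cstddef>

int main()
{
    // What the removed code computed for the trailing, short workgroup:
    std::size_t nglobal = 1000, nlocal = 256;
    std::size_t last_group = 1 + (nglobal - 1) % nlocal;
    assert(last_group == 232);                  // 1000 = 3*256 + 232
    assert(3 * nlocal + last_group == nglobal); // the groups tile nglobal exactly
    return 0;
}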
test/eliminate_allocation_test.cpp
@@ -55,7 +55,7 @@ struct allocate
                                   const migraphx::shape& output_shape,
                                   const std::vector<migraphx::argument>&) const
     {
-        return {output_shape};
+        return migraphx::argument{output_shape};
     }
 };
test/eliminate_concat_test.cpp
@@ -60,7 +60,7 @@ struct concat
                                   const migraphx::shape& output_shape,
                                   const std::vector<migraphx::argument>&) const
     {
-        return {output_shape};
+        return migraphx::argument{output_shape};
     }
 };
@@ -104,7 +104,7 @@ struct allocate
                                   const migraphx::shape& output_shape,
                                   const std::vector<migraphx::argument>&) const
     {
-        return {output_shape};
+        return migraphx::argument{output_shape};
     }
 };
test/memory_coloring_test.cpp
@@ -55,7 +55,7 @@ struct allocate
                                   const migraphx::shape& output_shape,
                                   const std::vector<migraphx::argument>&) const
     {
-        return {output_shape};
+        return migraphx::argument{output_shape};
     }
 };
test/normalize_ops_test.cpp
@@ -57,7 +57,7 @@ struct normalize_test_op
                                   const migraphx::shape& output_shape,
                                   const std::vector<migraphx::argument>&) const
     {
-        return {output_shape};
+        return migraphx::argument{output_shape};
     }
 };
test/onnx/.onnxrt-commit
@@ -1 +1 @@
-6d7bc2a097a1a08541cd0d4628831c79ab8092d5
+635d3faa3b3908d2806d009dc6872152cfcfcdda
test/onnx/gen_onnx.py
@@ -4681,6 +4681,77 @@ def mean_integral_test():
     return ([node], data, [mean])
 
 
+def mvn_default_axes_test_base(dims, type=TensorProto.FLOAT):
+    data = helper.make_tensor_value_info("data", type, dims)
+    out = helper.make_tensor_value_info("out", type, dims)
+    node = helper.make_node("MeanVarianceNormalization",
+                            inputs=["data"],
+                            outputs=["out"])
+    return ([node], [data], [out])
+
+
+@onnx_test()
+def mvn_default_axes_test():
+    return mvn_default_axes_test_base([2, 2, 2, 2])
+
+
+@onnx_test()
+def mvn_default_axes_fp16_test():
+    return mvn_default_axes_test_base([2, 2, 2, 2], TensorProto.FLOAT16)
+
+
+@onnx_test()
+def mvn_default_axes_rank_too_small_test():
+    return mvn_default_axes_test_base([2, 2, 2])
+
+
+@onnx_test()
+def mvn_default_axes_rank_too_big_test():
+    return mvn_default_axes_test_base([2, 2, 2, 2, 2])
+
+
+def mvn_n_rank_test_base(axes, dims, type=TensorProto.FLOAT):
+    data = helper.make_tensor_value_info("data", type, dims)
+    out = helper.make_tensor_value_info("out", type, dims)
+    node = helper.make_node("MeanVarianceNormalization",
+                            inputs=["data"],
+                            outputs=["out"],
+                            axes=axes)
+    return ([node], [data], [out])
+
+
+@onnx_test()
+def mvn_rank_2_test():
+    return mvn_n_rank_test_base([1], [2, 2])
+
+
+@onnx_test()
+def mvn_rank_2_fp16_test():
+    return mvn_n_rank_test_base([1], [2, 2], TensorProto.FLOAT16)
+
+
+@onnx_test()
+def mvn_rank_3_test():
+    return mvn_n_rank_test_base([0, 1], [2, 2, 2])
+
+
+@onnx_test()
+def mvn_rank_3_fp16_test():
+    return mvn_n_rank_test_base([0, 1], [2, 2, 2], TensorProto.FLOAT16)
+
+
+@onnx_test()
+def mvn_axes_rank_too_small_test():
+    return mvn_n_rank_test_base([0, 1, 2], [2, 2, 2])
+
+
+@onnx_test()
+def mvn_axes_rank_too_big_test():
+    return mvn_n_rank_test_base([0], [2, 2, 2])
+
+
 @onnx_test()
 def min_test():
     a = helper.make_tensor_value_info('0', TensorProto.FLOAT, [3])
@@ -8502,7 +8573,7 @@ def transpose_gather_test():
 
 @onnx_test()
-def trilu_test():
+def triu_test():
     x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [3, 4])
     y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [3, 4])
@@ -8515,7 +8586,7 @@ def trilu_test():
 
 @onnx_test()
-def trilu_batch_diff_k_test():
+def triu_batch_diff_k_test():
     x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [2, 2, 3])
     k = np.array([2])
     y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [2, 2, 3])
@@ -8533,7 +8604,24 @@ def trilu_batch_diff_k_test():
 
 @onnx_test()
-def trilu_lower_test():
+def tril_batch_diff_k_test():
+    x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [2, 2, 3])
+    k = np.array([2])
+    y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [2, 2, 3])
+    k_tensor = helper.make_tensor(name='k',
+                                  data_type=TensorProto.INT64,
+                                  dims=k.shape,
+                                  vals=k.astype(np.int64))
+
+    node = onnx.helper.make_node('Trilu',
+                                 inputs=['x', 'k'],
+                                 outputs=['y'],
+                                 upper=0)
+
+    return ([node], [x], [y], [k_tensor])
+
+
+@onnx_test()
+def tril_test():
     x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [3, 4])
     y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [3, 4])
@@ -8542,7 +8630,7 @@ def trilu_lower_test():
 
 @onnx_test()
-def trilu_neg_k_test():
+def triu_neg_k_test():
     x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [3, 4])
     k = np.array([-1])
     y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [3, 4])
@@ -8556,7 +8644,23 @@ def trilu_neg_k_test():
 
 @onnx_test()
-def trilu_out_k_test():
+def tril_neg_k_test():
+    x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [3, 4])
+    k = np.array([-1])
+    y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [3, 4])
+    k_tensor = helper.make_tensor(name='k',
+                                  data_type=TensorProto.INT64,
+                                  dims=k.shape,
+                                  vals=k.astype(np.int64))
+
+    node = onnx.helper.make_node('Trilu',
+                                 inputs=['x', 'k'],
+                                 outputs=['y'],
+                                 upper=0)
+
+    return ([node], [x], [y], [k_tensor])
+
+
+@onnx_test()
+def triu_out_k_test():
     x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [3, 4])
     k = np.array([5])
     y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [3, 4])
@@ -8570,7 +8674,23 @@ def trilu_out_k_test():
 
 @onnx_test()
-def trilu_row_one_test():
+def tril_out_k_test():
+    x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [3, 4])
+    k = np.array([5])
+    y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [3, 4])
+    k_tensor = helper.make_tensor(name='k',
+                                  data_type=TensorProto.INT64,
+                                  dims=k.shape,
+                                  vals=k.astype(np.int64))
+
+    node = onnx.helper.make_node('Trilu',
+                                 inputs=['x', 'k'],
+                                 outputs=['y'],
+                                 upper=0)
+
+    return ([node], [x], [y], [k_tensor])
+
+
+@onnx_test()
+def triu_row_one_test():
     x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [1, 4])
     k = np.array([1])
     y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [1, 4])
@@ -8587,6 +8707,23 @@ def trilu_row_one_test():
     return ([node], [x], [y], [k_tensor])
 
 
+@onnx_test()
+def tril_row_one_test():
+    x = helper.make_tensor_value_info('x', TensorProto.FLOAT, [1, 4])
+    k = np.array([1])
+    y = helper.make_tensor_value_info('y', TensorProto.FLOAT, [1, 4])
+    k_tensor = helper.make_tensor(name='k',
+                                  data_type=TensorProto.INT64,
+                                  dims=k.shape,
+                                  vals=k.astype(np.int64))
+
+    node = onnx.helper.make_node('Trilu',
+                                 inputs=['x', 'k'],
+                                 outputs=['y'],
+                                 upper=0)
+
+    return ([node], [x], [y], [k_tensor])
+
+
 @onnx_test()
 def undefined_test():
     x = helper.make_tensor_value_info('0', TensorProto.FLOAT, [2, 3, 4, 5])
test/onnx/mvn_axes_rank_too_big_test.onnx (new file, 0 → 100644)
File added

test/onnx/mvn_axes_rank_too_small_test.onnx (new file, 0 → 100644)
File added