Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
23851d62
Commit
23851d62
authored
Oct 19, 2022
by
Khalique Ahmed
Browse files
Merge branch 'develop' of
https://github.com/ROCmSoftwarePlatform/AMDMIGraphX
into develop
parents
41d4e92b
5fa42993
Changes
26
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
448 additions
and
845 deletions
+448
-845
Dockerfile
Dockerfile
+1
-1
src/driver/verify.cpp
src/driver/verify.cpp
+2
-1
src/include/migraphx/reflect.hpp
src/include/migraphx/reflect.hpp
+14
-4
src/include/migraphx/streamutils.hpp
src/include/migraphx/streamutils.hpp
+16
-0
src/onnx/parse_batchnorm.cpp
src/onnx/parse_batchnorm.cpp
+11
-10
src/targets/gpu/CMakeLists.txt
src/targets/gpu/CMakeLists.txt
+3
-6
src/targets/gpu/convolution.cpp
src/targets/gpu/convolution.cpp
+0
-271
src/targets/gpu/deconvolution.cpp
src/targets/gpu/deconvolution.cpp
+0
-184
src/targets/gpu/fuse_mlir.cpp
src/targets/gpu/fuse_mlir.cpp
+7
-3
src/targets/gpu/fuse_ops.cpp
src/targets/gpu/fuse_ops.cpp
+11
-11
src/targets/gpu/include/migraphx/gpu/convolution.hpp
src/targets/gpu/include/migraphx/gpu/convolution.hpp
+285
-16
src/targets/gpu/include/migraphx/gpu/mlir.hpp
src/targets/gpu/include/migraphx/gpu/mlir.hpp
+2
-1
src/targets/gpu/include/migraphx/gpu/perfdb.hpp
src/targets/gpu/include/migraphx/gpu/perfdb.hpp
+1
-1
src/targets/gpu/include/migraphx/gpu/quant_convolution.hpp
src/targets/gpu/include/migraphx/gpu/quant_convolution.hpp
+0
-73
src/targets/gpu/jit/mlir.cpp
src/targets/gpu/jit/mlir.cpp
+1
-1
src/targets/gpu/lowering.cpp
src/targets/gpu/lowering.cpp
+18
-50
src/targets/gpu/mlir.cpp
src/targets/gpu/mlir.cpp
+63
-9
src/targets/gpu/perfdb.cpp
src/targets/gpu/perfdb.cpp
+6
-5
src/targets/gpu/quant_convolution.cpp
src/targets/gpu/quant_convolution.cpp
+0
-194
test/api/test_custom_op_gpu.cpp
test/api/test_custom_op_gpu.cpp
+7
-4
No files found.
Dockerfile
100755 → 100644
View file @
23851d62
...
@@ -86,7 +86,7 @@ RUN git clone --single-branch --branch ${ONNXRUNTIME_BRANCH} --recursive ${ONNXR
...
@@ -86,7 +86,7 @@ RUN git clone --single-branch --branch ${ONNXRUNTIME_BRANCH} --recursive ${ONNXR
ADD
tools/build_and_test_onnxrt.sh /onnxruntime/build_and_test_onnxrt.sh
ADD
tools/build_and_test_onnxrt.sh /onnxruntime/build_and_test_onnxrt.sh
RUN
cget
-p
/usr/local
install
ROCmSoftwarePlatform/llvm-project-mlir@
e8e77eb16be413d301ea8509726d47f265d9011f
-DBUILD_MIXR_TARGET
=
On
RUN
cget
-p
/usr/local
install
ROCmSoftwarePlatform/llvm-project-mlir@
c0723a7e50043d973cb73ae51dc30d36679ee7e5
-DBUILD_MIXR_TARGET
=
On
ENV
MIOPEN_FIND_DB_PATH=/tmp/miopen/find-db
ENV
MIOPEN_FIND_DB_PATH=/tmp/miopen/find-db
ENV
MIOPEN_USER_DB_PATH=/tmp/miopen/user-db
ENV
MIOPEN_USER_DB_PATH=/tmp/miopen/user-db
...
...
src/driver/verify.cpp
View file @
23851d62
...
@@ -145,7 +145,7 @@ void verify_reduced(program p,
...
@@ -145,7 +145,7 @@ void verify_reduced(program p,
auto
*
mm
=
p
.
get_main_module
();
auto
*
mm
=
p
.
get_main_module
();
auto
last
=
std
::
prev
(
mm
->
end
(),
n
+
1
);
auto
last
=
std
::
prev
(
mm
->
end
(),
n
+
1
);
mm
->
remove_instructions
(
last
,
mm
->
end
());
mm
->
remove_instructions
(
last
,
mm
->
end
());
std
::
cout
<<
"Verify: "
<<
std
::
endl
;
std
::
cout
<<
"Verify: "
<<
n
<<
std
::
endl
;
std
::
cout
<<
p
<<
std
::
endl
;
std
::
cout
<<
p
<<
std
::
endl
;
verify_program
(
std
::
to_string
(
n
),
p
,
t
,
options
,
quantize
,
inputs
,
tolerance
);
verify_program
(
std
::
to_string
(
n
),
p
,
t
,
options
,
quantize
,
inputs
,
tolerance
);
}
}
...
@@ -159,6 +159,7 @@ void verify_reduced_program(const program& p,
...
@@ -159,6 +159,7 @@ void verify_reduced_program(const program& p,
{
{
const
auto
*
mm
=
p
.
get_main_module
();
const
auto
*
mm
=
p
.
get_main_module
();
auto
n
=
std
::
distance
(
mm
->
begin
(),
mm
->
end
());
auto
n
=
std
::
distance
(
mm
->
begin
(),
mm
->
end
());
std
::
cout
<<
"Verify steps: "
<<
n
<<
std
::
endl
;
for
(
std
::
size_t
i
=
0
;
i
<
n
;
i
++
)
for
(
std
::
size_t
i
=
0
;
i
<
n
;
i
++
)
{
{
verify_reduced
(
p
,
i
,
t
,
options
,
quantize
,
inputs
,
tolerance
);
verify_reduced
(
p
,
i
,
t
,
options
,
quantize
,
inputs
,
tolerance
);
...
...
src/include/migraphx/reflect.hpp
View file @
23851d62
...
@@ -56,11 +56,11 @@ auto reflect_impl(rank<0>, T&, Selector)
...
@@ -56,11 +56,11 @@ auto reflect_impl(rank<0>, T&, Selector)
}
}
template
<
class
T
>
template
<
class
T
>
auto
reflectable_impl
(
rank
<
1
>
,
T
&
&
x
)
auto
reflectable_impl
(
rank
<
1
>
,
const
T
&
x
)
->
decltype
(
T
::
reflect
(
x
,
reflect_placeholder
{}),
std
::
true_type
{});
->
decltype
(
T
::
reflect
(
x
,
reflect_placeholder
{}),
std
::
true_type
{});
template
<
class
T
>
template
<
class
T
>
auto
reflectable_impl
(
rank
<
0
>
,
T
&
&
)
->
decltype
(
std
::
false_type
{});
auto
reflectable_impl
(
rank
<
0
>
,
const
T
&
)
->
decltype
(
std
::
false_type
{});
template
<
class
T
>
template
<
class
T
>
struct
remove_rvalue_reference
struct
remove_rvalue_reference
...
@@ -111,8 +111,18 @@ auto reflect(T& x, Selector f)
...
@@ -111,8 +111,18 @@ auto reflect(T& x, Selector f)
template
<
class
T
>
template
<
class
T
>
auto
reflect_tie
(
T
&
x
)
auto
reflect_tie
(
T
&
x
)
{
{
return
reflect
(
x
,
[](
auto
&&
y
,
auto
&&
...)
{
return
detail
::
wrap
<
decltype
(
y
)
>
(
y
);
})(
return
reflect
(
x
,
[](
auto
&&
y
,
auto
&&
...)
{
[](
auto
&&
...
xs
)
{
return
detail
::
auto_tuple
(
xs
.
get
()...);
});
// cppcheck-suppress UnnecessaryElseStatement
if
constexpr
(
is_reflectable
<
decltype
(
y
)
>
{})
{
auto
t
=
reflect_tie
(
y
);
return
detail
::
wrap
<
decltype
(
t
)
>
(
t
);
}
else
{
return
detail
::
wrap
<
decltype
(
y
)
>
(
y
);
}
})([](
auto
&&
...
xs
)
{
return
detail
::
auto_tuple
(
xs
.
get
()...);
});
}
}
template
<
class
T
,
class
F
>
template
<
class
T
,
class
F
>
...
...
src/include/migraphx/streamutils.hpp
View file @
23851d62
...
@@ -26,7 +26,9 @@
...
@@ -26,7 +26,9 @@
#include <ostream>
#include <ostream>
#include <algorithm>
#include <algorithm>
#include <migraphx/reflect.hpp>
#include <migraphx/rank.hpp>
#include <migraphx/rank.hpp>
#include <migraphx/requires.hpp>
#include <migraphx/config.hpp>
#include <migraphx/config.hpp>
#include <vector>
#include <vector>
...
@@ -83,6 +85,20 @@ auto stream_write_value_impl(rank<0>, std::ostream& os, const Range& r)
...
@@ -83,6 +85,20 @@ auto stream_write_value_impl(rank<0>, std::ostream& os, const Range& r)
os
<<
"}"
;
os
<<
"}"
;
}
}
template
<
class
T
,
MIGRAPHX_REQUIRES
(
is_reflectable
<
T
>{})
>
void
stream_write_value_impl
(
rank
<
0
>
,
std
::
ostream
&
os
,
const
T
&
x
)
{
char
delim
=
'{'
;
reflect_each
(
x
,
[
&
](
auto
&&
y
,
auto
name
)
{
os
<<
delim
;
os
<<
name
<<
"="
;
stream_write_value_impl
(
rank
<
2
>
{},
os
,
y
);
delim
=
','
;
});
if
(
delim
==
','
)
os
<<
"}"
;
}
}
// namespace detail
}
// namespace detail
template
<
class
T
>
template
<
class
T
>
...
...
src/onnx/parse_batchnorm.cpp
View file @
23851d62
...
@@ -54,18 +54,19 @@ struct parse_batchnorm : op_parser<parse_batchnorm>
...
@@ -54,18 +54,19 @@ struct parse_batchnorm : op_parser<parse_batchnorm>
MIGRAPHX_THROW
(
"PARSE_BATCHNORM: argument scale, bias, mean, or var rank != 1"
);
MIGRAPHX_THROW
(
"PARSE_BATCHNORM: argument scale, bias, mean, or var rank != 1"
);
}
}
if
(
x_lens
.
size
()
==
1
)
auto
x_rank
=
x_lens
.
size
();
if
(
x_rank
==
1
or
x_rank
==
2
)
{
{
auto
rt
=
info
.
add_literal
(
migraphx
::
literal
{
migraphx
::
shape
{
x_type
},
{
0.5
}});
auto
rt
=
info
.
add_literal
(
migraphx
::
literal
{
migraphx
::
shape
{
x_type
},
{
0.5
}});
auto
eps
=
info
.
add_literal
(
migraphx
::
literal
{
migraphx
::
shape
{
x_type
},
{
epsilon
}});
auto
eps
=
info
.
add_literal
(
migraphx
::
literal
{
migraphx
::
shape
{
x_type
},
{
epsilon
}});
auto
n
0
=
info
.
add_broadcastable_binary_op
(
"sub"
,
args
[
0
],
args
[
3
]);
auto
n
umer
=
info
.
add_broadcastable_binary_op
(
"sub"
,
args
[
0
],
args
[
3
]);
auto
d0
=
info
.
add_broadcastable_binary_op
(
"add"
,
args
[
4
],
eps
);
auto
var_eps
=
info
.
add_broadcastable_binary_op
(
"add"
,
args
[
4
],
eps
);
auto
d
1
=
info
.
add_broadcastable_binary_op
(
"pow"
,
d0
,
rt
);
auto
d
enom
=
info
.
add_broadcastable_binary_op
(
"pow"
,
var_eps
,
rt
);
auto
div0
=
info
.
add_broadcastable_binary_op
(
"div"
,
n
0
,
d1
);
auto
div0
=
info
.
add_broadcastable_binary_op
(
"div"
,
n
umer
,
denom
);
auto
r0
=
info
.
add_broadcastable_binary_op
(
"mul"
,
div0
,
args
[
1
]);
auto
r0
=
info
.
add_broadcastable_binary_op
(
"mul"
,
div0
,
args
[
1
]);
return
info
.
add_broadcastable_binary_op
(
"add"
,
r0
,
args
[
2
]);
return
info
.
add_broadcastable_binary_op
(
"add"
,
r0
,
args
[
2
]);
}
}
else
if
(
x_
lens
.
size
()
>
2
)
else
if
(
x_
rank
>
2
)
{
{
// unsqueeze tensors of shape (C) to broadcast correctly
// unsqueeze tensors of shape (C) to broadcast correctly
std
::
vector
<
int64_t
>
unsqueeze_axes
(
x_lens
.
size
()
-
2
);
std
::
vector
<
int64_t
>
unsqueeze_axes
(
x_lens
.
size
()
-
2
);
...
@@ -89,7 +90,7 @@ struct parse_batchnorm : op_parser<parse_batchnorm>
...
@@ -89,7 +90,7 @@ struct parse_batchnorm : op_parser<parse_batchnorm>
}
}
else
else
{
{
//
num dims either 0 or 2
//
rank == 0
MIGRAPHX_THROW
(
"PARSE_BATCHNORM: rank "
+
std
::
to_string
(
x_lens
.
size
())
+
MIGRAPHX_THROW
(
"PARSE_BATCHNORM: rank "
+
std
::
to_string
(
x_lens
.
size
())
+
" input tensor, unhandled data format"
);
" input tensor, unhandled data format"
);
}
}
...
...
src/targets/gpu/CMakeLists.txt
View file @
23851d62
...
@@ -84,8 +84,6 @@ add_library(migraphx_gpu
...
@@ -84,8 +84,6 @@ add_library(migraphx_gpu
compile_hip.cpp
compile_hip.cpp
compile_hip_code_object.cpp
compile_hip_code_object.cpp
compiler.cpp
compiler.cpp
convolution.cpp
deconvolution.cpp
device_name.cpp
device_name.cpp
elu.cpp
elu.cpp
fuse_mlir.cpp
fuse_mlir.cpp
...
@@ -110,7 +108,6 @@ add_library(migraphx_gpu
...
@@ -110,7 +108,6 @@ add_library(migraphx_gpu
pad.cpp
pad.cpp
perfdb.cpp
perfdb.cpp
pooling.cpp
pooling.cpp
quant_convolution.cpp
reverse.cpp
reverse.cpp
rnn_variable_seq_lens.cpp
rnn_variable_seq_lens.cpp
rocblas.cpp
rocblas.cpp
...
@@ -146,14 +143,11 @@ register_migraphx_gpu_ops(hip_
...
@@ -146,14 +143,11 @@ register_migraphx_gpu_ops(hip_
register_migraphx_gpu_ops
(
miopen_
register_migraphx_gpu_ops
(
miopen_
abs
abs
contiguous
contiguous
convolution
deconvolution
elu
elu
int8_conv_pack
int8_conv_pack
leaky_relu
leaky_relu
lrn
lrn
pooling
pooling
quant_convolution
)
)
register_op
(
migraphx_gpu
register_op
(
migraphx_gpu
HEADER migraphx/gpu/rnn_variable_seq_lens.hpp
HEADER migraphx/gpu/rnn_variable_seq_lens.hpp
...
@@ -167,6 +161,9 @@ register_op(migraphx_gpu
...
@@ -167,6 +161,9 @@ register_op(migraphx_gpu
HEADER migraphx/gpu/gemm.hpp
HEADER migraphx/gpu/gemm.hpp
OPERATORS gpu::rocblas_gemm<op::dot> gpu::rocblas_gemm<op::quant_dot>
OPERATORS gpu::rocblas_gemm<op::dot> gpu::rocblas_gemm<op::quant_dot>
INCLUDES migraphx/gpu/context.hpp
)
INCLUDES migraphx/gpu/context.hpp
)
register_op
(
migraphx_gpu HEADER migraphx/gpu/convolution.hpp
OPERATORS gpu::miopen_convolution<op::convolution> gpu::miopen_convolution<op::deconvolution> gpu::miopen_convolution<op::quant_convolution>
INCLUDES migraphx/gpu/context.hpp
)
rocm_set_soversion
(
migraphx_gpu
${
MIGRAPHX_SO_VERSION
}
)
rocm_set_soversion
(
migraphx_gpu
${
MIGRAPHX_SO_VERSION
}
)
rocm_clang_tidy_check
(
migraphx_gpu
)
rocm_clang_tidy_check
(
migraphx_gpu
)
...
...
src/targets/gpu/convolution.cpp
deleted
100644 → 0
View file @
41d4e92b
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/convolution.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/generate.hpp>
#include <miopen/miopen.h>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
// Validates the inputs of the lowered convolution and returns its output shape.
//
// `inputs` must contain exactly four shapes, all in standard layout. Only the
// first two are forwarded to the wrapped operator's normalize_compute_shape(),
// after checking that neither has more than five dimensions.
// NOTE(review): the four inputs look like {x, w, workspace, output buffer},
// judging by how compute() indexes its arguments -- confirm against lowering.
shape miopen_convolution::compute_shape(const std::vector<shape>& inputs) const
{
    check_shapes{inputs, *this}.has(4).standard();

    const auto first = inputs.begin();
    std::vector<shape> leading_pair(first, first + 2);
    check_shapes{leading_pair, *this}.max_ndims(5);

    return op.normalize_compute_shape(leading_pair);
}
// Returns `input` unchanged unless it has exactly three dimensions. In that
// case a new shape of the same type is built with a unit dimension inserted at
// index 2, e.g. lens {a, b, c} become {a, b, 1, c}.
// The 4-D result is constructed from the type and lens only.
inline shape reshape_if_1d(const shape& input)
{
    if(input.lens().size() != 3)
        return input;

    std::vector<size_t> padded = input.lens();
    padded.insert(padded.begin() + 2, 1);
    return shape{input.type(), padded};
}
// Runs the convolution through MIOpen and returns the output argument.
//
// Arguments are read positionally: args[0] is the input tensor, args[1] the
// weights, args[2] the workspace buffer and args[3] the output buffer, which
// is also the value returned.
//
// With MIGRAPHX_HAS_FIND_2_API the previously loaded solution (solution_ptr)
// is executed via miopenRunSolution; otherwise the immediate-mode API is used
// with the stored solution_id. Both paths throw through MIGRAPHX_THROW when
// no solution is available or when MIOpen reports a failure.
argument miopen_convolution::compute(context& ctx,
                                     const shape& output_shape,
                                     const std::vector<argument>& args) const
{
    const argument& input     = args[0];
    const argument& weights   = args[1];
    const argument& workspace = args[2];
    const argument& output    = args[3];

    auto x_desc = make_tensor(reshape_if_1d(input.get_shape()));
    auto w_desc = make_tensor(reshape_if_1d(weights.get_shape()));
    auto y_desc = make_tensor(reshape_if_1d(output_shape));

    auto* handle           = ctx.get_stream().get_miopen();
    auto workspace_in_bytes = workspace.get_shape().bytes();

#ifdef MIGRAPHX_HAS_FIND_2_API
    {
        // Tensor bindings in the order X, W, Y; descriptors are left null.
        const miopenTensorArgument_t bindings[3] = {
            {miopenTensorConvolutionX, nullptr, input.implicit()},
            {miopenTensorConvolutionW, nullptr, weights.implicit()},
            {miopenTensorConvolutionY, nullptr, output.implicit()},
        };

        if(solution_ptr.get() == nullptr)
            MIGRAPHX_THROW("MIOpen Convolution : Load MIOpen Solution before running it");

        auto run_status = miopenRunSolution(
            handle, solution_ptr.get(), 3, bindings, workspace.implicit(), workspace_in_bytes);
        if(run_status != miopenStatusSuccess)
            MIGRAPHX_THROW("MIOpen Convolution: running convolution using find_2.0 failed");

        return output;
    }
#else
    // Immediate mode: a solution id of 0 means no solution has been selected.
    if(solution_id == 0)
        MIGRAPHX_THROW("MIOpen Convolution: invalid solution ID");

    // Note the MIOpen argument order: weights first, then input.
    auto run_status = miopenConvolutionForwardImmediate(handle,
                                                        w_desc.get(),
                                                        weights.implicit(),
                                                        x_desc.get(),
                                                        input.implicit(),
                                                        cd.get(),
                                                        y_desc.get(),
                                                        output.implicit(),
                                                        workspace.implicit(),
                                                        workspace_in_bytes,
                                                        solution_id);
    if(run_status != miopenStatusSuccess)
        MIGRAPHX_THROW("MIOpen Convolution: running convolution failed");

    return output;
#endif
}
// Searches for a MIOpen solution for this convolution and returns the shape
// (int8, one dimension) of the workspace it needs.
//
// inputs[0] is the input tensor shape and inputs[1] the weights shape;
// `output_shape` describes the result. 3-D shapes are first expanded through
// reshape_if_1d().
//
// With MIGRAPHX_HAS_FIND_2_API the solution is found with find_solution(),
// kept in solution_ptr and additionally serialized into solution_object.
// Otherwise the immediate-mode API is used: the forward algorithm is stored in
// `algo` and the id of the first returned solution in `solution_id`.
//
// Throws through MIGRAPHX_THROW when any MIOpen call fails or, in immediate
// mode, when MIOpen reports no solution at all.
shape miopen_convolution::find(context& ctx, const shape& output_shape, std::vector<shape> inputs)
{
    auto x_desc = make_tensor(reshape_if_1d(inputs[0]));
    auto w_desc = make_tensor(reshape_if_1d(inputs[1]));
    auto y_desc = make_tensor(reshape_if_1d(output_shape));

    std::size_t workspace_size = 0;

#ifdef MIGRAPHX_HAS_FIND_2_API
    {
        auto conv_problem = make_obj<miopen_problem>(
            &miopenCreateConvProblem, cd.get(), miopenProblemDirectionForward);

        set_tensor_descriptor(miopenTensorConvolutionX, x_desc, conv_problem);
        set_tensor_descriptor(miopenTensorConvolutionW, w_desc, conv_problem);
        set_tensor_descriptor(miopenTensorConvolutionY, y_desc, conv_problem);

        auto* miopen_stream_handle = ctx.get_stream().get_miopen();

        solution_ptr = find_solution(miopen_stream_handle, conv_problem.get());

        auto status = miopenGetSolutionWorkspaceSize(solution_ptr.get(), &workspace_size);
        if(status != miopenStatusSuccess)
            MIGRAPHX_THROW("MIOpen Convolution : failed to get solution's workspace size");

        // Initialized so that a misbehaving call can never leave an
        // indeterminate size behind for the resize below.
        std::size_t solution_size = 0;
        status = miopenGetSolutionSize(solution_ptr.get(), &solution_size);
        if(status != miopenStatusSuccess)
            MIGRAPHX_THROW("MIOpen Convolution: Failed to fetch solution size");

        // Serialize the solution so it can be reloaded later without searching.
        auto solution_binary = std::vector<char>{};
        solution_binary.resize(solution_size);

        status = miopenSaveSolution(solution_ptr.get(), solution_binary.data());
        if(status != miopenStatusSuccess)
            MIGRAPHX_THROW("MIOpen Convolution: Saving solution failed");

        solution_object = value::binary{solution_binary.data(), solution_size};
        return shape{shape::int8_type, {workspace_size}};
    }
#else
    // Immediate find mode.
    auto status = miopenConvolutionForwardGetWorkSpaceSize(ctx.get_stream().get_miopen(),
                                                           w_desc.get(),
                                                           x_desc.get(),
                                                           cd.get(),
                                                           y_desc.get(),
                                                           &workspace_size);
    if(status != miopenStatusSuccess)
        MIGRAPHX_THROW("MIOpen Convolution: Failed to get forward workspace size");

    shape workspace_shape{shape::int8_type, {workspace_size}};

    // Scratch device buffers used only while MIOpen evaluates algorithms.
    auto x         = to_gpu(generate_argument(inputs[0]));
    auto w         = to_gpu(generate_argument(inputs[1]));
    auto y         = allocate_gpu(output_shape);
    auto workspace = allocate_gpu(workspace_shape);

    int algo_count = 1;
    miopenConvAlgoPerf_t perf{};
    status = miopenFindConvolutionForwardAlgorithm(ctx.get_stream().get_miopen(),
                                                   x_desc.get(),
                                                   x.implicit(),
                                                   w_desc.get(),
                                                   w.implicit(),
                                                   cd.get(),
                                                   y_desc.get(),
                                                   y.implicit(),
                                                   1,
                                                   &algo_count,
                                                   &perf,
                                                   workspace.implicit(),
                                                   workspace_size,
                                                   false);
    if(status != miopenStatusSuccess)
        MIGRAPHX_THROW("MIOpen Convolution: find convolution failed");
    algo = perf.fwd_algo;

    size_t solution_count = 0;
    status = miopenConvolutionForwardGetSolutionCount(ctx.get_stream().get_miopen(),
                                                      w_desc.get(),
                                                      x_desc.get(),
                                                      cd.get(),
                                                      y_desc.get(),
                                                      &solution_count);
    if(status != miopenStatusSuccess)
        MIGRAPHX_THROW("MIOpen Convolution: get solution count failed");
    // An empty vector must never reach front() below.
    if(solution_count == 0)
        MIGRAPHX_THROW("MIOpen Convolution: no solutions found");

    std::vector<miopenConvSolution_t> solutions(solution_count);
    status = miopenConvolutionForwardGetSolution(ctx.get_stream().get_miopen(),
                                                 w_desc.get(),
                                                 x_desc.get(),
                                                 cd.get(),
                                                 y_desc.get(),
                                                 solution_count,
                                                 &solution_count,
                                                 solutions.data());
    if(status != miopenStatusSuccess)
        MIGRAPHX_THROW("MIOpen Convolution: get solution failed");
    // solution_count now holds the number of entries actually written.
    if(solution_count == 0)
        MIGRAPHX_THROW("MIOpen Convolution: no solutions found");

    solution_id = solutions.front().solution_id;

    return shape{shape::int8_type, {perf.memory}};
#endif
}
// Prepares the operator for execution after compilation.
//
// With MIGRAPHX_HAS_FIND_2_API: if no solution is loaded yet, the serialized
// solution in solution_object is loaded through miopenLoadSolution and stored
// in solution_ptr. The parameters are unused in this configuration.
//
// Otherwise (immediate mode): creates the convolution descriptor if missing,
// runs find() when no solution id has been chosen yet (rejecting a result that
// needs more workspace than inputs.at(2) provides), and compiles the selected
// solution.
//
// Throws through MIGRAPHX_THROW when loading, finding or compiling fails.
void miopen_convolution::finalize(context& ctx,
                                  const shape& output_shape,
                                  const std::vector<shape>& inputs)
{
#ifdef MIGRAPHX_HAS_FIND_2_API
    {
        (void)(ctx); // avoid warnings
        (void)(output_shape);
        (void)(inputs);
        // load solution
        if(solution_ptr == nullptr)
        {
            miopenSolution_t ptr = nullptr;
            auto status          = miopenLoadSolution(&ptr,
                                             reinterpret_cast<const char*>(solution_object.data()),
                                             solution_object.size());
            // Check the status before taking ownership so that a failed load
            // never leaves an indeterminate handle inside solution_ptr.
            if(status != miopenStatusSuccess)
                MIGRAPHX_THROW("MIOpen Convolution: loading convolution solution failed");
            solution_ptr = miopen_solution{ptr};
        }
    }
#else
    // Use immediate mode API
    {
        if(cd == nullptr)
            cd = make_conv(op);
        if(solution_id == 0)
        {
            // Check that workspace hasn't changed
            auto size = inputs.at(2).bytes();
            auto ws   = find(ctx, output_shape, inputs);
            if(ws.bytes() > size)
                MIGRAPHX_THROW("MIOpen Convolution: workspace has changed during finalization.");
        }
        auto x_desc = make_tensor(reshape_if_1d(inputs[0]));
        auto w_desc = make_tensor(reshape_if_1d(inputs[1]));
        auto y_desc = make_tensor(reshape_if_1d(output_shape));
        // Note the MIOpen argument order: weights descriptor first, then input.
        auto status = miopenConvolutionForwardCompileSolution(ctx.get_stream().get_miopen(),
                                                              w_desc.get(),
                                                              x_desc.get(),
                                                              cd.get(),
                                                              y_desc.get(),
                                                              solution_id);
        if(status != miopenStatusSuccess)
            MIGRAPHX_THROW("MIOpen Convolution: compile solution failed");
    }
#endif
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/deconvolution.cpp
deleted
100644 → 0
View file @
41d4e92b
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/deconvolution.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/generate.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
// Validates the inputs of the lowered deconvolution and returns its output
// shape.
//
// `inputs` must contain exactly four shapes, all in standard layout. Only the
// first two are forwarded to the wrapped operator's compute_shape(), after
// checking that neither has more than five dimensions.
shape miopen_deconvolution::compute_shape(const std::vector<shape>& inputs) const
{
    check_shapes{inputs, *this}.has(4).standard();

    const auto first = inputs.begin();
    std::vector<shape> leading_pair(first, first + 2);
    check_shapes{leading_pair, *this}.max_ndims(5);

    return op.compute_shape(leading_pair);
}
// Returns `input` unchanged unless it has exactly three dimensions. In that
// case a new shape of the same type is built with a unit dimension inserted at
// index 2, e.g. lens {a, b, c} become {a, b, 1, c}.
// The 4-D result is constructed from the type and lens only.
inline shape reshape_if_1d(const shape& input)
{
    if(input.lens().size() != 3)
        return input;

    std::vector<size_t> padded = input.lens();
    padded.insert(padded.begin() + 2, 1);
    return shape{input.type(), padded};
}
// Runs the deconvolution through MIOpen's immediate-mode forward API and
// returns the output argument.
//
// Arguments are read positionally: args[0] is the input tensor, args[1] the
// weights, args[2] the workspace buffer and args[3] the output buffer, which
// is also the value returned.
//
// Throws through MIGRAPHX_THROW when no solution id has been selected
// (solution_id == 0) or when MIOpen reports a failure.
argument miopen_deconvolution::compute(context& ctx,
                                       const shape& output_shape,
                                       const std::vector<argument>& args) const
{
    const argument& input     = args[0];
    const argument& weights   = args[1];
    const argument& workspace = args[2];
    const argument& output    = args[3];

    auto x_desc = make_tensor(reshape_if_1d(input.get_shape()));
    auto w_desc = make_tensor(reshape_if_1d(weights.get_shape()));
    auto y_desc = make_tensor(reshape_if_1d(output_shape));

    if(solution_id == 0)
        MIGRAPHX_THROW("MIOpen Deconvolution: invalid solution ID");

    // Note the MIOpen argument order: weights first, then input.
    auto run_status = miopenConvolutionForwardImmediate(ctx.get_stream().get_miopen(),
                                                        w_desc.get(),
                                                        weights.implicit(),
                                                        x_desc.get(),
                                                        input.implicit(),
                                                        cd.get(),
                                                        y_desc.get(),
                                                        output.implicit(),
                                                        workspace.implicit(),
                                                        workspace.get_shape().bytes(),
                                                        solution_id);
    if(run_status != miopenStatusSuccess)
        MIGRAPHX_THROW("MIOpen Deconvolution: running convolution failed");

    return output;
}
// Searches for a MIOpen solution for this deconvolution and returns the shape
// (int8, one dimension) of the workspace reported by the algorithm search.
//
// inputs[0] is the input tensor shape and inputs[1] the weights shape;
// `output_shape` describes the result. 3-D shapes are first expanded through
// reshape_if_1d(). On success the forward algorithm is stored in `algo` and
// the id of the first returned solution in `solution_id`.
//
// Throws through MIGRAPHX_THROW when any MIOpen call fails or when MIOpen
// reports no solution at all.
shape miopen_deconvolution::find(context& ctx, const shape& output_shape, std::vector<shape> inputs)
{
    auto x_desc = make_tensor(reshape_if_1d(inputs[0]));
    auto w_desc = make_tensor(reshape_if_1d(inputs[1]));
    auto y_desc = make_tensor(reshape_if_1d(output_shape));

    std::size_t workspace_size = 0;
    // The status of this query used to be ignored; a failure would silently
    // continue with a zero-sized workspace.
    auto status = miopenConvolutionForwardGetWorkSpaceSize(ctx.get_stream().get_miopen(),
                                                           w_desc.get(),
                                                           x_desc.get(),
                                                           cd.get(),
                                                           y_desc.get(),
                                                           &workspace_size);
    if(status != miopenStatusSuccess)
        MIGRAPHX_THROW("MIOpen Deconvolution: Failed to get forward workspace size");

    shape workspace_shape{shape::int8_type, {workspace_size}};

    // Scratch device buffers used only while MIOpen evaluates algorithms.
    auto x         = to_gpu(generate_argument(inputs[0]));
    auto w         = to_gpu(generate_argument(inputs[1]));
    auto y         = allocate_gpu(output_shape);
    auto workspace = allocate_gpu(workspace_shape);

    int algo_count = 1;
    miopenConvAlgoPerf_t perf{};
    status = miopenFindConvolutionForwardAlgorithm(ctx.get_stream().get_miopen(),
                                                   x_desc.get(),
                                                   x.implicit(),
                                                   w_desc.get(),
                                                   w.implicit(),
                                                   cd.get(),
                                                   y_desc.get(),
                                                   y.implicit(),
                                                   1,
                                                   &algo_count,
                                                   &perf,
                                                   workspace.implicit(),
                                                   workspace_size,
                                                   false);
    if(status != miopenStatusSuccess)
        MIGRAPHX_THROW("MIOpen Deconvolution: find convolution failed");
    algo = perf.fwd_algo;

    size_t solution_count = 0;
    status = miopenConvolutionForwardGetSolutionCount(ctx.get_stream().get_miopen(),
                                                      w_desc.get(),
                                                      x_desc.get(),
                                                      cd.get(),
                                                      y_desc.get(),
                                                      &solution_count);
    if(status != miopenStatusSuccess)
        MIGRAPHX_THROW("MIOpen Deconvolution: get solution count failed");
    // An empty vector must never reach front() below.
    if(solution_count == 0)
        MIGRAPHX_THROW("MIOpen Deconvolution: no solutions found");

    std::vector<miopenConvSolution_t> solutions(solution_count);
    status = miopenConvolutionForwardGetSolution(ctx.get_stream().get_miopen(),
                                                 w_desc.get(),
                                                 x_desc.get(),
                                                 cd.get(),
                                                 y_desc.get(),
                                                 solution_count,
                                                 &solution_count,
                                                 solutions.data());
    if(status != miopenStatusSuccess)
        MIGRAPHX_THROW("MIOpen Deconvolution: get solution failed");
    // solution_count now holds the number of entries actually written.
    if(solution_count == 0)
        MIGRAPHX_THROW("MIOpen Deconvolution: no solutions found");

    solution_id = solutions.front().solution_id;

    return shape{shape::int8_type, {perf.memory}};
}
// Prepares the deconvolution for execution after compilation.
//
// Creates the descriptor `cd` when it is missing. If no solution id has been
// selected yet, find() is run and its workspace requirement is compared with
// the size of inputs.at(2); a larger requirement is rejected. Finally the
// selected solution is compiled through MIOpen.
//
// Throws through MIGRAPHX_THROW when the workspace requirement grew or when
// compiling the solution fails.
void miopen_deconvolution::finalize(context& ctx,
                                    const shape& output_shape,
                                    std::vector<shape> inputs)
{
    if(cd == nullptr)
        cd = make_deconv(op);

    if(solution_id == 0)
    {
        // The workspace allocated earlier must still be large enough.
        auto available = inputs.at(2).bytes();
        auto required  = find(ctx, output_shape, inputs);
        const bool fits = required.bytes() <= available;
        if(not fits)
            MIGRAPHX_THROW("MIOpen Deconvolution: workspace has changed during finalization.");
    }

    auto x_desc = make_tensor(reshape_if_1d(inputs[0]));
    auto w_desc = make_tensor(reshape_if_1d(inputs[1]));
    auto y_desc = make_tensor(reshape_if_1d(output_shape));

    // Note the MIOpen argument order: weights descriptor first, then input.
    auto compile_status = miopenConvolutionForwardCompileSolution(ctx.get_stream().get_miopen(),
                                                                  w_desc.get(),
                                                                  x_desc.get(),
                                                                  cd.get(),
                                                                  y_desc.get(),
                                                                  solution_id);
    if(compile_status != miopenStatusSuccess)
        MIGRAPHX_THROW("MIOpen Deconvolution: compile solution failed");
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/fuse_mlir.cpp
View file @
23851d62
...
@@ -49,7 +49,7 @@ struct mlir_conv
...
@@ -49,7 +49,7 @@ struct mlir_conv
std
::
string
name
()
const
{
return
"gpu::mlir_conv"
;
}
std
::
string
name
()
const
{
return
"gpu::mlir_conv"
;
}
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
,
const
std
::
vector
<
module_ref
>&
mods
)
const
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
,
const
std
::
vector
<
module_ref
>&
mods
)
const
{
{
check_shapes
{
inputs
,
*
this
}.
standar
d
();
check_shapes
{
inputs
,
*
this
}.
packed_or_broadcaste
d
();
if
(
mods
.
size
()
!=
1
)
if
(
mods
.
size
()
!=
1
)
MIGRAPHX_THROW
(
"should have one submodule."
);
MIGRAPHX_THROW
(
"should have one submodule."
);
if
(
inputs
.
size
()
<
2
)
if
(
inputs
.
size
()
<
2
)
...
@@ -70,6 +70,9 @@ MIGRAPHX_PRED_MATCHER(is_mlir_conv, instruction_ref ins)
...
@@ -70,6 +70,9 @@ MIGRAPHX_PRED_MATCHER(is_mlir_conv, instruction_ref ins)
auto
group
=
v
.
at
(
"group"
).
to
<
int
>
();
auto
group
=
v
.
at
(
"group"
).
to
<
int
>
();
if
(
group
!=
1
)
if
(
group
!=
1
)
return
false
;
return
false
;
// Avoid MLIR assertion: Index < Length && "Invalid index!"
if
(
ins
->
get_shape
().
lens
().
size
()
!=
4
)
return
false
;
return
true
;
return
true
;
}
}
...
@@ -96,9 +99,10 @@ struct find_conv_pointwise
...
@@ -96,9 +99,10 @@ struct find_conv_pointwise
i
.
name
());
i
.
name
());
}))
}))
return
;
return
;
// Only fuse with fp32
for now
// Only fuse with fp32
/fp16
if
(
std
::
any_of
(
ins
->
inputs
().
begin
(),
ins
->
inputs
().
end
(),
[
&
](
auto
i
)
{
if
(
std
::
any_of
(
ins
->
inputs
().
begin
(),
ins
->
inputs
().
end
(),
[
&
](
auto
i
)
{
return
i
->
get_shape
().
type
()
!=
shape
::
type_t
::
float_type
;
return
not
contains
({
shape
::
type_t
::
float_type
,
shape
::
type_t
::
half_type
},
i
->
get_shape
().
type
());
}))
}))
return
;
return
;
std
::
sort
(
names
.
begin
(),
names
.
end
());
std
::
sort
(
names
.
begin
(),
names
.
end
());
...
...
src/targets/gpu/fuse_ops.cpp
View file @
23851d62
...
@@ -26,7 +26,6 @@
...
@@ -26,7 +26,6 @@
#include <migraphx/gpu/fuse_ops.hpp>
#include <migraphx/gpu/fuse_ops.hpp>
#include <migraphx/matcher.hpp>
#include <migraphx/matcher.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/convolution.hpp>
#include <migraphx/gpu/device_name.hpp>
#include <migraphx/gpu/device_name.hpp>
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/gemm.hpp>
#include <migraphx/gpu/gemm.hpp>
...
@@ -190,10 +189,12 @@ MIGRAPHX_PRED_MATCHER(fusable_conv, instruction_ref ins)
...
@@ -190,10 +189,12 @@ MIGRAPHX_PRED_MATCHER(fusable_conv, instruction_ref ins)
return
false
;
return
false
;
auto
wei
=
ins
->
inputs
().
at
(
1
)
->
get_shape
();
auto
wei
=
ins
->
inputs
().
at
(
1
)
->
get_shape
();
assert
(
wei
.
lens
().
size
()
==
4
);
assert
(
wei
.
lens
().
size
()
==
4
);
auto
conv
=
any_cast
<
miopen_convolution
>
(
ins
->
get_operator
());
auto
miopen_conv_op
=
ins
->
get_operator
().
to_value
();
if
(
conv
.
op
.
group
>
1
)
auto
algo
=
miopen_conv_op
.
at
(
"algo"
).
to
<
miopenConvFwdAlgorithm_t
>
();
auto
conv_op
=
from_value
<
op
::
convolution
>
(
miopen_conv_op
[
"op"
]);
if
(
conv_op
.
group
>
1
)
return
false
;
return
false
;
if
(
wei
.
lens
()[
1
]
>
512
and
conv
.
algo
!=
miopenConvolutionFwdAlgoWinograd
)
if
(
wei
.
lens
()[
1
]
>
512
and
algo
!=
miopenConvolutionFwdAlgoWinograd
)
return
false
;
return
false
;
// Do not fuse non-symmetric input
// Do not fuse non-symmetric input
...
@@ -201,13 +202,12 @@ MIGRAPHX_PRED_MATCHER(fusable_conv, instruction_ref ins)
...
@@ -201,13 +202,12 @@ MIGRAPHX_PRED_MATCHER(fusable_conv, instruction_ref ins)
if
(
input_lens
[
2
]
!=
input_lens
[
3
]
or
wei
.
lens
()[
2
]
!=
wei
.
lens
()[
3
])
if
(
input_lens
[
2
]
!=
input_lens
[
3
]
or
wei
.
lens
()[
2
]
!=
wei
.
lens
()[
3
])
return
false
;
return
false
;
auto
op
=
conv
.
op
;
// Don't fuse winograd for non-3x3s since there is no fused winograd for those configs
// Don't fuse winograd for non-3x3s since there is no fused winograd for those configs
if
(
conv
.
algo
==
miopenConvolutionFwdAlgoWinograd
and
wei
.
lens
()[
2
]
!=
3
and
if
(
algo
==
miopenConvolutionFwdAlgoWinograd
and
wei
.
lens
()[
2
]
!=
3
and
wei
.
lens
()[
3
]
!=
3
and
wei
.
lens
()[
3
]
!=
3
and
contains
({{
1
,
1
}},
op
.
stride
))
contains
({{
1
,
1
}},
conv_
op
.
stride
))
return
false
;
return
false
;
return
contains
({{
0
,
0
,
0
,
0
},
{
1
,
1
,
1
,
1
},
{
2
,
2
,
2
,
2
}},
op
.
padding
)
and
return
contains
({{
0
,
0
,
0
,
0
},
{
1
,
1
,
1
,
1
},
{
2
,
2
,
2
,
2
}},
conv_
op
.
padding
)
and
contains
({{
0
,
0
},
{
1
,
1
}},
op
.
stride
)
and
contains
({{
1
,
1
}},
op
.
dilation
);
contains
({{
0
,
0
},
{
1
,
1
}},
conv_
op
.
stride
)
and
contains
({{
1
,
1
}},
conv_
op
.
dilation
);
}
}
void
move_broadcasted_back
(
std
::
vector
<
instruction_ref
>&
args
)
void
move_broadcasted_back
(
std
::
vector
<
instruction_ref
>&
args
)
...
@@ -462,7 +462,7 @@ void apply_conv_bias(context& ctx, module& m, const match::matcher_result& r)
...
@@ -462,7 +462,7 @@ void apply_conv_bias(context& ctx, module& m, const match::matcher_result& r)
auto
ins
=
r
.
result
;
auto
ins
=
r
.
result
;
auto
input_ins
=
conv_ins
->
inputs
().
at
(
0
);
auto
input_ins
=
conv_ins
->
inputs
().
at
(
0
);
auto
weights_ins
=
conv_ins
->
inputs
().
at
(
1
);
auto
weights_ins
=
conv_ins
->
inputs
().
at
(
1
);
auto
conv_op
=
any_cast
<
miopen_
convolution
>
(
conv_ins
->
get_operator
()).
op
;
auto
conv_op
=
from_value
<
op
::
convolution
>
(
(
conv_ins
->
get_operator
()).
to_value
()[
"op"
])
;
auto
alloc_ins
=
ins
->
inputs
().
back
();
auto
alloc_ins
=
ins
->
inputs
().
back
();
auto
old_ws_ins
=
conv_ins
->
inputs
().
at
(
2
);
auto
old_ws_ins
=
conv_ins
->
inputs
().
at
(
2
);
...
@@ -528,7 +528,7 @@ struct find_conv_pointwise
...
@@ -528,7 +528,7 @@ struct find_conv_pointwise
auto
ins
=
r
.
result
;
auto
ins
=
r
.
result
;
auto
input_ins
=
conv_ins
->
inputs
().
at
(
0
);
auto
input_ins
=
conv_ins
->
inputs
().
at
(
0
);
auto
weights_ins
=
conv_ins
->
inputs
().
at
(
1
);
auto
weights_ins
=
conv_ins
->
inputs
().
at
(
1
);
auto
conv_op
=
any_cast
<
miopen_
convolution
>
(
conv_ins
->
get_operator
()
).
op
;
auto
conv_op
=
from_value
<
op
::
convolution
>
(
conv_ins
->
get_operator
()
.
to_value
()[
"op"
])
;
auto
alloc_ins
=
ins
->
inputs
().
back
();
auto
alloc_ins
=
ins
->
inputs
().
back
();
module_ref
pm
=
ins
->
module_inputs
().
front
();
module_ref
pm
=
ins
->
module_inputs
().
front
();
...
...
src/targets/gpu/include/migraphx/gpu/convolution.hpp
View file @
23851d62
...
@@ -25,18 +25,40 @@
...
@@ -25,18 +25,40 @@
#define MIGRAPHX_GUARD_RTGLIB_CONVOLUTION_HPP
#define MIGRAPHX_GUARD_RTGLIB_CONVOLUTION_HPP
#include <migraphx/shape.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/operation.hpp>
#include <migraphx/register_op.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/op/identity.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/op/deconvolution.hpp>
#include <unordered_map>
#include <migraphx/reflect.hpp>
#include <migraphx/gpu/context.hpp>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
gpu
{
struct
context
;
// Returns a copy of `input`, except that a rank-3 shape gets a unit dimension
// inserted at index 2 (lens {a, b, c} become {a, b, 1, c}) and is rebuilt from
// its type and the new lengths. Shapes of any other rank are returned as-is.
inline shape reshape_if_1d(const shape& input)
{
    const auto& lens = input.lens();
    if(lens.size() != 3)
        return input;

    std::vector<size_t> padded_lens(lens.begin(), lens.end());
    padded_lens.insert(padded_lens.begin() + 2, 1);
    return shape{input.type(), padded_lens};
}
template
<
class
Op
>
struct
miopen_convolution
struct
miopen_convolution
{
{
op
::
convolution
op
;
Op
op
;
bool
int8_x4_format
=
false
;
shared
<
convolution_descriptor
>
cd
=
nullptr
;
shared
<
convolution_descriptor
>
cd
=
nullptr
;
miopenConvFwdAlgorithm_t
algo
{};
miopenConvFwdAlgorithm_t
algo
{};
#ifdef MIGRAPHX_HAS_FIND_2_API
#ifdef MIGRAPHX_HAS_FIND_2_API
...
@@ -48,29 +70,276 @@ struct miopen_convolution
...
@@ -48,29 +70,276 @@ struct miopen_convolution
template
<
class
Self
,
class
F
>
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
static
auto
reflect
(
Self
&
self
,
F
f
)
{
{
return
pack
(
f
(
self
.
op
.
padding
,
"padding"
),
return
pack
(
f
(
self
.
op
,
"op"
),
f
(
self
.
op
.
stride
,
"stride"
),
f
(
self
.
op
.
dilation
,
"dilation"
),
f
(
self
.
op
.
group
,
"group"
),
f
(
self
.
op
.
padding_mode
,
"padding_mode"
),
#ifdef MIGRAPHX_HAS_FIND_2_API
#ifdef MIGRAPHX_HAS_FIND_2_API
f
(
self
.
solution_object
,
"solution_object"
),
f
(
self
.
solution_object
,
"solution_object"
),
#endif
#endif
f
(
self
.
algo
,
"algo"
),
f
(
self
.
int8_x4_format
,
"int8_x4_format"
),
f
(
self
.
solution_id
,
"solution_id"
));
f
(
self
.
solution_id
,
"solution_id"
));
}
}
std
::
string
name
()
const
{
return
"gpu::convolution"
;
}
std
::
string
name
()
const
{
return
"gpu::"
+
op
.
name
();
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
;
inline
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
check_shapes
{
inputs
,
op
}.
has
(
4
).
standard
();
std
::
vector
<
shape
>
conv_inputs
(
inputs
.
begin
(),
inputs
.
begin
()
+
2
);
check_shapes
{
conv_inputs
,
op
}.
max_ndims
(
5
);
return
migraphx
::
compute_shape
<
Op
>
(
op
,
conv_inputs
);
}
argument
argument
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
shape
find
(
context
&
ctx
,
const
shape
&
output_shape
,
std
::
vector
<
shape
>
inputs
);
{
void
finalize
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
shape
>&
inputs
);
auto
x_desc
=
make_tensor
(
reshape_if_1d
(
args
[
0
].
get_shape
()),
int8_x4_format
);
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
auto
w_desc
=
make_tensor
(
reshape_if_1d
(
args
[
1
].
get_shape
()),
int8_x4_format
);
auto
y_desc
=
make_tensor
(
reshape_if_1d
(
output_shape
));
auto
*
miopen_stream_handle
=
ctx
.
get_stream
().
get_miopen
();
auto
workspace_size
=
args
[
2
].
get_shape
().
bytes
();
#ifdef MIGRAPHX_HAS_FIND_2_API
{
const
miopenTensorArgument_t
tensor_args
[
3
]
=
{
{
miopenTensorConvolutionX
,
nullptr
,
args
[
0
].
implicit
()},
{
miopenTensorConvolutionW
,
nullptr
,
args
[
1
].
implicit
()},
{
miopenTensorConvolutionY
,
nullptr
,
args
[
3
].
implicit
()},
};
if
(
solution_ptr
.
get
()
==
nullptr
)
MIGRAPHX_THROW
(
"MIOpen "
+
op
.
name
()
+
" : Load MIOpen Solution before running it"
);
auto
status
=
miopenRunSolution
(
miopen_stream_handle
,
solution_ptr
.
get
(),
3
,
tensor_args
,
args
[
2
].
implicit
(),
workspace_size
);
if
(
status
!=
miopenStatusSuccess
)
MIGRAPHX_THROW
(
"MIOpen "
+
op
.
name
()
+
" : running convolution using find_2.0 failed"
);
return
args
[
3
];
}
#else
// else use immediate mode
if
(
solution_id
==
0
)
MIGRAPHX_THROW
(
"MIOpen "
+
op
.
name
()
+
" : invalid solution ID"
);
auto
status
=
miopenConvolutionForwardImmediate
(
miopen_stream_handle
,
w_desc
.
get
(),
args
[
1
].
implicit
(),
x_desc
.
get
(),
args
[
0
].
implicit
(),
cd
.
get
(),
y_desc
.
get
(),
args
[
3
].
implicit
(),
args
[
2
].
implicit
(),
workspace_size
,
solution_id
);
if
(
status
!=
miopenStatusSuccess
)
MIGRAPHX_THROW
(
"MIOpen "
+
op
.
name
()
+
": running convolution failed"
);
return
args
[
3
];
#endif
}
// Lazily creates the convolution descriptor `cd`. Nothing happens when `cd`
// is already set; otherwise it is built with make_deconv() when the wrapped
// operator is named "deconvolution" and with make_conv() for anything else.
inline void set_conv_descriptor()
{
    if(cd != nullptr)
        return;

    if(op.name() == "deconvolution")
        cd = make_deconv(op);
    else
        cd = make_conv(op);
}
// Prepares the operator for the given shapes: makes sure the convolution
// descriptor exists, runs find() with the gpu context held by the
// type-erased `ctx`, and returns a value of the form
// {"workspace": <bytes>} holding the byte size of the shape find() returned.
value compile(migraphx::context& ctx, const shape& output, const std::vector<shape>& input)
{
    set_conv_descriptor();
    auto& gpu_ctx         = any_cast<migraphx::gpu::context>(ctx);
    const auto workspace  = find(gpu_ctx, output, input);
    const auto ws_in_bytes = workspace.bytes();
    return {{"workspace", ws_in_bytes}};
}
// Picks an MIOpen solution for this convolution and returns the workspace
// shape (int8, one dimension) that the solution needs.
//
// inputs[0] is the data shape and inputs[1] the weights shape; both, like
// output_shape, go through reshape_if_1d() before a tensor descriptor is made.
// `cd` must already be set (compile() and finalize() call
// set_conv_descriptor() before calling this).
//
// With MIGRAPHX_HAS_FIND_2_API: finds a solution for the forward problem,
// stores it in `solution_ptr`, and serializes it into `solution_object`.
// Otherwise: runs miopenFindConvolutionForwardAlgorithm on generated
// arguments, records `algo`, and stores the id of the first solution reported
// by MIOpen in `solution_id`.
//
// Throws (MIGRAPHX_THROW) when any MIOpen call fails or no solution is returned.
shape find(context& ctx, const shape& output_shape, const std::vector<shape>& inputs)
{
    auto x_desc = make_tensor(reshape_if_1d(inputs[0]), int8_x4_format);
    auto w_desc = make_tensor(reshape_if_1d(inputs[1]), int8_x4_format);
    auto y_desc = make_tensor(reshape_if_1d(output_shape));

    std::size_t workspace_size = 0;
#ifdef MIGRAPHX_HAS_FIND_2_API
    {
        auto conv_problem = make_obj<miopen_problem>(
            &miopenCreateConvProblem, cd.get(), miopenProblemDirectionForward);

        set_tensor_descriptor(miopenTensorConvolutionX, x_desc, conv_problem);
        set_tensor_descriptor(miopenTensorConvolutionW, w_desc, conv_problem);
        set_tensor_descriptor(miopenTensorConvolutionY, y_desc, conv_problem);

        auto* miopen_stream_handle = ctx.get_stream().get_miopen();

        solution_ptr = find_solution(miopen_stream_handle, conv_problem.get());
        auto status  = miopenGetSolutionWorkspaceSize(solution_ptr.get(), &workspace_size);
        if(status != miopenStatusSuccess)
            MIGRAPHX_THROW("MIOpen " + op.name() + " : failed to get solution's workspace size");

        std::size_t solution_size = 0;
        status = miopenGetSolutionSize(solution_ptr.get(), &solution_size);
        if(status != miopenStatusSuccess)
            MIGRAPHX_THROW("MIOpen " + op.name() + ": Failed to fetch solution size");

        // Serialize the solution so it can be reloaded in finalize().
        std::vector<char> solution_binary(solution_size);
        status = miopenSaveSolution(solution_ptr.get(), solution_binary.data());
        if(status != miopenStatusSuccess)
            MIGRAPHX_THROW("MIOpen " + op.name() + ": Saving solution failed");
        solution_object = value::binary{solution_binary.data(), solution_size};
        return shape{shape::int8_type, {workspace_size}};
    }
#else
    auto status = miopenConvolutionForwardGetWorkSpaceSize(ctx.get_stream().get_miopen(),
                                                           w_desc.get(),
                                                           x_desc.get(),
                                                           cd.get(),
                                                           y_desc.get(),
                                                           &workspace_size);
    if(status != miopenStatusSuccess)
        MIGRAPHX_THROW("MIOpen " + op.name() + " : Failed to get forward workspace size");

    const shape workspace_shape = shape{shape::int8_type, {workspace_size}};

    // The buffers handed to the find call use the packed int8 layout when
    // int8_x4_format is set.
    auto x_shape = inputs[0];
    auto w_shape = inputs[1];
    if(int8_x4_format)
    {
        x_shape = pack_int8_shape(x_shape);
        w_shape = pack_int8_shape(w_shape);
    }

    auto x         = to_gpu(generate_argument(x_shape));
    auto w         = to_gpu(generate_argument(w_shape));
    auto y         = allocate_gpu(output_shape);
    auto workspace = allocate_gpu(workspace_shape);

    int algo_count = 1;
    miopenConvAlgoPerf_t perf{};
    status = miopenFindConvolutionForwardAlgorithm(ctx.get_stream().get_miopen(),
                                                   x_desc.get(),
                                                   x.implicit(),
                                                   w_desc.get(),
                                                   w.implicit(),
                                                   cd.get(),
                                                   y_desc.get(),
                                                   y.implicit(),
                                                   1,
                                                   &algo_count,
                                                   &perf,
                                                   workspace.implicit(),
                                                   workspace_size,
                                                   false);
    if(status != miopenStatusSuccess)
        MIGRAPHX_THROW("MIOpen " + op.name() + " : find convolution failed");
    algo = perf.fwd_algo;

    size_t solution_count = 0;
    status = miopenConvolutionForwardGetSolutionCount(ctx.get_stream().get_miopen(),
                                                      w_desc.get(),
                                                      x_desc.get(),
                                                      cd.get(),
                                                      y_desc.get(),
                                                      &solution_count);
    if(status != miopenStatusSuccess)
        MIGRAPHX_THROW("MIOpen " + op.name() + ": get solution count failed");

    std::vector<miopenConvSolution_t> solutions(solution_count);
    status = miopenConvolutionForwardGetSolution(ctx.get_stream().get_miopen(),
                                                 w_desc.get(),
                                                 x_desc.get(),
                                                 cd.get(),
                                                 y_desc.get(),
                                                 solution_count,
                                                 &solution_count,
                                                 solutions.data());
    if(status != miopenStatusSuccess)
        MIGRAPHX_THROW("MIOpen " + op.name() + ": get solution failed");
    // front() on an empty vector is undefined behavior; fail loudly instead.
    if(solution_count == 0 or solutions.empty())
        MIGRAPHX_THROW("MIOpen " + op.name() + ": no solution available");

    solution_id = solutions.front().solution_id;

    return shape{shape::int8_type, {perf.memory}};
#endif
}
// Makes the operator ready to run.
//
// With MIGRAPHX_HAS_FIND_2_API: if no solution is loaded yet, deserializes
// `solution_object` into `solution_ptr`; the three parameters are unused.
// Otherwise (immediate mode): ensures the descriptor exists, re-runs find()
// when `solution_id` is still 0 (checking that the workspace found does not
// exceed inputs.at(2)), then compiles the solution identified by `solution_id`.
//
// Throws (MIGRAPHX_THROW) when loading or compiling the solution fails, or
// when the workspace required has grown beyond the one provided.
void finalize(context& ctx, const shape& output_shape, const std::vector<shape>& inputs)
{
#ifdef MIGRAPHX_HAS_FIND_2_API
    {
        (void)(ctx); // avoid warnings
        (void)(output_shape);
        (void)(inputs);
        // load solution
        if(solution_ptr == nullptr)
        {
            miopenSolution_t ptr = nullptr;
            auto status =
                miopenLoadSolution(&ptr,
                                   reinterpret_cast<const char*>(solution_object.data()),
                                   solution_object.size());
            // Check the status before taking ownership, so a failed load never
            // leaves an invalid handle stored in solution_ptr.
            if(status != miopenStatusSuccess)
                MIGRAPHX_THROW("MIOpen " + op.name() + ": loading convolution solution failed");
            solution_ptr = miopen_solution{ptr};
        }
    }
#else
    // Use immediate mode API
    {
        set_conv_descriptor();
        if(solution_id == 0)
        {
            // Check that workspace hasn't changed
            auto size = inputs.at(2).bytes();
            auto ws   = find(ctx, output_shape, inputs);
            if(ws.bytes() > size)
                MIGRAPHX_THROW("MIOpen " + op.name() +
                               ": workspace has changed during finalization.");
        }

        auto x_desc = make_tensor(reshape_if_1d(inputs[0]), int8_x4_format);
        auto w_desc = make_tensor(reshape_if_1d(inputs[1]), int8_x4_format);
        auto y_desc = make_tensor(reshape_if_1d(output_shape));

        auto status = miopenConvolutionForwardCompileSolution(ctx.get_stream().get_miopen(),
                                                              w_desc.get(),
                                                              x_desc.get(),
                                                              cd.get(),
                                                              y_desc.get(),
                                                              solution_id);
        if(status != miopenStatusSuccess)
            MIGRAPHX_THROW("MIOpen Convolution: compile solution failed");
    }
#endif
}
inline
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
{
return
shapes
.
size
()
-
1
;
return
shapes
.
size
()
-
1
;
}
}
};
// Builds the shape used for the packed int8 layout: the length of dimension 1
// is rounded up to the next multiple of 4 and stride 0 is recomputed as
// strides[1] * lens[1]. Shapes whose type is not int8 are returned unchanged.
// NOTE(review): indexes lens[1] / strides[1] unconditionally, so callers are
// presumably expected to pass shapes of rank >= 2 - confirm.
inline shape pack_int8_shape(const shape& s) const
{
    if(s.type() == shape::int8_type)
    {
        auto packed_lens    = s.lens();
        auto packed_strides = s.strides();
        packed_lens[1]      = (packed_lens[1] + 3) / 4 * 4;
        packed_strides[0]   = packed_strides[1] * packed_lens[1];
        return {s.type(), packed_lens, packed_strides};
    }
    return s;
}
};
}
// namespace gpu
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
}
// namespace migraphx
...
...
src/targets/gpu/include/migraphx/gpu/mlir.hpp
View file @
23851d62
...
@@ -36,7 +36,8 @@ struct module;
...
@@ -36,7 +36,8 @@ struct module;
namespace
gpu
{
namespace
gpu
{
std
::
string
dump_mlir
(
const
module
&
m
);
std
::
string
dump_mlir
(
const
module
&
m
);
code_object_op
compile_mlir
(
const
context
&
ctx
,
const
module
&
m
);
code_object_op
compile_mlir
(
const
context
&
ctx
,
module
m
,
const
std
::
vector
<
instruction_ref
>&
inputs
);
instruction_ref
insert_mlir
(
module
&
m
,
instruction_ref
insert_mlir
(
module
&
m
,
instruction_ref
ins
,
instruction_ref
ins
,
...
...
src/targets/gpu/include/migraphx/gpu/perfdb.hpp
View file @
23851d62
...
@@ -41,7 +41,7 @@ struct problem_params
...
@@ -41,7 +41,7 @@ struct problem_params
shape
output
;
shape
output
;
};
};
std
::
string
get_mlir_perf_for_conv
(
const
problem_params
&
pp
);
std
::
string
get_mlir_perf_for_conv
(
const
problem_params
&
pp
,
bool
xdlops
);
}
// namespace gpu
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
...
...
src/targets/gpu/include/migraphx/gpu/quant_convolution.hpp
deleted
100644 → 0
View file @
41d4e92b
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_QUANT_CONVOLUTION_HPP
#define MIGRAPHX_GUARD_RTGLIB_QUANT_CONVOLUTION_HPP
#include <migraphx/shape.hpp>
#include <migraphx/reflect.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/gpu/miopen.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
struct
context
;
// GPU operator wrapping op::quant_convolution for execution through MIOpen.
// Member order is part of the interface: callers brace-initialize the struct.
struct miopen_quant_convolution
{
    // The reference operator whose attributes describe the convolution.
    op::quant_convolution op;
    // Passed to make_tensor() for the input and weight descriptors.
    bool int8_x4_format = false;
    // MIOpen convolution descriptor.
    shared<convolution_descriptor> cd;
    // Forward algorithm handed to MIOpen when running.
    miopenConvFwdAlgorithm_t algo{};
    // Solution identifier; 0 means no solution has been selected yet.
    uint64_t solution_id = 0;

    // Exposes the fields of `op` followed by `int8_x4_format` for reflection.
    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        // TODO: Add algo
        return pack_join(migraphx::reflect(self.op, f),
                         pack(f(self.int8_x4_format, "int8_x4_format")));
    }

    // Name under which the operator is known.
    std::string name() const
    {
        return "gpu::quant_convolution";
    }

    shape compute_shape(const std::vector<shape>& inputs) const;

    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;

    shape find(context& ctx, const shape& output_shape, std::vector<shape> inputs);

    void finalize(context& ctx, const shape& output_shape, std::vector<shape> inputs);

    // The result aliases the last argument.
    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
    {
        return shapes.size() - 1;
    }

    private:
    shape pack_int8_shape(const shape& s) const;
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/jit/mlir.cpp
View file @
23851d62
...
@@ -41,7 +41,7 @@ struct mlir_compiler : compiler<mlir_compiler>
...
@@ -41,7 +41,7 @@ struct mlir_compiler : compiler<mlir_compiler>
{
{
auto
*
smod
=
ins
->
module_inputs
().
front
();
auto
*
smod
=
ins
->
module_inputs
().
front
();
assert
(
smod
->
get_parameter_names
().
size
()
==
ins
->
inputs
().
size
()
-
1
);
assert
(
smod
->
get_parameter_names
().
size
()
==
ins
->
inputs
().
size
()
-
1
);
return
insert
(
compile_mlir
(
ctx
,
*
smod
));
return
insert
(
compile_mlir
(
ctx
,
*
smod
,
ins
->
inputs
()
));
}
}
compiler_replace
insert
(
code_object_op
co
)
const
compiler_replace
insert
(
code_object_op
co
)
const
...
...
src/targets/gpu/lowering.cpp
View file @
23851d62
...
@@ -39,12 +39,10 @@
...
@@ -39,12 +39,10 @@
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/convolution.hpp>
#include <migraphx/gpu/convolution.hpp>
#include <migraphx/gpu/deconvolution.hpp>
#include <migraphx/gpu/device_name.hpp>
#include <migraphx/gpu/device_name.hpp>
#include <migraphx/gpu/gemm.hpp>
#include <migraphx/gpu/gemm.hpp>
#include <migraphx/gpu/int8_conv_pack.hpp>
#include <migraphx/gpu/int8_conv_pack.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/quant_convolution.hpp>
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/compiler.hpp>
#include <migraphx/gpu/compiler.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/iterator_for.hpp>
...
@@ -114,15 +112,15 @@ struct miopen_apply
...
@@ -114,15 +112,15 @@ struct miopen_apply
add_extend_op
(
"scatter_none"
);
add_extend_op
(
"scatter_none"
);
add_extend_op
(
"topk"
);
add_extend_op
(
"topk"
);
add_convolution_op
();
add_convolution_op
<
op
::
convolution
>
(
"convolution"
);
add_deconvolution_op
();
add_convolution_op
<
op
::
deconvolution
>
(
"deconvolution"
);
add_convolution_op
<
op
::
quant_convolution
>
(
"quant_convolution"
);
add_gemm_op
<
op
::
dot
>
(
"dot"
);
add_gemm_op
<
op
::
dot
>
(
"dot"
);
add_gemm_op
<
op
::
quant_dot
>
(
"quant_dot"
);
add_gemm_op
<
op
::
quant_dot
>
(
"quant_dot"
);
add_if_op
();
add_if_op
();
add_loop_op
();
add_loop_op
();
add_neg_op
();
add_neg_op
();
add_nms_op
();
add_nms_op
();
add_quant_convolution_op
();
}
}
void
copy_params
()
const
void
copy_params
()
const
...
@@ -230,38 +228,6 @@ struct miopen_apply
...
@@ -230,38 +228,6 @@ struct miopen_apply
return
mod
->
insert_instruction
(
ins
,
make_op
(
"allocate"
,
{{
"shape"
,
to_value
(
s
)}}));
return
mod
->
insert_instruction
(
ins
,
make_op
(
"allocate"
,
{{
"shape"
,
to_value
(
s
)}}));
}
}
void
add_convolution_op
()
{
apply_map
.
emplace
(
"convolution"
,
[
=
](
instruction_ref
ins
)
{
auto
&&
op
=
any_cast
<
op
::
convolution
>
(
ins
->
get_operator
());
auto
conv
=
miopen_convolution
{
op
,
make_conv
(
op
)};
auto
ws
=
conv
.
find
(
get_context
(),
ins
->
get_shape
(),
to_shapes
(
ins
->
inputs
()));
auto
workspace
=
insert_allocation
(
ins
,
ws
);
auto
output
=
insert_allocation
(
ins
,
ins
->
get_shape
());
return
mod
->
replace_instruction
(
ins
,
conv
,
ins
->
inputs
().
at
(
0
),
ins
->
inputs
().
at
(
1
),
workspace
,
output
);
});
}
void
add_deconvolution_op
()
{
apply_map
.
emplace
(
"deconvolution"
,
[
=
](
instruction_ref
ins
)
{
auto
&&
op
=
any_cast
<
op
::
deconvolution
>
(
ins
->
get_operator
());
auto
conv
=
miopen_deconvolution
{
op
,
make_deconv
(
op
)};
auto
ws
=
conv
.
find
(
get_context
(),
ins
->
get_shape
(),
to_shapes
(
ins
->
inputs
()));
auto
workspace
=
insert_allocation
(
ins
,
ws
);
auto
output
=
insert_allocation
(
ins
,
ins
->
get_shape
());
return
mod
->
replace_instruction
(
ins
,
conv
,
ins
->
inputs
().
at
(
0
),
ins
->
inputs
().
at
(
1
),
workspace
,
output
);
});
}
template
<
typename
Op
>
template
<
typename
Op
>
void
add_gemm_op
(
const
std
::
string
&
name
)
void
add_gemm_op
(
const
std
::
string
&
name
)
{
{
...
@@ -275,31 +241,33 @@ struct miopen_apply
...
@@ -275,31 +241,33 @@ struct miopen_apply
});
});
}
}
void
add_quant_convolution_op
()
template
<
typename
Op
>
void
add_convolution_op
(
const
std
::
string
&
name
)
{
{
apply_map
.
emplace
(
"quant_convolution"
,
[
=
](
instruction_ref
ins
)
{
apply_map
.
emplace
(
name
,
[
=
](
instruction_ref
ins
)
{
auto
&&
op
=
any_cast
<
op
::
quant_convolution
>
(
ins
->
get_operator
());
operation
conv
=
shape
ws
;
miopen_convolution
<
Op
>
{
any_cast
<
Op
>
(
ins
->
get_operator
()),
int8_x4_format
};
miopen_quant_convolution
conv
;
migraphx
::
context
ctx
=
get_context
();
auto
compile_quant_conv_with_format
=
[
&
](
bool
format
)
{
size_t
ws_bytes
=
0
;
conv
=
miopen_quant_convolution
{
op
,
format
,
make_conv
(
op
)};
auto
compile_conv_with_format
=
[
&
](
bool
format
)
{
ws
=
conv
.
find
(
get_context
(),
ins
->
get_shape
(),
to_shapes
(
ins
->
inputs
()));
conv
=
miopen_convolution
<
Op
>
{
any_cast
<
Op
>
(
ins
->
get_operator
()),
format
};
auto
ws
=
conv
.
compile
(
ctx
,
ins
->
get_shape
(),
to_shapes
(
ins
->
inputs
()));
ws_bytes
=
ws
.
get
(
"workspace"
,
0
);
};
};
try
try
{
{
// for the regular convolution and deconvolution, this try would always succeed
compile_
quant_
conv_with_format
(
int8_x4_format
);
compile_conv_with_format
(
int8_x4_format
);
}
}
catch
(
migraphx
::
exception
&
)
catch
(
migraphx
::
exception
&
)
{
{
// In case no solver supports the default format, retry using the other format.
// In case no solver supports the default format, retry using the other format.
compile_
quant_
conv_with_format
(
not
int8_x4_format
);
compile_conv_with_format
(
not
int8_x4_format
);
}
}
auto
args
=
ins
->
inputs
();
auto
args
=
ins
->
inputs
();
auto
workspace
=
insert_allocation
(
ins
,
ws
);
auto
output
=
insert_allocation
(
ins
,
ins
->
get_shape
());
auto
output
=
insert_allocation
(
ins
,
ins
->
get_shape
());
auto
workspace
=
insert_allocation
(
ins
,
shape
{
shape
::
int8_type
,
{
ws_bytes
}});
return
mod
->
replace_instruction
(
ins
,
conv
,
args
[
0
],
args
[
1
],
workspace
,
output
);
return
mod
->
replace_instruction
(
ins
,
conv
,
args
[
0
],
args
[
1
],
workspace
,
output
);
});
});
}
}
...
...
src/targets/gpu/mlir.cpp
View file @
23851d62
...
@@ -21,6 +21,7 @@
...
@@ -21,6 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* THE SOFTWARE.
*/
*/
#include "migraphx/make_op.hpp"
#include <migraphx/gpu/mlir.hpp>
#include <migraphx/gpu/mlir.hpp>
#ifdef MIGRAPHX_MLIR
#ifdef MIGRAPHX_MLIR
...
@@ -43,8 +44,9 @@
...
@@ -43,8 +44,9 @@
#include <migraphx/gpu/code_object_op.hpp>
#include <migraphx/gpu/code_object_op.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device_name.hpp>
#include <migraphx/gpu/device_name.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/gpu/perfdb.hpp>
#include <migraphx/gpu/perfdb.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/permutation.hpp>
#include <deque>
#include <deque>
#include <variant>
#include <variant>
...
@@ -194,7 +196,6 @@ struct mlir_program
...
@@ -194,7 +196,6 @@ struct mlir_program
MlirType
make_tensor
(
const
shape
&
s
)
const
MlirType
make_tensor
(
const
shape
&
s
)
const
{
{
assert
(
s
.
standard
());
std
::
vector
<
int64_t
>
lens
(
s
.
lens
().
begin
(),
s
.
lens
().
end
());
std
::
vector
<
int64_t
>
lens
(
s
.
lens
().
begin
(),
s
.
lens
().
end
());
return
mlirRankedTensorTypeGet
(
return
mlirRankedTensorTypeGet
(
lens
.
size
(),
lens
.
data
(),
make_type
(
s
.
type
()),
mlirAttributeGetNull
());
lens
.
size
(),
lens
.
data
(),
make_type
(
s
.
type
()),
mlirAttributeGetNull
());
...
@@ -502,11 +503,12 @@ struct mlir_program
...
@@ -502,11 +503,12 @@ struct mlir_program
{
{
pp
=
pp
=
problem_params
{
ins
->
get_operator
(),
to_shapes
(
ins
->
inputs
()),
ins
->
get_shape
()};
problem_params
{
ins
->
get_operator
(),
to_shapes
(
ins
->
inputs
()),
ins
->
get_shape
()};
std
::
string
tuned
=
get_tune_params
();
// check if HW supports xdlops
bool
xdlops
=
contains
(
get_xdlops_archs
(),
target_name
);
std
::
string
tuned
=
get_tune_params
(
xdlops
);
if
(
not
tuned
.
empty
())
if
(
not
tuned
.
empty
())
ops
.
add_attributes
({{
"perf_config"
,
tuned
}});
ops
.
add_attributes
({{
"perf_config"
,
tuned
}});
// check if HW supports xdlops
if
(
xdlops
)
if
(
contains
(
get_xdlops_archs
(),
target_name
))
ops
.
add_attributes
({{
"xdlopsV2"
,
true
}});
ops
.
add_attributes
({{
"xdlopsV2"
,
true
}});
}
}
...
@@ -571,7 +573,7 @@ struct mlir_program
...
@@ -571,7 +573,7 @@ struct mlir_program
MIGRAPHX_THROW
(
"Failed to compile mlir program"
);
MIGRAPHX_THROW
(
"Failed to compile mlir program"
);
}
}
std
::
string
get_tune_params
()
{
return
get_mlir_perf_for_conv
(
pp
);
}
std
::
string
get_tune_params
(
bool
xdlops
)
{
return
get_mlir_perf_for_conv
(
pp
,
xdlops
);
}
mlir_context
ctx
;
mlir_context
ctx
;
MlirLocation
location
;
MlirLocation
location
;
...
@@ -589,8 +591,54 @@ std::string dump_mlir(const module& m)
...
@@ -589,8 +591,54 @@ std::string dump_mlir(const module& m)
return
mlir_print
(
&
mlirOperationPrint
,
mod_op
);
return
mlir_print
(
&
mlirOperationPrint
,
mod_op
);
}
}
code_object_op
compile_mlir
(
const
context
&
,
const
module
&
m
)
// Rewrites module parameters whose corresponding input has a non-standard
// shape. Parameter names are sorted and matched to `inputs` by position.
// For each non-standard input a new parameter "<name>.0" is added with the
// input's type and adjusted lengths, then "transpose" and/or "multibroadcast"
// instructions are inserted so the result replaces the old parameter, which
// is then removed.
//
// m:      module to modify in place.
// inputs: instructions supplying the actual argument shapes; inputs[i] is
//         paired with the i-th sorted parameter name.
void adjust_param_shapes(module& m, const std::vector<instruction_ref>& inputs)
{
    auto param_names = m.get_parameter_names();
    std::sort(param_names.begin(), param_names.end());

    for(std::size_t idx = 0; idx < param_names.size(); ++idx)
    {
        const auto& pname   = param_names[idx];
        const auto& in_shape = inputs[idx]->get_shape();
        auto old_param      = m.get_parameter(pname);
        if(in_shape.standard())
            continue;

        auto new_lens    = in_shape.lens();
        auto new_strides = in_shape.strides();
        std::vector<operation> layout_ops;

        if(in_shape.transposed())
        {
            // Reorder the dimensions with the inverse permutation and
            // re-apply the permutation with a transpose afterwards.
            auto perm     = find_permutation(in_shape);
            auto inv_perm = invert_permutation(perm);
            new_lens      = reorder_dims(new_lens, inv_perm);
            new_strides   = reorder_dims(new_strides, inv_perm);
            layout_ops.push_back(make_op("transpose", {{"permutation", perm}}));
        }

        if(in_shape.broadcasted())
        {
            // Dimensions with stride 0 collapse to length 1 in the new
            // parameter; multibroadcast restores the input's lengths.
            std::transform(new_lens.begin(),
                           new_lens.end(),
                           new_strides.begin(),
                           new_lens.begin(),
                           [](auto len, auto stride) -> std::size_t {
                               return stride == 0 ? 1 : len;
                           });
            layout_ops.push_back(make_op("multibroadcast", {{"out_lens", in_shape.lens()}}));
        }

        // Chain the collected ops, in order, on top of the new parameter.
        auto replacement = std::accumulate(
            layout_ops.begin(),
            layout_ops.end(),
            m.add_parameter(pname + ".0", shape{in_shape.type(), new_lens}),
            [&](auto prev, auto layout_op) {
                return m.insert_instruction(old_param, layout_op, prev);
            });

        m.replace_instruction(old_param, replacement);
        m.remove_instruction(old_param);
    }
}
code_object_op
compile_mlir
(
const
context
&
,
module
m
,
const
std
::
vector
<
instruction_ref
>&
inputs
)
{
{
adjust_param_shapes
(
m
,
inputs
);
const
bool
trace
=
enabled
(
MIGRAPHX_TRACE_MLIR
{});
const
bool
trace
=
enabled
(
MIGRAPHX_TRACE_MLIR
{});
if
(
trace
)
if
(
trace
)
std
::
cout
<<
m
<<
std
::
endl
;
std
::
cout
<<
m
<<
std
::
endl
;
...
@@ -662,13 +710,19 @@ instruction_ref insert_mlir(module& m,
...
@@ -662,13 +710,19 @@ instruction_ref insert_mlir(module& m,
std
::
string
dump_mlir
(
const
module
&
)
{
return
{};
}
std
::
string
dump_mlir
(
const
module
&
)
{
return
{};
}
code_object_op
compile_mlir
(
const
context
&
,
const
module
&
)
{
return
{};
}
template
<
class
T
>
template
<
class
T
>
void
use
(
T
&
)
void
use
(
T
&
)
{
{
}
}
// Disabling clang-tidy warning on non-real usage.
// NOLINTBEGIN(performance-unnecessary-value-param)
code_object_op
compile_mlir
(
const
context
&
,
module
,
const
std
::
vector
<
instruction_ref
>&
)
{
return
{};
}
// NOLINTEND(performance-unnecessary-value-param)
instruction_ref
instruction_ref
// cppcheck-suppress funcArgNamesDifferent
// cppcheck-suppress funcArgNamesDifferent
insert_mlir
(
module
&
m
,
instruction_ref
,
code_object_op
co
,
const
std
::
vector
<
instruction_ref
>&
)
insert_mlir
(
module
&
m
,
instruction_ref
,
code_object_op
co
,
const
std
::
vector
<
instruction_ref
>&
)
...
...
src/targets/gpu/perfdb.cpp
View file @
23851d62
...
@@ -108,16 +108,17 @@ auto query_miopen_db(const std::string& query)
...
@@ -108,16 +108,17 @@ auto query_miopen_db(const std::string& query)
}
// namespace
}
// namespace
std
::
string
get_mlir_perf_for_conv
(
const
problem_params
&
pp
)
std
::
string
get_mlir_perf_for_conv
(
const
problem_params
&
pp
,
bool
xdlops
)
{
{
std
::
string
query
=
"select P.* \
std
::
string
solver
=
xdlops
?
"ConvMlirIgemmFwdXdlops"
:
"ConvMlirIgemmFwd"
;
std
::
string
query
=
"select P.* \
from perf_db P, config C \
from perf_db P, config C \
where P.config = C.id AND \
where P.config = C.id AND \
P.solver = '
ConvMlirIgemmFwdXdlops
' AND \
P.solver = '
${solver}
' AND \
${config}"
;
${config}"
;
auto
results
=
auto
results
=
query_miopen_db
(
query_miopen_db
(
interpolate_string
(
query
,
{{
"config"
,
generate_miopen_config
(
pp
)}}));
interpolate_string
(
query
,
{{
"config"
,
generate_miopen_config
(
pp
)}
,
{
"solver"
,
solver
}
}));
if
(
results
.
empty
())
if
(
results
.
empty
())
return
""
;
return
""
;
return
results
.
front
().
at
(
"params"
);
return
results
.
front
().
at
(
"params"
);
...
...
src/targets/gpu/quant_convolution.cpp
deleted
100644 → 0
View file @
41d4e92b
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/quant_convolution.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/generate.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
// Validates that exactly four standard shapes were passed, then returns the
// output shape computed by the wrapped operator from the first two of them.
shape miopen_quant_convolution::compute_shape(const std::vector<shape>& inputs) const
{
    check_shapes{inputs, *this}.has(4).standard();
    const auto& data_shape    = inputs.at(0);
    const auto& weights_shape = inputs.at(1);
    return op.normalize_compute_shape({data_shape, weights_shape});
}
// Runs the forward convolution through MIOpen with alpha = 1 and beta = 0.
// args[0] is the data, args[1] the weights, args[2] the workspace and args[3]
// the output buffer, which is also the returned argument.
// Throws (MIGRAPHX_THROW) when MIOpen reports a failure.
argument miopen_quant_convolution::compute(context& ctx,
                                           const shape& output_shape,
                                           const std::vector<argument>& args) const
{
    const auto& data      = args[0];
    const auto& weights   = args[1];
    const auto& workspace = args[2];
    const auto& result    = args[3];

    auto x_desc = make_tensor(data.get_shape(), int8_x4_format);
    auto w_desc = make_tensor(weights.get_shape(), int8_x4_format);
    auto y_desc = make_tensor(output_shape);

    float alpha = 1;
    float beta  = 0;

    auto status = miopenConvolutionForward(ctx.get_stream().get_miopen(),
                                           &alpha,
                                           x_desc.get(),
                                           data.implicit(),
                                           w_desc.get(),
                                           weights.implicit(),
                                           cd.get(),
                                           algo,
                                           &beta,
                                           y_desc.get(),
                                           result.implicit(),
                                           workspace.implicit(),
                                           workspace.get_shape().bytes());
    if(status != miopenStatusSuccess)
    {
        MIGRAPHX_THROW("QUANT_CONVOLUTION: run convolution forward failed");
    }

    return result;
}
// Runs MIOpen's forward-algorithm search for this quantized convolution.
//
// inputs[0] is the data shape and inputs[1] the weights shape. On success
// `algo` holds the algorithm reported by the search, `solution_id` the id of
// the first solution MIOpen lists, and the returned int8 shape has one
// dimension equal to the memory the search reported (perf.memory).
//
// Throws (MIGRAPHX_THROW) when any MIOpen call fails or no solution is listed.
shape miopen_quant_convolution::find(context& ctx,
                                     const shape& output_shape,
                                     std::vector<shape> inputs)
{
    auto x_desc = make_tensor(inputs[0], int8_x4_format);
    auto w_desc = make_tensor(inputs[1], int8_x4_format);
    auto y_desc = make_tensor(output_shape);

    std::size_t workspace_size = 0;
    auto status = miopenConvolutionForwardGetWorkSpaceSize(ctx.get_stream().get_miopen(),
                                                           w_desc.get(),
                                                           x_desc.get(),
                                                           cd.get(),
                                                           y_desc.get(),
                                                           &workspace_size);
    // Previously the status was ignored, so a failure silently produced a
    // zero-sized workspace for the search below.
    if(status != miopenStatusSuccess)
        MIGRAPHX_THROW("MIOpen Quant Convolution: failed to get forward workspace size");
    const shape workspace_shape = shape{shape::int8_type, {workspace_size}};

    // The buffers handed to the search use the packed layout when
    // int8_x4_format is set.
    auto x_shape = inputs[0];
    auto w_shape = inputs[1];
    if(int8_x4_format)
    {
        x_shape = pack_int8_shape(x_shape);
        w_shape = pack_int8_shape(w_shape);
    }
    auto x         = to_gpu(generate_argument(x_shape));
    auto w         = to_gpu(generate_argument(w_shape));
    auto y         = allocate_gpu(output_shape);
    auto workspace = allocate_gpu(workspace_shape);

    int algo_count = 1;
    miopenConvAlgoPerf_t perf{};
    status = miopenFindConvolutionForwardAlgorithm(ctx.get_stream().get_miopen(),
                                                   x_desc.get(),
                                                   x.implicit(),
                                                   w_desc.get(),
                                                   w.implicit(),
                                                   cd.get(),
                                                   y_desc.get(),
                                                   y.implicit(),
                                                   1,
                                                   &algo_count,
                                                   &perf,
                                                   workspace.implicit(),
                                                   workspace_size,
                                                   false);
    if(status != miopenStatusSuccess)
        MIGRAPHX_THROW("MIOpen Quant Convolution: find convolution failed");
    algo = perf.fwd_algo;

    size_t solution_count = 0;
    status = miopenConvolutionForwardGetSolutionCount(ctx.get_stream().get_miopen(),
                                                      w_desc.get(),
                                                      x_desc.get(),
                                                      cd.get(),
                                                      y_desc.get(),
                                                      &solution_count);
    if(status != miopenStatusSuccess)
        MIGRAPHX_THROW("MIOpen Quant Convolution: get solution count failed");

    std::vector<miopenConvSolution_t> solutions(solution_count);
    status = miopenConvolutionForwardGetSolution(ctx.get_stream().get_miopen(),
                                                 w_desc.get(),
                                                 x_desc.get(),
                                                 cd.get(),
                                                 y_desc.get(),
                                                 solution_count,
                                                 &solution_count,
                                                 solutions.data());
    if(status != miopenStatusSuccess)
        MIGRAPHX_THROW("MIOpen Quant Convolution: get solution failed");
    // front() on an empty vector is undefined behavior; fail loudly instead.
    if(solution_count == 0 or solutions.empty())
        MIGRAPHX_THROW("MIOpen Quant Convolution: no solution available");

    solution_id = solutions.front().solution_id;

    return shape{shape::int8_type, {perf.memory}};
}
// Makes the operator ready to run: creates the descriptor if it is missing,
// re-runs find() when no solution id is recorded yet (rejecting a workspace
// larger than inputs.at(2)), and compiles the recorded solution.
// Throws (MIGRAPHX_THROW) when the workspace grew or compilation fails.
void miopen_quant_convolution::finalize(context& ctx,
                                        const shape& output_shape,
                                        std::vector<shape> inputs)
{
    if(cd == nullptr)
        cd = make_conv(op);

    const bool needs_find = (solution_id == 0);
    if(needs_find)
    {
        // Check that workspace hasn't changed
        auto provided_bytes = inputs.at(2).bytes();
        auto required       = find(ctx, output_shape, inputs);
        if(required.bytes() > provided_bytes)
            MIGRAPHX_THROW("MIOpen Quant Convolution: workspace has changed during finalization.");
    }

    auto x_desc = make_tensor(inputs[0], int8_x4_format);
    auto w_desc = make_tensor(inputs[1], int8_x4_format);
    auto y_desc = make_tensor(output_shape);

    auto* handle = ctx.get_stream().get_miopen();
    auto status  = miopenConvolutionForwardCompileSolution(
        handle, w_desc.get(), x_desc.get(), cd.get(), y_desc.get(), solution_id);
    if(status != miopenStatusSuccess)
        MIGRAPHX_THROW("MIOpen Quant Convolution: compile solution failed");
}
/// Builds a copy of `s` whose dimension 1 is rounded up to the next multiple
/// of four, with stride 0 recomputed as `strides[1] * lens[1]` from the padded
/// length. All other lengths and strides are carried over unchanged.
///
/// @param s Source shape; its type must be `shape::int8_type`.
/// @return  A shape with the same type as `s` and the adjusted lens/strides.
/// @throws via MIGRAPHX_THROW when `s` is not of `int8_type`.
///
/// NOTE(review): indexes lens[1], strides[0] and strides[1] without a rank
/// check, so callers are assumed to pass shapes with at least two dimensions.
shape miopen_quant_convolution::pack_int8_shape(const shape& s) const
{
    const bool is_int8 = (s.type() == shape::int8_type);
    if(not is_int8)
    {
        MIGRAPHX_THROW("PACK_INT8_SHAPE: only process int8_type");
    }

    // Work on private copies of the dimension and stride vectors.
    auto dims  = s.lens();
    auto steps = s.strides();

    // Round dimension 1 up to a multiple of the pack width.
    const std::size_t pack_width = 4;
    const auto padded            = (dims[1] + (pack_width - 1)) / pack_width * pack_width;
    dims[1]                      = padded;

    // Stride 0 has to span the padded dimension 1.
    steps[0] = steps[1] * padded;

    return shape{s.type(), dims, steps};
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
test/api/test_custom_op_gpu.cpp
View file @
23851d62
...
@@ -55,7 +55,8 @@ struct half_copy_host final : migraphx::experimental_custom_op_base
...
@@ -55,7 +55,8 @@ struct half_copy_host final : migraphx::experimental_custom_op_base
hipMemcpyHostToHost
,
hipMemcpyHostToHost
,
ctx
.
get_queue
<
hipStream_t
>
()));
ctx
.
get_queue
<
hipStream_t
>
()));
MIGRAPHX_HIP_ASSERT
(
hipDeviceSynchronize
());
MIGRAPHX_HIP_ASSERT
(
hipDeviceSynchronize
());
MIGRAPHX_HIP_ASSERT
(
hipMemset
(
output_buffer_ptr
,
0
,
copy_bytes
));
MIGRAPHX_HIP_ASSERT
(
hipMemsetAsync
(
output_buffer_ptr
,
0
,
copy_bytes
,
ctx
.
get_queue
<
hipStream_t
>
()));
MIGRAPHX_HIP_ASSERT
(
hipDeviceSynchronize
());
MIGRAPHX_HIP_ASSERT
(
hipDeviceSynchronize
());
return
inputs
[
1
];
return
inputs
[
1
];
}
}
...
@@ -97,7 +98,8 @@ struct half_copy_device final : migraphx::experimental_custom_op_base
...
@@ -97,7 +98,8 @@ struct half_copy_device final : migraphx::experimental_custom_op_base
hipMemcpyDeviceToDevice
,
hipMemcpyDeviceToDevice
,
ctx
.
get_queue
<
hipStream_t
>
()));
ctx
.
get_queue
<
hipStream_t
>
()));
MIGRAPHX_HIP_ASSERT
(
hipDeviceSynchronize
());
MIGRAPHX_HIP_ASSERT
(
hipDeviceSynchronize
());
MIGRAPHX_HIP_ASSERT
(
hipMemset
(
output_buffer_ptr
,
0
,
copy_bytes
));
MIGRAPHX_HIP_ASSERT
(
hipMemsetAsync
(
output_buffer_ptr
,
0
,
copy_bytes
,
ctx
.
get_queue
<
hipStream_t
>
()));
MIGRAPHX_HIP_ASSERT
(
hipDeviceSynchronize
());
MIGRAPHX_HIP_ASSERT
(
hipDeviceSynchronize
());
return
inputs
[
1
];
return
inputs
[
1
];
}
}
...
@@ -124,7 +126,7 @@ struct half_copy_device_same_buffer final : migraphx::experimental_custom_op_bas
...
@@ -124,7 +126,7 @@ struct half_copy_device_same_buffer final : migraphx::experimental_custom_op_bas
virtual
bool
runs_on_offload_target
()
const
override
{
return
true
;
}
virtual
bool
runs_on_offload_target
()
const
override
{
return
true
;
}
virtual
migraphx
::
argument
virtual
migraphx
::
argument
compute
(
migraphx
::
context
,
migraphx
::
shape
,
migraphx
::
arguments
inputs
)
const
override
compute
(
migraphx
::
context
ctx
,
migraphx
::
shape
,
migraphx
::
arguments
inputs
)
const
override
{
{
// This custom op simply sets first half size_bytes of the input 0, and rest of the half
// This custom op simply sets first half size_bytes of the input 0, and rest of the half
// bytes are copied. for this custom_op, it does its computation on the "device". Therefore,
// bytes are copied. for this custom_op, it does its computation on the "device". Therefore,
...
@@ -133,7 +135,8 @@ struct half_copy_device_same_buffer final : migraphx::experimental_custom_op_bas
...
@@ -133,7 +135,8 @@ struct half_copy_device_same_buffer final : migraphx::experimental_custom_op_bas
auto
input_bytes
=
inputs
[
0
].
get_shape
().
bytes
();
auto
input_bytes
=
inputs
[
0
].
get_shape
().
bytes
();
auto
copy_bytes
=
input_bytes
/
2
;
auto
copy_bytes
=
input_bytes
/
2
;
MIGRAPHX_HIP_ASSERT
(
hipSetDevice
(
0
));
MIGRAPHX_HIP_ASSERT
(
hipSetDevice
(
0
));
MIGRAPHX_HIP_ASSERT
(
hipMemset
(
buffer_ptr
,
0
,
copy_bytes
));
MIGRAPHX_HIP_ASSERT
(
hipMemsetAsync
(
buffer_ptr
,
0
,
copy_bytes
,
ctx
.
get_queue
<
hipStream_t
>
()));
MIGRAPHX_HIP_ASSERT
(
hipDeviceSynchronize
());
MIGRAPHX_HIP_ASSERT
(
hipDeviceSynchronize
());
return
inputs
[
0
];
return
inputs
[
0
];
}
}
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment