Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
3eaa0969
Commit
3eaa0969
authored
Feb 01, 2023
by
Alan Turner
Browse files
Merge remote-tracking branch 'origin/bert-opt' into HEAD
parents
e3d0c287
22aa9c5e
Changes
55
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
628 additions
and
137 deletions
+628
-137
.github/workflows/sync-onnxrt-main.yaml
.github/workflows/sync-onnxrt-main.yaml
+46
-0
Dockerfile
Dockerfile
+28
-6
Jenkinsfile
Jenkinsfile
+6
-0
src/CMakeLists.txt
src/CMakeLists.txt
+1
-1
src/include/migraphx/convolution.hpp
src/include/migraphx/convolution.hpp
+96
-0
src/include/migraphx/match/layernorm.hpp
src/include/migraphx/match/layernorm.hpp
+12
-5
src/include/migraphx/op/convolution.hpp
src/include/migraphx/op/convolution.hpp
+34
-0
src/include/migraphx/op/gather.hpp
src/include/migraphx/op/gather.hpp
+40
-15
src/include/migraphx/op/quant_convolution.hpp
src/include/migraphx/op/quant_convolution.hpp
+13
-0
src/include/migraphx/optimize_module.hpp
src/include/migraphx/optimize_module.hpp
+48
-0
src/module.cpp
src/module.cpp
+2
-1
src/onnx/parse_gemm.cpp
src/onnx/parse_gemm.cpp
+28
-24
src/onnx/parse_trilu.cpp
src/onnx/parse_trilu.cpp
+90
-0
src/optimize_module.cpp
src/optimize_module.cpp
+49
-0
src/simplify_algebra.cpp
src/simplify_algebra.cpp
+40
-5
src/targets/gpu/CMakeLists.txt
src/targets/gpu/CMakeLists.txt
+59
-57
src/targets/gpu/compile_hip.cpp
src/targets/gpu/compile_hip.cpp
+30
-15
src/targets/gpu/compile_hip_code_object.cpp
src/targets/gpu/compile_hip_code_object.cpp
+1
-1
src/targets/gpu/device/include/migraphx/gpu/device/reduce.hpp
...targets/gpu/device/include/migraphx/gpu/device/reduce.hpp
+3
-5
src/targets/gpu/include/migraphx/gpu/convolution.hpp
src/targets/gpu/include/migraphx/gpu/convolution.hpp
+2
-2
No files found.
.github/workflows/sync-onnxrt-main.yaml
0 → 100644
View file @
3eaa0969
name
:
Onnxruntime main weekly sync
on
:
schedule
:
-
cron
:
"
05
17
*
*
1"
jobs
:
runs-on
:
ubuntu-latest
sync
:
steps
:
-
uses
:
actions/checkout@v3
with
:
ref
:
develop
path
:
../
get_date
:
steps
:
-
run
:
echo "::set-output name=date::$(date +'%Y-%m-%d')"
update_file
:
needs
:
[
sync get_date
]
steps
:
-
run
:
git clone https://github.com/microsoft/onnxruntime.git && cd onnxruntime && git rev-parse HEAD >> ../test/onnx/.onnxrt-commit
Add_commit
:
needs
:
update_file
steps
:
-
name
:
Add & Commit
uses
:
EndBug/add-and-commit@v9.1.1
with
:
new_branch
:
onnxruntime-sync-${{ steps.date.outputs.date }}
add
:
../test/onnx/.onnxrt-commit
message
:
Update Onnxruntime commit to latest release
default_author
:
github_actions
push
:
true
PR
:
needs
:
Add_commit
steps
:
-
name
:
GitHub Action for creating Pull Requests
uses
:
devops-infra/action-pull-request@v0.5.3
with
:
github_token
:
${{ secrets.GITHUB_TOKEN }}
title
:
Sync Onnxruntime main
reviewer
:
pfultz2, causten
assignee
:
TedThemistokleous
label
:
automatic, onnxruntime
target_branch
:
develop
Dockerfile
View file @
3eaa0969
...
@@ -5,6 +5,10 @@ ARG PREFIX=/usr/local
...
@@ -5,6 +5,10 @@ ARG PREFIX=/usr/local
# Support multiarch
# Support multiarch
RUN
dpkg
--add-architecture
i386
RUN
dpkg
--add-architecture
i386
# Install rocm key
RUN
apt-get update
&&
apt-get
install
-y
gnupg2
--no-install-recommends
curl
&&
\
curl
-sL
http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add -
# Add rocm repository
# Add rocm repository
RUN
sh
-c
'echo deb [arch=amd64 trusted=yes] http://repo.radeon.com/rocm/apt/5.3/ ubuntu main > /etc/apt/sources.list.d/rocm.list'
RUN
sh
-c
'echo deb [arch=amd64 trusted=yes] http://repo.radeon.com/rocm/apt/5.3/ ubuntu main > /etc/apt/sources.list.d/rocm.list'
...
@@ -32,10 +36,27 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-
...
@@ -32,10 +36,27 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-
libnuma-dev
\
libnuma-dev
\
miopen-hip
\
miopen-hip
\
rocblas
\
rocblas
\
hipfft
\
rocthrust
\
rocrand
\
hipsparse
\
rccl
\
rccl-dev
\
rocm-smi-lib
\
rocm-dev
\
roctracer-dev
\
hipcub
\
hipblas
\
hipify-clang
\
half
\
libssl-dev
\
zlib1g-dev
&&
\
zlib1g-dev
&&
\
apt-get clean
&&
\
apt-get clean
&&
\
rm
-rf
/var/lib/apt/lists/
*
rm
-rf
/var/lib/apt/lists/
*
# add this for roctracer dependancies
RUN
pip3
install
CppHeaderParser
packaging
==
22.0
# Workaround broken rocm packages
# Workaround broken rocm packages
RUN
ln
-s
/opt/rocm-
*
/opt/rocm
RUN
ln
-s
/opt/rocm-
*
/opt/rocm
RUN
echo
"/opt/rocm/lib"
>
/etc/ld.so.conf.d/rocm.conf
RUN
echo
"/opt/rocm/lib"
>
/etc/ld.so.conf.d/rocm.conf
...
@@ -72,18 +93,19 @@ RUN /download_models.sh && rm /download_models.sh
...
@@ -72,18 +93,19 @@ RUN /download_models.sh && rm /download_models.sh
# Install latest ccache version
# Install latest ccache version
RUN
cget
-p
$PREFIX
install
facebook/zstd@v1.4.5
-X
subdir
-DCMAKE_DIR
=
build/cmake
RUN
cget
-p
$PREFIX
install
facebook/zstd@v1.4.5
-X
subdir
-DCMAKE_DIR
=
build/cmake
RUN
cget
-p
$PREFIX
install
ccache@v4.1
-DENABLE_TESTING
=
OFF
RUN
cget
-p
$PREFIX
install
ccache@v4.1
-DENABLE_TESTING
=
OFF
RUN
cget
-p
/opt/cmake
install
kitware/cmake@v3.24.3
# Install newer cmake for onnx runtime
COPY
./test/onnx/.onnxrt-commit /
ARG
CMAKE_VERSION=3.24.2
RUN
cget
-p
/opt/cmake
install
-X
binary https://github.com/Kitware/CMake/releases/download/v
${
CMAKE_VERSION
}
/cmake-
${
CMAKE_VERSION
}
-Linux-x86_64
.tar.gz
ARG
ONNXRUNTIME_REPO=https://github.com/Microsoft/onnxruntime
ARG
ONNXRUNTIME_REPO=https://github.com/Microsoft/onnxruntime
ARG
ONNXRUNTIME_BRANCH=main
ARG
ONNXRUNTIME_BRANCH=main
ARG
ONNXRUNTIME_COMMIT=24f1bd6156cf5968bbc76dfb0e801a9b9c56b9fc
ARG
ONNXRUNTIME_COMMIT
RUN
git clone
--single-branch
--branch
${
ONNXRUNTIME_BRANCH
}
--recursive
${
ONNXRUNTIME_REPO
}
onnxruntime
&&
\
RUN
git clone
--single-branch
--branch
${
ONNXRUNTIME_BRANCH
}
--recursive
${
ONNXRUNTIME_REPO
}
onnxruntime
&&
\
cd
onnxruntime
&&
\
cd
onnxruntime
&&
\
git checkout
${
ONNXRUNTIME_COMMIT
}
&&
\
if
[
-z
"
$ONNXRUNTIME_COMMIT
"
]
;
then
git checkout
$(
cat
/.onnxrt-commit
)
;
else
git checkout
${
ONNXRUNTIME_COMMIT
}
;
fi
&&
\
/bin/sh dockerfiles/scripts/install_common_deps.sh
/bin/sh /onnxruntime/dockerfiles/scripts/install_common_deps.sh
ADD
tools/build_and_test_onnxrt.sh /onnxruntime/build_and_test_onnxrt.sh
ADD
tools/build_and_test_onnxrt.sh /onnxruntime/build_and_test_onnxrt.sh
...
...
Jenkinsfile
View file @
3eaa0969
...
@@ -15,11 +15,13 @@ def rocmtestnode(Map conf) {
...
@@ -15,11 +15,13 @@ def rocmtestnode(Map conf) {
def
compiler
=
bconf
.
get
(
"compiler"
,
"/opt/rocm/llvm/bin/clang++"
)
def
compiler
=
bconf
.
get
(
"compiler"
,
"/opt/rocm/llvm/bin/clang++"
)
def
flags
=
bconf
.
get
(
"flags"
,
""
)
def
flags
=
bconf
.
get
(
"flags"
,
""
)
def
gpu_debug
=
bconf
.
get
(
"gpu_debug"
,
"0"
)
def
gpu_debug
=
bconf
.
get
(
"gpu_debug"
,
"0"
)
def
hiprtc_workarounds
=
bconf
.
get
(
"hiprtc_workarounds"
,
"0"
)
def
cmd
=
"""
def
cmd
=
"""
ulimit -c unlimited
ulimit -c unlimited
echo "leak:dnnl::impl::malloc" > suppressions.txt
echo "leak:dnnl::impl::malloc" > suppressions.txt
export LSAN_OPTIONS="suppressions=\$(pwd)/suppressions.txt"
export LSAN_OPTIONS="suppressions=\$(pwd)/suppressions.txt"
export MIGRAPHX_GPU_DEBUG=${gpu_debug}
export MIGRAPHX_GPU_DEBUG=${gpu_debug}
export MIGRAPHX_ENABLE_HIPRTC_WORKAROUNDS=${hiprtc_workarounds}
export CXX=${compiler}
export CXX=${compiler}
export CXXFLAGS='-Werror'
export CXXFLAGS='-Werror'
env
env
...
@@ -110,6 +112,10 @@ rocmtest clang_debug: rocmnode('vega') { cmake_build ->
...
@@ -110,6 +112,10 @@ rocmtest clang_debug: rocmnode('vega') { cmake_build ->
cmake_build
(
flags:
"-DCMAKE_BUILD_TYPE=release"
)
cmake_build
(
flags:
"-DCMAKE_BUILD_TYPE=release"
)
stash
includes:
'build/*.deb'
,
name:
'migraphx-package'
stash
includes:
'build/*.deb'
,
name:
'migraphx-package'
}
}
},
hiprtc_gpu_debug:
rocmnode
(
'vega'
)
{
cmake_build
->
stage
(
'HipRTC GPU Debug'
)
{
cmake_build
(
flags:
"-DCMAKE_BUILD_TYPE=release -DMIGRAPHX_USE_HIPRTC=On"
,
gpu_debug:
true
,
hiprtc_workarounds:
true
)
}
},
mlir_debug:
rocmnode
(
'vega'
)
{
cmake_build
->
},
mlir_debug:
rocmnode
(
'vega'
)
{
cmake_build
->
stage
(
'MLIR Debug'
)
{
stage
(
'MLIR Debug'
)
{
def
sanitizers
=
"undefined"
def
sanitizers
=
"undefined"
...
...
src/CMakeLists.txt
View file @
3eaa0969
...
@@ -64,7 +64,7 @@ add_library(migraphx
...
@@ -64,7 +64,7 @@ add_library(migraphx
normalize_ops.cpp
normalize_ops.cpp
op_enums.cpp
op_enums.cpp
operation.cpp
operation.cpp
optimize.cpp
optimize
_module
.cpp
opt/memory_coloring.cpp
opt/memory_coloring.cpp
opt/memory_coloring_impl.cpp
opt/memory_coloring_impl.cpp
pad_calc.cpp
pad_calc.cpp
...
...
src/include/migraphx/convolution.hpp
0 → 100644
View file @
3eaa0969
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_CONVOLUTION_HPP
#define MIGRAPHX_GUARD_RTGLIB_CONVOLUTION_HPP
#include <migraphx/config.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/par_for.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/tensor_view.hpp>
#include <vector>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
template
<
class
Output
,
class
T
,
class
Padding
,
class
Stride
>
void
convolution
(
Output
output
,
T
input
,
T
weights
,
Padding
padding
,
Stride
stride
,
int
group
)
{
auto
output_shape
=
output
.
get_shape
();
auto
in_lens
=
input
.
get_shape
().
lens
();
auto
wei_lens
=
weights
.
get_shape
().
lens
();
auto
wei_n
=
wei_lens
[
0
];
auto
wei_c
=
wei_lens
[
1
];
std
::
vector
<
std
::
size_t
>
win_size
(
wei_lens
.
begin
()
+
1
,
wei_lens
.
end
());
par_for
(
output_shape
.
elements
(),
[
&
](
auto
i
)
{
auto
idx_o
=
output_shape
.
multi
(
i
);
auto
w
=
idx_o
[
1
];
auto
n_dim
=
idx_o
.
size
();
std
::
vector
<
std
::
ptrdiff_t
>
win_start
;
for
(
std
::
size_t
dim
=
2
;
dim
<
n_dim
;
++
dim
)
{
auto
d_2
=
dim
-
2
;
win_start
.
push_back
(
std
::
ptrdiff_t
(
idx_o
[
dim
]
*
stride
[
d_2
])
-
std
::
ptrdiff_t
(
padding
[
d_2
]));
}
const
auto
group_id
=
w
/
(
wei_n
/
group
);
shape
win_shape
{
output_shape
.
type
(),
win_size
};
double
acc
=
0.0
;
shape_for_each
(
win_shape
,
[
&
](
auto
idx_win
)
{
auto
k
=
idx_win
[
0
];
const
auto
in_ch
=
group_id
*
wei_c
+
k
;
std
::
vector
<
std
::
ptrdiff_t
>
idx
(
idx_o
.
begin
(),
idx_o
.
end
());
idx
[
1
]
=
in_ch
;
std
::
transform
(
idx_win
.
begin
()
+
1
,
idx_win
.
end
(),
win_start
.
begin
(),
idx
.
begin
()
+
2
,
[](
std
::
ptrdiff_t
ii
,
std
::
ptrdiff_t
jj
)
{
return
ii
+
jj
;
});
std
::
vector
<
std
::
ptrdiff_t
>
idx_wei
(
idx_o
.
size
());
idx_wei
[
0
]
=
w
;
std
::
copy
(
idx_win
.
begin
(),
idx_win
.
end
(),
idx_wei
.
begin
()
+
1
);
if
(
std
::
all_of
(
idx
.
begin
()
+
2
,
idx
.
end
(),
[
&
](
auto
ii
)
{
return
ii
>=
0
;
})
and
std
::
equal
(
idx
.
begin
(),
idx
.
end
(),
in_lens
.
begin
(),
in_lens
.
end
(),
std
::
less
<
std
::
ptrdiff_t
>
{}))
{
acc
+=
input
(
idx
.
begin
(),
idx
.
end
())
*
weights
(
idx_wei
.
begin
(),
idx_wei
.
end
());
}
});
output
[
i
]
=
acc
;
});
}
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/include/migraphx/match/layernorm.hpp
View file @
3eaa0969
...
@@ -43,16 +43,23 @@ struct layernorm_matcher
...
@@ -43,16 +43,23 @@ struct layernorm_matcher
auto
variance
()
const
auto
variance
()
const
{
{
return
f
(
"reduce_mean"
)(
arg
(
0
)(
f
(
"pow"
)(
arg
(
0
)(
x_minus_mean
()),
arg
(
1
)(
has_value
(
2.0
f
)))));
return
f
(
"reduce_mean"
)(
arg
(
0
)(
any_of
(
f
(
"pow"
)(
arg
(
0
)(
x_minus_mean
()),
arg
(
1
)(
has_value
(
2.0
f
))),
f
(
"mul"
)(
arg
(
0
)(
x_minus_mean
()),
arg
(
1
)(
x_minus_mean
())),
f
(
"sqdiff"
)(
either_arg
(
0
,
1
)(
any
().
bind
(
"x"
),
skip_broadcasts
(
f
(
"reduce_mean"
)))))));
}
}
auto
layernorm_onnx
(
)
const
auto
sqrt_add_eps
(
const
std
::
string
&
name
)
const
{
{
auto
add_eps
=
f
(
"add"
)(
either_arg
(
0
,
1
)(
variance
(),
is_constant
().
bind
(
"eps"
)));
auto
add_eps
=
f
(
"add"
)(
either_arg
(
0
,
1
)(
variance
(),
is_constant
().
bind
(
"eps"
)));
return
f
(
"div"
)(
return
skip_broadcasts
(
f
(
name
)(
arg
(
0
)(
any_of
(
add_eps
,
variance
()))));
arg
(
0
)(
x_minus_mean
()),
}
arg
(
1
)(
skip_broadcasts
(
f
(
"sqrt"
)(
arg
(
0
)(
match
::
any_of
(
add_eps
,
variance
()))))));
auto
layernorm_onnx
()
const
{
auto
div_sqrt
=
f
(
"div"
)(
arg
(
0
)(
x_minus_mean
()),
arg
(
1
)(
sqrt_add_eps
(
"sqrt"
)));
auto
mul_rsqrt
=
f
(
"mul"
)(
either_arg
(
0
,
1
)(
x_minus_mean
(),
sqrt_add_eps
(
"rsqrt"
)));
return
any
(
any_of
(
div_sqrt
,
mul_rsqrt
));
}
}
auto
matcher
()
const
{
return
layernorm_onnx
();
}
auto
matcher
()
const
{
return
layernorm_onnx
();
}
...
...
src/include/migraphx/op/convolution.hpp
View file @
3eaa0969
...
@@ -24,9 +24,12 @@
...
@@ -24,9 +24,12 @@
#ifndef MIGRAPHX_GUARD_OPERATORS_CONVOLUTION_HPP
#ifndef MIGRAPHX_GUARD_OPERATORS_CONVOLUTION_HPP
#define MIGRAPHX_GUARD_OPERATORS_CONVOLUTION_HPP
#define MIGRAPHX_GUARD_OPERATORS_CONVOLUTION_HPP
#include <migraphx/argument.hpp>
#include <migraphx/op/common.hpp>
#include <migraphx/op/common.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/config.hpp>
#include <migraphx/config.hpp>
#include <migraphx/convolution.hpp>
#include <migraphx/pad_calc.hpp>
#include <migraphx/value.hpp>
#include <migraphx/value.hpp>
#include <cmath>
#include <cmath>
#include <utility>
#include <utility>
...
@@ -201,6 +204,37 @@ struct convolution
...
@@ -201,6 +204,37 @@ struct convolution
check_attribute_size
();
check_attribute_size
();
return
stride
.
size
();
return
stride
.
size
();
}
}
argument
compute
(
shape
output_shape
,
std
::
vector
<
argument
>
args
)
const
{
std
::
vector
<
std
::
size_t
>
new_padding
;
if
(
padding_mode
!=
op
::
padding_mode_t
::
default_
)
{
auto
input_lens
=
args
[
0
].
get_shape
().
lens
();
auto
weights_lens
=
args
[
1
].
get_shape
().
lens
();
new_padding
=
padding_mode
==
op
::
same_upper
?
calc_dyn_auto_pad
(
input_lens
,
weights_lens
,
stride
,
dilation
,
true
)
:
calc_dyn_auto_pad
(
input_lens
,
weights_lens
,
stride
,
dilation
,
false
);
output_shape
=
compute_padded_shape
(
args
[
0
].
get_shape
(),
args
[
1
].
get_shape
(),
new_padding
,
stride
,
dilation
);
}
else
{
new_padding
=
padding
;
if
(
output_shape
.
dynamic
())
{
output_shape
=
normalize_compute_shape
({
args
.
at
(
0
).
get_shape
(),
args
.
at
(
1
).
get_shape
()});
}
}
argument
result
{
output_shape
};
visit_all
(
result
,
args
[
0
],
args
[
1
])([
&
](
auto
output
,
auto
input
,
auto
weights
)
{
migraphx
::
convolution
(
output
,
input
,
weights
,
new_padding
,
stride
,
group
);
});
return
result
;
}
};
};
}
// namespace op
}
// namespace op
...
...
src/include/migraphx/op/gather.hpp
View file @
3eaa0969
...
@@ -26,6 +26,7 @@
...
@@ -26,6 +26,7 @@
#include <array>
#include <array>
#include <migraphx/check_shapes.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/dyn_output.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/streamutils.hpp>
#include <migraphx/streamutils.hpp>
#include <migraphx/literal.hpp>
#include <migraphx/literal.hpp>
...
@@ -61,35 +62,59 @@ struct gather
...
@@ -61,35 +62,59 @@ struct gather
shape
normalize_compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
shape
normalize_compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
{
{
check_shapes
{
inputs
,
*
this
}.
has
(
2
);
check_shapes
{
inputs
,
*
this
,
true
}.
has
(
2
);
auto
lens
=
inputs
[
0
].
lens
();
shape
data
=
inputs
[
0
];
auto
type
=
inputs
[
0
].
type
();
shape
indices
=
inputs
[
1
];
lens
.
erase
(
lens
.
begin
()
+
axis
);
auto
type
=
data
.
type
();
if
(
not
inputs
[
1
].
scalar
())
// If index_dims is dynamic, convert the data to dynamic too.
if
(
indices
.
dynamic
())
{
{
auto
ind_lens
=
inputs
[
1
].
lens
();
data
=
data
.
to_dynamic
();
lens
.
insert
(
lens
.
begin
()
+
axis
,
ind_lens
.
begin
(),
ind_lens
.
end
());
}
}
if
(
data
.
dynamic
())
// for scalar output
if
(
lens
.
empty
())
{
{
return
{
type
};
auto
dims
=
data
.
dyn_dims
();
dims
.
erase
(
dims
.
begin
()
+
axis
);
if
(
not
indices
.
scalar
())
{
auto
index_dims
=
indices
.
to_dynamic
().
dyn_dims
();
dims
.
insert
(
dims
.
begin
()
+
axis
,
index_dims
.
begin
(),
index_dims
.
end
());
}
return
{
type
,
dims
};
}
}
else
{
// Both data and indices are static. indices may be scalar
auto
lens
=
data
.
lens
();
lens
.
erase
(
lens
.
begin
()
+
axis
);
return
{
type
,
lens
};
if
(
not
indices
.
scalar
())
{
auto
ind_lens
=
indices
.
lens
();
lens
.
insert
(
lens
.
begin
()
+
axis
,
ind_lens
.
begin
(),
ind_lens
.
end
());
}
// for scalar output
if
(
lens
.
empty
())
{
return
{
type
};
}
return
{
type
,
lens
};
}
}
}
argument
compute
(
const
shape
&
output_shape
,
std
::
vector
<
argument
>
args
)
const
argument
compute
(
const
dyn_output
&
dyn_out
,
std
::
vector
<
argument
>
args
)
const
{
{
argument
result
{
out
put_shape
};
argument
result
{
dyn_out
.
com
put
ed
_shape
};
// negative axis means counting dimensions from back
// negative axis means counting dimensions from back
auto
lens
=
args
[
0
].
get_shape
().
lens
();
auto
lens
=
args
[
0
].
get_shape
().
lens
();
std
::
size_t
axis_dim_size
=
lens
[
axis
];
std
::
size_t
axis_dim_size
=
lens
[
axis
];
// max dimension in axis
// max dimension in axis
visit_all
(
result
,
args
[
0
])([
&
](
auto
output
,
auto
data
)
{
visit_all
(
result
,
args
[
0
])([
&
](
auto
output
,
auto
data
)
{
args
[
1
].
visit
([
&
](
auto
indices
)
{
args
[
1
].
visit
([
&
](
auto
indices
)
{
if
(
out
put_shape
.
scalar
())
if
(
dyn_out
.
com
put
ed
_shape
.
scalar
())
{
{
auto
in_index
=
indices
.
front
();
auto
in_index
=
indices
.
front
();
in_index
=
(
in_index
<
0
)
?
in_index
+
axis_dim_size
:
in_index
;
in_index
=
(
in_index
<
0
)
?
in_index
+
axis_dim_size
:
in_index
;
...
...
src/include/migraphx/op/quant_convolution.hpp
View file @
3eaa0969
...
@@ -25,8 +25,10 @@
...
@@ -25,8 +25,10 @@
#define MIGRAPHX_GUARD_OPERATORS_QUANT_CONVOLUTION_HPP
#define MIGRAPHX_GUARD_OPERATORS_QUANT_CONVOLUTION_HPP
#include <migraphx/op/common.hpp>
#include <migraphx/op/common.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/config.hpp>
#include <migraphx/config.hpp>
#include <migraphx/convolution.hpp>
#include <migraphx/value.hpp>
#include <migraphx/value.hpp>
#include <cmath>
#include <cmath>
#include <utility>
#include <utility>
...
@@ -114,6 +116,17 @@ struct quant_convolution
...
@@ -114,6 +116,17 @@ struct quant_convolution
check_attribute_size
();
check_attribute_size
();
return
stride
.
size
();
return
stride
.
size
();
}
}
argument
compute
(
shape
output_shape
,
std
::
vector
<
argument
>
args
)
const
{
argument
result
{
output_shape
};
result
.
visit
([
&
](
auto
output
)
{
visit_all
(
args
[
0
],
args
[
1
])([
&
](
auto
input
,
auto
weights
)
{
migraphx
::
convolution
(
output
,
input
,
weights
,
padding
,
stride
,
group
);
});
});
return
result
;
}
};
};
}
// namespace op
}
// namespace op
...
...
src/include/migraphx/optimize_module.hpp
0 → 100644
View file @
3eaa0969
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_OPTIMIZE_MODULE_HPP
#define MIGRAPHX_GUARD_RTGLIB_OPTIMIZE_MODULE_HPP
#include <string>
#include <migraphx/instruction_ref.hpp>
#include <migraphx/config.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
struct
module_pass_manager
;
/**
* Runs several passes in a loop
*/
struct
optimize_module
{
std
::
string
name
()
const
{
return
"optimize_module"
;
}
void
apply
(
module_pass_manager
&
mpm
)
const
;
};
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/module.cpp
View file @
3eaa0969
...
@@ -822,7 +822,8 @@ static void print_make_op(std::ostream& os, const operation& op)
...
@@ -822,7 +822,8 @@ static void print_make_op(std::ostream& os, const operation& op)
static
void
print_py_shape
(
std
::
ostream
&
os
,
const
migraphx
::
shape
&
s
)
static
void
print_py_shape
(
std
::
ostream
&
os
,
const
migraphx
::
shape
&
s
)
{
{
os
<<
"migraphx.shape("
<<
s
.
type_string
()
<<
", lens="
<<
to_json_string
(
s
.
lens
());
os
<<
"migraphx.shape(type="
<<
to_json_string
(
s
.
type_string
())
<<
", lens="
<<
to_json_string
(
s
.
lens
());
if
(
not
s
.
standard
())
if
(
not
s
.
standard
())
os
<<
", strides="
<<
to_json_string
(
s
.
strides
());
os
<<
", strides="
<<
to_json_string
(
s
.
strides
());
os
<<
")"
;
os
<<
")"
;
...
...
src/onnx/parse_gemm.cpp
View file @
3eaa0969
...
@@ -90,41 +90,45 @@ struct parse_gemm : op_parser<parse_gemm>
...
@@ -90,41 +90,45 @@ struct parse_gemm : op_parser<parse_gemm>
?
info
.
add_instruction
(
make_op
(
"transpose"
,
{{
"permutation"
,
perm
}}),
args
[
1
])
?
info
.
add_instruction
(
make_op
(
"transpose"
,
{{
"permutation"
,
perm
}}),
args
[
1
])
:
args
[
1
];
:
args
[
1
];
auto
ret
=
info
.
add_instruction
(
make_op
(
"dot"
),
a_arg
,
b_arg
);
auto
dot_ins
=
info
.
add_instruction
(
make_op
(
"dot"
),
a_arg
,
b_arg
);
if
(
args
.
size
()
==
3
)
if
(
args
.
size
()
==
3
)
{
{
// TODO: support dynamic C input
if
(
not
float_equal
(
beta
,
0.0
f
))
if
(
std
::
any_of
(
args
.
cbegin
(),
args
.
cend
(),
[](
auto
in_arg
)
{
return
in_arg
->
get_shape
().
dynamic
();
}))
{
{
MIGRAPHX_THROW
(
"PARSE_GEMM: C input not handled for dynamic input shapes"
);
auto
c_arg
=
args
[
2
];
}
if
(
dot_ins
->
get_shape
().
dynamic
())
if
(
not
float_equal
(
beta
,
0.0
f
)
and
args
[
2
]
->
get_shape
().
elements
()
>
0
)
{
{
c_arg
=
info
.
add_instruction
(
make_op
(
"multibroadcast"
),
args
[
2
],
dot_ins
);
auto
out_lens
=
a_arg
->
get_shape
().
lens
();
}
out_lens
.
back
()
=
b_arg
->
get_shape
().
lens
().
back
();
else
auto
c_arg
=
args
[
2
];
auto
c_lens
=
c_arg
->
get_shape
().
lens
();
if
(
not
std
::
equal
(
out_lens
.
begin
(),
out_lens
.
end
(),
c_lens
.
begin
(),
c_lens
.
end
()))
{
{
c_arg
=
info
.
add_instruction
(
auto
out_lens
=
a_arg
->
get_shape
().
lens
();
make_op
(
"multibroadcast"
,
{{
"out_lens"
,
out_lens
}}),
args
[
2
]);
out_lens
.
back
()
=
b_arg
->
get_shape
().
lens
().
back
();
auto
c_lens
=
c_arg
->
get_shape
().
lens
();
if
(
not
std
::
equal
(
out_lens
.
begin
(),
out_lens
.
end
(),
c_lens
.
begin
(),
c_lens
.
end
()))
{
c_arg
=
info
.
add_instruction
(
make_op
(
"multibroadcast"
,
{{
"out_lens"
,
out_lens
}}),
args
[
2
]);
}
}
}
auto
beta_literal
=
info
.
add_literal
(
beta
);
auto
beta_c
=
info
.
add_broadcastable_binary_op
(
"mul"
,
c_arg
,
beta_literal
);
if
(
not
float_equal
(
beta
,
1.0
f
))
if
(
beta_c
->
get_shape
().
type
()
!=
dot_type
)
{
{
beta_c
=
info
.
add_instruction
(
make_op
(
"convert"
,
{{
"target_type"
,
dot_type
}}),
auto
beta_literal
=
info
.
add_literal
(
beta
);
beta_c
);
c_arg
=
info
.
add_broadcastable_binary_op
(
"mul"
,
c_arg
,
beta_literal
);
if
(
c_arg
->
get_shape
().
type
()
!=
dot_type
)
{
c_arg
=
info
.
add_instruction
(
make_op
(
"convert"
,
{{
"target_type"
,
dot_type
}}),
c_arg
);
}
}
}
return
info
.
add_instruction
(
make_op
(
"add"
),
ret
,
beta_c
);
return
info
.
add_instruction
(
make_op
(
"add"
),
dot_ins
,
c_arg
);
}
}
}
}
return
dot_ins
;
return
ret
;
}
}
};
};
...
...
src/onnx/parse_trilu.cpp
0 → 100644
View file @
3eaa0969
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/onnx/op_parser.hpp>
#include <migraphx/onnx/checks.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/make_op.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
onnx
{
struct
parse_trilu
:
op_parser
<
parse_trilu
>
{
std
::
vector
<
op_desc
>
operators
()
const
{
return
{{
"Trilu"
}};
}
instruction_ref
parse
(
const
op_desc
&
,
const
onnx_parser
&
,
const
onnx_parser
::
node_info
&
info
,
std
::
vector
<
instruction_ref
>
args
)
const
{
auto
input_shape
=
args
[
0
]
->
get_shape
();
assert
(
input_shape
.
ndim
()
>=
2
);
auto
input_lens
=
input_shape
.
lens
();
size_t
num_rows
=
*
(
input_lens
.
rbegin
()
+
1
);
size_t
num_cols
=
input_lens
.
back
();
int
k
=
0
;
bool
upper
=
true
;
if
(
args
.
size
()
>
1
)
{
auto
arg_k
=
args
[
1
]
->
eval
();
check_arg_empty
(
arg_k
,
"PARSE_TRILU: dynamic k not supported"
);
k
=
arg_k
.
at
<
int
>
();
}
if
(
k
<
0
)
MIGRAPHX_THROW
(
"PARSE_TRILU: negative k values not supported"
);
if
(
contains
(
info
.
attributes
,
"upper"
))
{
upper
=
static_cast
<
bool
>
(
info
.
attributes
.
at
(
"upper"
).
i
());
}
shape
::
type_t
output_type
=
args
[
0
]
->
get_shape
().
type
();
// when creating the mask, if upper == 1,
// the inner triangle will have values set to 0
std
::
vector
<
bool
>
mask_mat
(
num_rows
*
num_cols
,
upper
);
for
(
size_t
i
=
0
;
i
<
num_rows
;
i
++
)
{
for
(
size_t
j
=
0
;
j
<
std
::
min
(
k
,
static_cast
<
int
>
(
num_cols
));
j
++
)
{
mask_mat
[
i
*
num_cols
+
j
]
=
not
upper
;
}
k
++
;
}
auto
mask
=
info
.
add_literal
(
migraphx
::
literal
{
migraphx
::
shape
{
output_type
,
{
num_rows
,
num_cols
}},
mask_mat
});
return
info
.
add_broadcastable_binary_op
(
"mul"
,
mask
,
args
[
0
]);
}
};
}
// namespace onnx
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/optimize_module.cpp
0 → 100644
View file @
3eaa0969
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/optimize_module.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/simplify_reshapes.hpp>
#include <migraphx/simplify_algebra.hpp>
#include <migraphx/eliminate_common_subexpression.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/propagate_constant.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
void
optimize_module
::
apply
(
module_pass_manager
&
mpm
)
const
{
for
(
int
i
=
0
;
i
<
2
;
i
++
)
{
mpm
.
run_pass
(
simplify_reshapes
{});
mpm
.
run_pass
(
simplify_algebra
{});
mpm
.
run_pass
(
eliminate_common_subexpression
{});
mpm
.
run_pass
(
dead_code_elimination
{});
mpm
.
run_pass
(
propagate_constant
{});
mpm
.
run_pass
(
dead_code_elimination
{});
}
}
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/simplify_algebra.cpp
View file @
3eaa0969
...
@@ -31,6 +31,7 @@
...
@@ -31,6 +31,7 @@
#include <migraphx/op/reshape.hpp>
#include <migraphx/op/reshape.hpp>
#include <migraphx/op/transpose.hpp>
#include <migraphx/op/transpose.hpp>
#include <migraphx/matcher.hpp>
#include <migraphx/matcher.hpp>
#include <migraphx/common.hpp>
#include <migraphx/literal.hpp>
#include <migraphx/literal.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/serialize.hpp>
#include <migraphx/serialize.hpp>
...
@@ -51,8 +52,9 @@ auto op_lit_broadcast(std::string op, std::string x, std::string y)
...
@@ -51,8 +52,9 @@ auto op_lit_broadcast(std::string op, std::string x, std::string y)
auto
conv_const_weights
()
auto
conv_const_weights
()
{
{
return
match
::
name
(
"convolution"
)(
match
::
used_once
(),
return
match
::
name
(
"convolution"
)(
match
::
args
(
match
::
any
(),
match
::
is_constant
().
bind
(
"w"
)));
match
::
used_once
(),
match
::
args
(
match
::
none_of
(
match
::
is_constant
()),
match
::
is_constant
().
bind
(
"w"
)));
}
}
auto
reduction
()
{
return
match
::
name_contains
(
"reduce"
);
}
auto
reduction
()
{
return
match
::
name_contains
(
"reduce"
);
}
...
@@ -267,6 +269,32 @@ struct find_dot_add
...
@@ -267,6 +269,32 @@ struct find_dot_add
}
}
};
};
struct
find_conv_add
{
auto
matcher
()
const
{
auto
add
=
match
::
name
(
"add"
)(
match
::
either_arg
(
0
,
1
)(
match
::
any
().
bind
(
"x"
),
match
::
any_of
(
match
::
is_constant
()).
bind
(
"a"
)),
match
::
used_once
());
return
match
::
name
(
"convolution"
)(
match
::
used_once
(),
match
::
args
(
add
,
match
::
is_constant
().
bind
(
"w"
)));
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
ins
=
r
.
result
;
auto
a_ins
=
r
.
instructions
[
"a"
];
auto
x_ins
=
r
.
instructions
[
"x"
];
auto
w_ins
=
r
.
instructions
[
"w"
];
auto
conv1
=
m
.
insert_instruction
(
ins
,
ins
->
get_operator
(),
a_ins
,
w_ins
);
auto
conv2
=
m
.
insert_instruction
(
ins
,
ins
->
get_operator
(),
x_ins
,
w_ins
);
m
.
replace_instruction
(
ins
,
make_op
(
"add"
),
conv1
,
conv2
);
}
};
struct
find_add_lit_broadcast
struct
find_add_lit_broadcast
{
{
auto
matcher
()
const
auto
matcher
()
const
...
@@ -340,12 +368,18 @@ struct find_inner_broadcast
...
@@ -340,12 +368,18 @@ struct find_inner_broadcast
std
::
back_inserter
(
inputs
),
std
::
back_inserter
(
inputs
),
[](
auto
i
)
{
return
i
->
inputs
().
front
();
});
[](
auto
i
)
{
return
i
->
inputs
().
front
();
});
if
(
std
::
any_of
(
inputs
.
begin
(),
inputs
.
end
(),
[
&
](
auto
i
)
{
if
(
std
::
any_of
(
inputs
.
begin
(),
inputs
.
end
(),
[
&
](
auto
i
)
{
return
i
->
get_shape
()
!=
inputs
.
front
()
->
get_shape
();
return
i
->
get_shape
()
!=
inputs
.
front
()
->
get_shape
()
and
i
->
get_shape
().
elements
()
!=
1
;
}))
}))
return
;
return
;
auto
op
=
m
.
insert_instruction
(
ins
,
ins
->
get_operator
(),
inputs
);
auto
b_it
=
std
::
find_if
(
broadcasts
.
begin
(),
broadcasts
.
end
(),
[
&
](
auto
i
)
{
m
.
replace_instruction
(
ins
,
broadcasts
.
front
()
->
get_operator
(),
op
);
return
not
i
->
get_shape
().
scalar
();
});
if
(
b_it
==
broadcasts
.
end
())
b_it
=
broadcasts
.
begin
();
auto
op
=
insert_common_op
(
m
,
ins
,
ins
->
get_operator
(),
inputs
);
m
.
replace_instruction
(
ins
,
(
*
b_it
)
->
get_operator
(),
op
);
}
}
};
};
...
@@ -1232,6 +1266,7 @@ void simplify_algebra::apply(module& m) const
...
@@ -1232,6 +1266,7 @@ void simplify_algebra::apply(module& m) const
find_neg_unit_ops
{},
find_neg_unit_ops
{},
find_zero_ops
{},
find_zero_ops
{},
find_dot_add
{},
find_dot_add
{},
find_conv_add
{},
find_div_const
{},
find_div_const
{},
find_sub_const
{},
find_sub_const
{},
find_rsqrt
{},
find_rsqrt
{},
...
...
src/targets/gpu/CMakeLists.txt
View file @
3eaa0969
#####################################################################################
#
####################################################################################
# The MIT License (MIT)
# The MIT License (MIT)
#
#
# Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
# Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
...
@@ -20,7 +20,7 @@
...
@@ -20,7 +20,7 @@
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
# THE SOFTWARE.
#####################################################################################
#
####################################################################################
list
(
APPEND CMAKE_PREFIX_PATH /opt/rocm /opt/rocm/hip /opt/rocm/hcc
)
list
(
APPEND CMAKE_PREFIX_PATH /opt/rocm /opt/rocm/hip /opt/rocm/hcc
)
find_package
(
miopen
)
find_package
(
miopen
)
...
@@ -33,6 +33,8 @@ if(NOT TARGET MIOpen)
...
@@ -33,6 +33,8 @@ if(NOT TARGET MIOpen)
message
(
SEND_ERROR
"Cant find miopen"
)
message
(
SEND_ERROR
"Cant find miopen"
)
endif
()
endif
()
set
(
MIGRAPHX_USE_HIPRTC OFF CACHE BOOL
"Use hipRTC APIs"
)
include
(
Embed
)
include
(
Embed
)
file
(
GLOB KERNEL_FILES
${
CONFIGURE_DEPENDS
}
file
(
GLOB KERNEL_FILES
${
CONFIGURE_DEPENDS
}
${
CMAKE_CURRENT_SOURCE_DIR
}
/kernels/include/migraphx/kernels/*.hpp
)
${
CMAKE_CURRENT_SOURCE_DIR
}
/kernels/include/migraphx/kernels/*.hpp
)
...
@@ -46,9 +48,10 @@ add_library(compile_for_gpu INTERFACE)
...
@@ -46,9 +48,10 @@ add_library(compile_for_gpu INTERFACE)
target_compile_options
(
compile_for_gpu INTERFACE -std=c++17 -fno-gpu-rdc -Wno-cuda-compat -Wno-unused-command-line-argument -Xclang -fallow-half-arguments-and-returns
)
target_compile_options
(
compile_for_gpu INTERFACE -std=c++17 -fno-gpu-rdc -Wno-cuda-compat -Wno-unused-command-line-argument -Xclang -fallow-half-arguments-and-returns
)
target_link_libraries
(
compile_for_gpu INTERFACE hip::device -fno-gpu-rdc -Wno-invalid-command-line-argument -Wno-unused-command-line-argument -Wno-option-ignored
)
target_link_libraries
(
compile_for_gpu INTERFACE hip::device -fno-gpu-rdc -Wno-invalid-command-line-argument -Wno-unused-command-line-argument -Wno-option-ignored
)
check_cxx_compiler_flag
(
"--cuda-host-only -fhip-lambda-host-device -x hip"
HAS_HIP_LAMBDA_HOST_DEVICE
)
check_cxx_compiler_flag
(
"--cuda-host-only -fhip-lambda-host-device -x hip"
HAS_HIP_LAMBDA_HOST_DEVICE
)
if
(
HAS_HIP_LAMBDA_HOST_DEVICE
)
if
(
HAS_HIP_LAMBDA_HOST_DEVICE
)
message
(
STATUS
"Enable -fhip-lambda-host-device"
)
message
(
STATUS
"Enable -fhip-lambda-host-device"
)
target_compile_options
(
compile_for_gpu INTERFACE -fhip-lambda-host-device
)
target_compile_options
(
compile_for_gpu INTERFACE -fhip-lambda-host-device
)
endif
()
endif
()
set_target_properties
(
migraphx_device PROPERTIES EXPORT_NAME device
)
set_target_properties
(
migraphx_device PROPERTIES EXPORT_NAME device
)
...
@@ -60,11 +63,13 @@ target_include_directories(migraphx_device PUBLIC $<BUILD_INTERFACE:${CMAKE_CURR
...
@@ -60,11 +63,13 @@ target_include_directories(migraphx_device PUBLIC $<BUILD_INTERFACE:${CMAKE_CURR
target_include_directories
(
migraphx_device PRIVATE $<BUILD_INTERFACE:
${
CMAKE_CURRENT_SOURCE_DIR
}
/device/include>
)
target_include_directories
(
migraphx_device PRIVATE $<BUILD_INTERFACE:
${
CMAKE_CURRENT_SOURCE_DIR
}
/device/include>
)
add_library
(
kernel_file_check EXCLUDE_FROM_ALL
)
add_library
(
kernel_file_check EXCLUDE_FROM_ALL
)
foreach
(
KERNEL_FILE
${
KERNEL_FILES
}
)
foreach
(
KERNEL_FILE
${
KERNEL_FILES
}
)
get_filename_component
(
KERNEL_BASE_FILE
${
KERNEL_FILE
}
NAME_WE
)
get_filename_component
(
KERNEL_BASE_FILE
${
KERNEL_FILE
}
NAME_WE
)
file
(
WRITE
${
CMAKE_CURRENT_BINARY_DIR
}
/kernels/include/migraphx/kernels/
${
KERNEL_BASE_FILE
}
.cpp
"#include <migraphx/kernels/
${
KERNEL_BASE_FILE
}
.hpp>
\n
"
)
file
(
WRITE
${
CMAKE_CURRENT_BINARY_DIR
}
/kernels/include/migraphx/kernels/
${
KERNEL_BASE_FILE
}
.cpp
"#include <migraphx/kernels/
${
KERNEL_BASE_FILE
}
.hpp>
\n
"
)
target_sources
(
kernel_file_check PRIVATE
${
CMAKE_CURRENT_BINARY_DIR
}
/kernels/include/migraphx/kernels/
${
KERNEL_BASE_FILE
}
.cpp
)
target_sources
(
kernel_file_check PRIVATE
${
CMAKE_CURRENT_BINARY_DIR
}
/kernels/include/migraphx/kernels/
${
KERNEL_BASE_FILE
}
.cpp
)
endforeach
()
endforeach
()
target_compile_definitions
(
kernel_file_check PRIVATE -DMIGRAPHX_NLOCAL=256
)
target_compile_definitions
(
kernel_file_check PRIVATE -DMIGRAPHX_NLOCAL=256
)
target_include_directories
(
kernel_file_check PRIVATE $<BUILD_INTERFACE:
${
CMAKE_CURRENT_SOURCE_DIR
}
/kernels/include/>
)
target_include_directories
(
kernel_file_check PRIVATE $<BUILD_INTERFACE:
${
CMAKE_CURRENT_SOURCE_DIR
}
/kernels/include/>
)
target_link_libraries
(
kernel_file_check compile_for_gpu
)
target_link_libraries
(
kernel_file_check compile_for_gpu
)
...
@@ -127,6 +132,7 @@ function(register_migraphx_gpu_ops PREFIX)
...
@@ -127,6 +132,7 @@ function(register_migraphx_gpu_ops PREFIX)
register_op
(
migraphx_gpu HEADER migraphx/gpu/
${
OP
}
.hpp OPERATORS gpu::
${
PREFIX
}${
OP
}
INCLUDES migraphx/gpu/context.hpp
)
register_op
(
migraphx_gpu HEADER migraphx/gpu/
${
OP
}
.hpp OPERATORS gpu::
${
PREFIX
}${
OP
}
INCLUDES migraphx/gpu/context.hpp
)
endforeach
()
endforeach
()
endfunction
()
endfunction
()
register_migraphx_gpu_ops
(
hip_
register_migraphx_gpu_ops
(
hip_
argmax
argmax
argmin
argmin
...
@@ -148,47 +154,41 @@ register_migraphx_gpu_ops(miopen_
...
@@ -148,47 +154,41 @@ register_migraphx_gpu_ops(miopen_
lrn
lrn
pooling
pooling
)
)
register_op
(
migraphx_gpu
register_op
(
migraphx_gpu
HEADER migraphx/gpu/rnn_variable_seq_lens.hpp
HEADER migraphx/gpu/rnn_variable_seq_lens.hpp
OPERATORS gpu::hip_rnn_var_sl_shift_sequence gpu::hip_rnn_var_sl_shift_output gpu::hip_rnn_var_sl_last_output
OPERATORS gpu::hip_rnn_var_sl_shift_sequence gpu::hip_rnn_var_sl_shift_output gpu::hip_rnn_var_sl_last_output
INCLUDES migraphx/gpu/context.hpp
)
INCLUDES migraphx/gpu/context.hpp
)
register_op
(
migraphx_gpu
register_op
(
migraphx_gpu
HEADER migraphx/gpu/int8_gemm_pack.hpp
HEADER migraphx/gpu/int8_gemm_pack.hpp
OPERATORS gpu::hip_int8_gemm_pack_a gpu::hip_int8_gemm_pack_b
OPERATORS gpu::hip_int8_gemm_pack_a gpu::hip_int8_gemm_pack_b
INCLUDES migraphx/gpu/context.hpp
)
INCLUDES migraphx/gpu/context.hpp
)
register_op
(
migraphx_gpu
register_op
(
migraphx_gpu
HEADER migraphx/gpu/gemm.hpp
HEADER migraphx/gpu/gemm.hpp
OPERATORS gpu::rocblas_gemm<op::dot> gpu::rocblas_gemm<op::quant_dot>
OPERATORS gpu::rocblas_gemm<op::dot> gpu::rocblas_gemm<op::quant_dot>
INCLUDES migraphx/gpu/context.hpp
)
INCLUDES migraphx/gpu/context.hpp
)
register_op
(
migraphx_gpu HEADER migraphx/gpu/convolution.hpp
register_op
(
migraphx_gpu HEADER migraphx/gpu/convolution.hpp
OPERATORS gpu::miopen_convolution<op::convolution> gpu::miopen_convolution<op::deconvolution> gpu::miopen_convolution<op::quant_convolution>
OPERATORS gpu::miopen_convolution<op::convolution> gpu::miopen_convolution<op::deconvolution> gpu::miopen_convolution<op::quant_convolution>
INCLUDES migraphx/gpu/context.hpp
)
INCLUDES migraphx/gpu/context.hpp
)
rocm_set_soversion
(
migraphx_gpu
${
MIGRAPHX_SO_VERSION
}
)
rocm_set_soversion
(
migraphx_gpu
${
MIGRAPHX_SO_VERSION
}
)
rocm_clang_tidy_check
(
migraphx_gpu
)
rocm_clang_tidy_check
(
migraphx_gpu
)
# look for offload bundler
get_filename_component
(
CMAKE_CXX_COMPILER_PATH
"
${
CMAKE_CXX_COMPILER
}
"
PATH
)
get_filename_component
(
CMAKE_CXX_COMPILER_PATH
"
${
CMAKE_CXX_COMPILER
}
"
PATH
)
if
(
CMAKE_CXX_COMPILER MATCHES
".*clang
\\
+
\\
+$"
)
find_program
(
MIGRAPHX_OFFLOADBUNDLER_BIN clang-offload-bundler
if
(
NOT CMAKE_CXX_COMPILER MATCHES
".*clang
\\
+
\\
+$"
)
HINTS
${
CMAKE_CXX_COMPILER_PATH
}
PATH_SUFFIXES bin
PATHS /opt/rocm/llvm
)
else
()
find_program
(
MIGRAPHX_EXTRACT_KERNEL extractkernel
find_program
(
MIGRAPHX_EXTRACT_KERNEL extractkernel
PATH_SUFFIXES bin
PATH_SUFFIXES bin
HINTS
${
CMAKE_CXX_COMPILER_PATH
}
HINTS
${
CMAKE_CXX_COMPILER_PATH
}
PATHS
PATHS
/opt/rocm/hip
/opt/rocm/hip
/opt/rocm/hcc
/opt/rocm/hcc
/opt/rocm
/opt/rocm
)
)
endif
()
endif
()
message
(
STATUS
"clang-offload-bundler:
${
MIGRAPHX_OFFLOADBUNDLER_BIN
}
"
)
message
(
STATUS
"extractkernel:
${
MIGRAPHX_EXTRACT_KERNEL
}
"
)
message
(
STATUS
"extractkernel:
${
MIGRAPHX_EXTRACT_KERNEL
}
"
)
set
(
MIGRAPHX_ENABLE_MLIR OFF CACHE BOOL
""
)
set
(
MIGRAPHX_ENABLE_MLIR OFF CACHE BOOL
""
)
if
(
MIGRAPHX_ENABLE_MLIR
)
if
(
MIGRAPHX_ENABLE_MLIR
)
# Find package rocMLIR
# Find package rocMLIR
find_package
(
rocMLIR 1.0.0 CONFIG REQUIRED
)
find_package
(
rocMLIR 1.0.0 CONFIG REQUIRED
)
...
@@ -197,40 +197,43 @@ if(MIGRAPHX_ENABLE_MLIR)
...
@@ -197,40 +197,43 @@ if(MIGRAPHX_ENABLE_MLIR)
target_link_libraries
(
migraphx_gpu PUBLIC rocMLIR::rockCompiler
)
target_link_libraries
(
migraphx_gpu PUBLIC rocMLIR::rockCompiler
)
endif
()
endif
()
set
(
MIGRAPHX_USE_HIPRTC OFF CACHE BOOL
""
)
if
(
MIGRAPHX_USE_HIPRTC
)
if
(
MIGRAPHX_USE_HIPRTC
)
target_compile_definitions
(
migraphx_gpu PRIVATE -DMIGRAPHX_USE_HIPRTC=1
)
message
(
STATUS
"MIGraphX is using hipRTC"
)
target_compile_definitions
(
migraphx_gpu PRIVATE -DMIGRAPHX_USE_HIPRTC=1
)
else
()
else
()
# Get flags needed to compile hip
message
(
STATUS
"MIGraphX is using HIP Clang"
)
include
(
TargetFlags
)
target_flags
(
HIP_COMPILER_FLAGS hip::device
)
# Remove cuda arch flags
string
(
REGEX REPLACE --cuda-gpu-arch=[a-z0-9]+
""
HIP_COMPILER_FLAGS
"
${
HIP_COMPILER_FLAGS
}
"
)
string
(
REGEX REPLACE --offload-arch=[a-z0-9:+-]+
""
HIP_COMPILER_FLAGS
"
${
HIP_COMPILER_FLAGS
}
"
)
# Skip library paths since hip will incorrectly treat it as a source file
string
(
APPEND HIP_COMPILER_FLAGS
" "
)
# Add ck includes
find_path
(
CK_INCLUDE_PATH ck/ck.hpp
)
message
(
STATUS
"CK path:
${
CK_INCLUDE_PATH
}
"
)
string
(
APPEND HIP_COMPILER_FLAGS
" -isystem
${
CK_INCLUDE_PATH
}
"
)
foreach
(
_unused RANGE 2
)
string
(
REGEX REPLACE
" /[^ ]+
\\
.(a|so) "
" "
HIP_COMPILER_FLAGS
"
${
HIP_COMPILER_FLAGS
}
"
)
endforeach
()
message
(
STATUS
"Hip compiler flags:
${
HIP_COMPILER_FLAGS
}
"
)
# Get flags needed to compile hip
target_compile_definitions
(
migraphx_gpu PRIVATE
include
(
TargetFlags
)
"-DMIGRAPHX_HIP_COMPILER=
${
CMAKE_CXX_COMPILER
}
"
target_flags
(
HIP_COMPILER_FLAGS hip::device
)
"-DMIGRAPHX_HIP_COMPILER_FLAGS=
${
HIP_COMPILER_FLAGS
}
"
"-DMIGRAPHX_OFFLOADBUNDLER_BIN=
${
MIGRAPHX_OFFLOADBUNDLER_BIN
}
"
# Remove cuda arch flags
"-DMIGRAPHX_EXTRACT_KERNEL=
${
MIGRAPHX_EXTRACT_KERNEL
}
"
string
(
REGEX REPLACE --cuda-gpu-arch=[a-z0-9]+
""
HIP_COMPILER_FLAGS
"
${
HIP_COMPILER_FLAGS
}
"
)
"-DMIGRAPHX_USE_HIPRTC=0"
string
(
REGEX REPLACE --offload-arch=[a-z0-9:+-]+
""
HIP_COMPILER_FLAGS
"
${
HIP_COMPILER_FLAGS
}
"
)
)
if
(
DEFINED CMAKE_CXX_COMPILER_LAUNCHER
)
# Skip library paths since hip will incorrectly treat it as a source file
execute_process
(
COMMAND which
${
CMAKE_CXX_COMPILER_LAUNCHER
}
OUTPUT_VARIABLE MIGRAPHX_HIP_COMPILER_LAUNCHER
)
string
(
APPEND HIP_COMPILER_FLAGS
" "
)
string
(
STRIP
"
${
MIGRAPHX_HIP_COMPILER_LAUNCHER
}
"
MIGRAPHX_HIP_COMPILER_LAUNCHER
)
target_compile_definitions
(
migraphx_gpu PRIVATE
"-DMIGRAPHX_HIP_COMPILER_LAUNCHER=
${
MIGRAPHX_HIP_COMPILER_LAUNCHER
}
"
)
endif
()
# Add ck includes
find_path
(
CK_INCLUDE_PATH ck/ck.hpp
)
message
(
STATUS
"CK path:
${
CK_INCLUDE_PATH
}
"
)
string
(
APPEND HIP_COMPILER_FLAGS
" -isystem
${
CK_INCLUDE_PATH
}
"
)
foreach
(
_unused RANGE 2
)
string
(
REGEX REPLACE
" /[^ ]+
\\
.(a|so) "
" "
HIP_COMPILER_FLAGS
"
${
HIP_COMPILER_FLAGS
}
"
)
endforeach
()
message
(
STATUS
"Hip compiler flags:
${
HIP_COMPILER_FLAGS
}
"
)
target_compile_definitions
(
migraphx_gpu PRIVATE
"-DMIGRAPHX_HIP_COMPILER=
${
CMAKE_CXX_COMPILER
}
"
"-DMIGRAPHX_HIP_COMPILER_FLAGS=
${
HIP_COMPILER_FLAGS
}
"
"-DMIGRAPHX_EXTRACT_KERNEL=
${
MIGRAPHX_EXTRACT_KERNEL
}
"
)
if
(
DEFINED CMAKE_CXX_COMPILER_LAUNCHER
)
execute_process
(
COMMAND which
${
CMAKE_CXX_COMPILER_LAUNCHER
}
OUTPUT_VARIABLE MIGRAPHX_HIP_COMPILER_LAUNCHER
)
string
(
STRIP
"
${
MIGRAPHX_HIP_COMPILER_LAUNCHER
}
"
MIGRAPHX_HIP_COMPILER_LAUNCHER
)
target_compile_definitions
(
migraphx_gpu PRIVATE
"-DMIGRAPHX_HIP_COMPILER_LAUNCHER=
${
MIGRAPHX_HIP_COMPILER_LAUNCHER
}
"
)
endif
()
endif
()
endif
()
# Check miopen find mode api
# Check miopen find mode api
...
@@ -242,7 +245,7 @@ check_library_exists(MIOpen "miopenFindSolutions" "${MIOPEN_LOCATION}" HAS_FIND_
...
@@ -242,7 +245,7 @@ check_library_exists(MIOpen "miopenFindSolutions" "${MIOPEN_LOCATION}" HAS_FIND_
# TODO: Set default to HAS_FIND_2_API
# TODO: Set default to HAS_FIND_2_API
set
(
MIGRAPHX_USE_FIND_2_API OFF CACHE BOOL
""
)
set
(
MIGRAPHX_USE_FIND_2_API OFF CACHE BOOL
""
)
if
(
MIGRAPHX_USE_FIND_2_API
)
if
(
MIGRAPHX_USE_FIND_2_API
)
target_compile_definitions
(
migraphx_gpu PUBLIC -DMIGRAPHX_HAS_FIND_2_API
)
target_compile_definitions
(
migraphx_gpu PUBLIC -DMIGRAPHX_HAS_FIND_2_API
)
message
(
STATUS
"MIGraphx is using Find-2.0 API of MIOpen"
)
message
(
STATUS
"MIGraphx is using Find-2.0 API of MIOpen"
)
else
()
else
()
...
@@ -264,8 +267,7 @@ target_link_libraries(migraphx_gpu PRIVATE migraphx_device migraphx_kernels)
...
@@ -264,8 +267,7 @@ target_link_libraries(migraphx_gpu PRIVATE migraphx_device migraphx_kernels)
add_subdirectory
(
driver
)
add_subdirectory
(
driver
)
rocm_install_targets
(
rocm_install_targets
(
TARGETS migraphx_gpu migraphx_device compile_for_gpu
TARGETS migraphx_gpu migraphx_device compile_for_gpu
INCLUDE
INCLUDE
${
CMAKE_CURRENT_SOURCE_DIR
}
/include
${
CMAKE_CURRENT_SOURCE_DIR
}
/include
)
)
src/targets/gpu/compile_hip.cpp
View file @
3eaa0969
...
@@ -29,10 +29,9 @@
...
@@ -29,10 +29,9 @@
#include <cassert>
#include <cassert>
#include <iostream>
#include <iostream>
#if MIGRAPHX_USE_HIPRTC
#if
def
MIGRAPHX_USE_HIPRTC
#include <hip/hiprtc.h>
#include <hip/hiprtc.h>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/env.hpp>
#else
#else
#include <migraphx/compile_src.hpp>
#include <migraphx/compile_src.hpp>
#include <migraphx/process.hpp>
#include <migraphx/process.hpp>
...
@@ -48,9 +47,10 @@ MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_OPTIMIZE);
...
@@ -48,9 +47,10 @@ MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_OPTIMIZE);
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_GPU_DUMP_ASM
);
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_GPU_DUMP_ASM
);
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_GPU_DUMP_SRC
);
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_GPU_DUMP_SRC
);
#if MIGRAPHX_USE_HIPRTC
#if
def
MIGRAPHX_USE_HIPRTC
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_TRACE_HIPRTC
)
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_TRACE_HIPRTC
);
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_ENABLE_HIPRTC_WORKAROUNDS
);
std
::
string
hiprtc_error
(
hiprtcResult
err
,
const
std
::
string
&
msg
)
std
::
string
hiprtc_error
(
hiprtcResult
err
,
const
std
::
string
&
msg
)
{
{
...
@@ -143,25 +143,29 @@ struct hiprtc_program
...
@@ -143,25 +143,29 @@ struct hiprtc_program
options
.
end
(),
options
.
end
(),
std
::
back_inserter
(
c_options
),
std
::
back_inserter
(
c_options
),
[](
const
std
::
string
&
s
)
{
return
s
.
c_str
();
});
[](
const
std
::
string
&
s
)
{
return
s
.
c_str
();
});
auto
result
=
hiprtcCompileProgram
(
prog
.
get
(),
c_options
.
size
(),
c_options
.
data
());
auto
result
=
hiprtcCompileProgram
(
prog
.
get
(),
c_options
.
size
(),
c_options
.
data
());
std
::
cerr
<<
log
()
<<
std
::
endl
;
auto
prog_log
=
log
();
if
(
not
prog_log
.
empty
())
{
std
::
cerr
<<
prog_log
<<
std
::
endl
;
}
if
(
result
!=
HIPRTC_SUCCESS
)
if
(
result
!=
HIPRTC_SUCCESS
)
MIGRAPHX_HIPRTC_THROW
(
result
,
"Compilation failed."
);
MIGRAPHX_HIPRTC_THROW
(
result
,
"Compilation failed."
);
}
}
std
::
string
log
()
std
::
string
log
()
const
{
{
std
::
size_t
n
=
0
;
std
::
size_t
n
=
0
;
MIGRAPHX_HIPRTC
(
hiprtcGetProgramLogSize
(
prog
.
get
(),
&
n
));
MIGRAPHX_HIPRTC
(
hiprtcGetProgramLogSize
(
prog
.
get
(),
&
n
));
if
(
n
<
2
)
if
(
n
==
0
)
return
{};
return
{};
std
::
vector
<
char
>
buffer
(
n
);
std
::
string
buffer
(
n
,
'\0'
);
MIGRAPHX_HIPRTC
(
hiprtcGetProgramLog
(
prog
.
get
(),
buffer
.
data
()));
MIGRAPHX_HIPRTC
(
hiprtcGetProgramLog
(
prog
.
get
(),
buffer
.
data
()));
assert
(
buffer
.
back
()
=
=
0
);
assert
(
buffer
.
back
()
!
=
0
);
return
{
buffer
.
begin
(),
buffer
.
end
()
-
1
}
;
return
buffer
;
}
}
std
::
vector
<
char
>
get_code_obj
()
std
::
vector
<
char
>
get_code_obj
()
const
{
{
std
::
size_t
n
=
0
;
std
::
size_t
n
=
0
;
MIGRAPHX_HIPRTC
(
hiprtcGetCodeSize
(
prog
.
get
(),
&
n
));
MIGRAPHX_HIPRTC
(
hiprtcGetCodeSize
(
prog
.
get
(),
&
n
));
...
@@ -176,6 +180,17 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std
...
@@ -176,6 +180,17 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std
{
{
hiprtc_program
prog
(
srcs
);
hiprtc_program
prog
(
srcs
);
auto
options
=
split_string
(
params
,
' '
);
auto
options
=
split_string
(
params
,
' '
);
options
.
push_back
(
"-DMIGRAPHX_USE_HIPRTC=1"
);
// remove following three compilation flags for HIPRTC once fixes from hipRTC are available in
if
(
enabled
(
MIGRAPHX_ENABLE_HIPRTC_WORKAROUNDS
{}))
{
options
.
push_back
(
"-DMIGRAPHX_HAS_DPP=0"
);
options
.
push_back
(
"-DMIGRAPHX_ENABLE_HIPRTC_WORKAROUNDS=1"
);
options
.
push_back
(
"-Wno-reserved-identifier"
);
options
.
push_back
(
"-Wno-gnu-line-marker"
);
options
.
push_back
(
"-Wno-old-style-cast"
);
}
if
(
enabled
(
MIGRAPHX_GPU_DEBUG
{}))
if
(
enabled
(
MIGRAPHX_GPU_DEBUG
{}))
options
.
push_back
(
"-DMIGRAPHX_DEBUG"
);
options
.
push_back
(
"-DMIGRAPHX_DEBUG"
);
if
(
std
::
none_of
(
options
.
begin
(),
options
.
end
(),
[](
const
std
::
string
&
s
)
{
if
(
std
::
none_of
(
options
.
begin
(),
options
.
end
(),
[](
const
std
::
string
&
s
)
{
...
@@ -183,7 +198,7 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std
...
@@ -183,7 +198,7 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std
}))
}))
options
.
push_back
(
"-std=c++17"
);
options
.
push_back
(
"-std=c++17"
);
options
.
push_back
(
"-fno-gpu-rdc"
);
options
.
push_back
(
"-fno-gpu-rdc"
);
options
.
push_back
(
"
-O"
+
string_value_of
(
MIGRAPHX_GPU_OPTIMIZE
{},
"3"
));
options
.
push_back
(
"-O"
+
string_value_of
(
MIGRAPHX_GPU_OPTIMIZE
{},
"3"
));
options
.
push_back
(
"-Wno-cuda-compat"
);
options
.
push_back
(
"-Wno-cuda-compat"
);
options
.
push_back
(
"--offload-arch="
+
arch
);
options
.
push_back
(
"--offload-arch="
+
arch
);
prog
.
compile
(
options
);
prog
.
compile
(
options
);
...
@@ -292,6 +307,8 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std
...
@@ -292,6 +307,8 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std
return
{
compiler
.
compile
(
srcs
)};
return
{
compiler
.
compile
(
srcs
)};
}
}
#endif // MIGRAPHX_USE_HIPRTC
std
::
string
enum_params
(
std
::
size_t
count
,
std
::
string
param
)
std
::
string
enum_params
(
std
::
size_t
count
,
std
::
string
param
)
{
{
std
::
vector
<
std
::
string
>
items
(
count
);
std
::
vector
<
std
::
string
>
items
(
count
);
...
@@ -299,8 +316,6 @@ std::string enum_params(std::size_t count, std::string param)
...
@@ -299,8 +316,6 @@ std::string enum_params(std::size_t count, std::string param)
return
join_strings
(
items
,
","
);
return
join_strings
(
items
,
","
);
}
}
#endif // MIGRAPHX_USE_HIPRTC
}
// namespace gpu
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
}
// namespace migraphx
src/targets/gpu/compile_hip_code_object.cpp
View file @
3eaa0969
...
@@ -29,7 +29,6 @@
...
@@ -29,7 +29,6 @@
#include <migraphx/context.hpp>
#include <migraphx/context.hpp>
#include <migraphx_kernels.hpp>
#include <migraphx_kernels.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/stringutils.hpp>
#include <hip/hip_runtime_api.h>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
...
@@ -80,6 +79,7 @@ std::string generate_args_hpp(const std::vector<shape>& inputs)
...
@@ -80,6 +79,7 @@ std::string generate_args_hpp(const std::vector<shape>& inputs)
#include <migraphx/kernels/args.hpp>
#include <migraphx/kernels/args.hpp>
#include <migraphx/kernels/tensor_view.hpp>
#include <migraphx/kernels/tensor_view.hpp>
#include <migraphx/kernels/types.hpp>
namespace migraphx {
namespace migraphx {
...
...
src/targets/gpu/device/include/migraphx/gpu/device/reduce.hpp
View file @
3eaa0969
...
@@ -36,6 +36,7 @@ namespace gpu {
...
@@ -36,6 +36,7 @@ namespace gpu {
namespace
device
{
namespace
device
{
#ifdef MIGRAPHX_NO_DPP
#ifdef MIGRAPHX_NO_DPP
template
<
index_int
N
,
template
<
index_int
N
,
class
Op
,
class
Op
,
class
T
,
class
T
,
...
@@ -62,6 +63,7 @@ __device__ auto block_reduce(index idx, Op op, T init, ForStride fs, F f)
...
@@ -62,6 +63,7 @@ __device__ auto block_reduce(index idx, Op op, T init, ForStride fs, F f)
}
}
return
buffer
[
0
];
return
buffer
[
0
];
}
}
#else
#else
constexpr
unsigned
int
dpp_row_shr
(
unsigned
int
x
)
{
return
0x110u
|
x
;
}
constexpr
unsigned
int
dpp_row_shr
(
unsigned
int
x
)
{
return
0x110u
|
x
;
}
...
@@ -96,11 +98,7 @@ __device__ T dpp_mov(T& x)
...
@@ -96,11 +98,7 @@ __device__ T dpp_mov(T& x)
input
.
data
=
x
;
input
.
data
=
x
;
for
(
index_int
i
=
0
;
i
<
n
;
i
++
)
for
(
index_int
i
=
0
;
i
<
n
;
i
++
)
{
{
#if defined(__HCC__)
output
.
reg
[
i
]
=
__llvm_amdgcn_move_dpp
(
input
.
reg
[
i
],
DppCtrl
,
RowMask
,
BankMask
,
BoundCtrl
);
#else
output
.
reg
[
i
]
=
__hip_move_dpp
(
input
.
reg
[
i
],
DppCtrl
,
RowMask
,
BankMask
,
BoundCtrl
);
output
.
reg
[
i
]
=
__hip_move_dpp
(
input
.
reg
[
i
],
DppCtrl
,
RowMask
,
BankMask
,
BoundCtrl
);
#endif
}
}
return
output
.
data
;
return
output
.
data
;
}
}
...
@@ -310,4 +308,4 @@ void reduce(hipStream_t stream,
...
@@ -310,4 +308,4 @@ void reduce(hipStream_t stream,
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
}
// namespace migraphx
#endif
#endif
// MIGRAPHX_NO_DPP
src/targets/gpu/include/migraphx/gpu/convolution.hpp
View file @
3eaa0969
...
@@ -21,8 +21,8 @@
...
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* THE SOFTWARE.
*/
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_CONVOLUTION_HPP
#ifndef MIGRAPHX_GUARD_RTGLIB_
GPU_
CONVOLUTION_HPP
#define MIGRAPHX_GUARD_RTGLIB_CONVOLUTION_HPP
#define MIGRAPHX_GUARD_RTGLIB_
GPU_
CONVOLUTION_HPP
#include <migraphx/shape.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/generate.hpp>
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment