Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
0a8342b8
Commit
0a8342b8
authored
May 06, 2022
by
charlie
Browse files
Merge branch 'develop' of github.com:ROCmSoftwarePlatform/AMDMIGraphX into dyn_shape_update
parents
b31735e8
f55d7c24
Changes
21
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
77 additions
and
56 deletions
+77
-56
.github/workflows/ci.yaml
.github/workflows/ci.yaml
+6
-5
CMakeLists.txt
CMakeLists.txt
+9
-2
Dockerfile
Dockerfile
+2
-6
cmake/Embed.cmake
cmake/Embed.cmake
+1
-0
cmake/EnableCompilerWarnings.cmake
cmake/EnableCompilerWarnings.cmake
+1
-0
hip-clang.docker
hip-clang.docker
+2
-5
src/api/include/migraphx/migraphx.hpp
src/api/include/migraphx/migraphx.hpp
+2
-0
src/driver/marker_roctx.cpp
src/driver/marker_roctx.cpp
+1
-1
src/include/migraphx/filesystem.hpp
src/include/migraphx/filesystem.hpp
+4
-1
src/include/migraphx/optional.hpp
src/include/migraphx/optional.hpp
+4
-1
src/onnx/parse_mean.cpp
src/onnx/parse_mean.cpp
+11
-8
src/simplify_algebra.cpp
src/simplify_algebra.cpp
+1
-1
src/targets/cpu/include/migraphx/cpu/parallel.hpp
src/targets/cpu/include/migraphx/cpu/parallel.hpp
+9
-0
src/targets/cpu/include/migraphx/cpu/pointwise.hpp
src/targets/cpu/include/migraphx/cpu/pointwise.hpp
+2
-2
src/targets/cpu/lowering.cpp
src/targets/cpu/lowering.cpp
+1
-1
src/targets/gpu/CMakeLists.txt
src/targets/gpu/CMakeLists.txt
+1
-1
src/targets/gpu/compile_hip.cpp
src/targets/gpu/compile_hip.cpp
+1
-0
src/targets/gpu/jit/scatternd.cpp
src/targets/gpu/jit/scatternd.cpp
+1
-2
src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp
...targets/gpu/kernels/include/migraphx/kernels/roialign.hpp
+17
-19
src/targets/ref/lowering.cpp
src/targets/ref/lowering.cpp
+1
-1
No files found.
.github/workflows/ci.yaml
View file @
0a8342b8
...
@@ -15,7 +15,8 @@ jobs:
...
@@ -15,7 +15,8 @@ jobs:
steps
:
steps
:
-
name
:
Free space
-
name
:
Free space
run
:
sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia*
run
:
sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android
-
uses
:
actions/checkout@v2
-
uses
:
actions/checkout@v2
# In this step, this action saves a list of existing images,
# In this step, this action saves a list of existing images,
...
@@ -63,7 +64,7 @@ jobs:
...
@@ -63,7 +64,7 @@ jobs:
steps
:
steps
:
-
name
:
Free space
-
name
:
Free space
run
:
sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia*
run
:
sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia*
/usr/local/lib/android
-
uses
:
actions/checkout@v2
-
uses
:
actions/checkout@v2
# In this step, this action saves a list of existing images,
# In this step, this action saves a list of existing images,
...
@@ -108,7 +109,7 @@ jobs:
...
@@ -108,7 +109,7 @@ jobs:
steps
:
steps
:
-
name
:
Free space
-
name
:
Free space
run
:
sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia*
run
:
sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia*
/usr/local/lib/android
-
uses
:
actions/checkout@v2
-
uses
:
actions/checkout@v2
# In this step, this action saves a list of existing images,
# In this step, this action saves a list of existing images,
...
@@ -143,7 +144,7 @@ jobs:
...
@@ -143,7 +144,7 @@ jobs:
steps
:
steps
:
-
name
:
Free space
-
name
:
Free space
run
:
sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia*
run
:
sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia*
/usr/local/lib/android
-
uses
:
actions/checkout@v2
-
uses
:
actions/checkout@v2
-
name
:
Set up Python
-
name
:
Set up Python
uses
:
actions/setup-python@v2
uses
:
actions/setup-python@v2
...
@@ -182,7 +183,7 @@ jobs:
...
@@ -182,7 +183,7 @@ jobs:
steps
:
steps
:
-
name
:
Free space
-
name
:
Free space
run
:
sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia*
run
:
sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia*
/usr/local/lib/android
-
uses
:
actions/checkout@v2
-
uses
:
actions/checkout@v2
-
name
:
Set up Python
-
name
:
Set up Python
uses
:
actions/setup-python@v2
uses
:
actions/setup-python@v2
...
...
CMakeLists.txt
View file @
0a8342b8
...
@@ -93,11 +93,14 @@ rocm_enable_clang_tidy(
...
@@ -93,11 +93,14 @@ rocm_enable_clang_tidy(
modernize-*
modernize-*
performance-*
performance-*
readability-*
readability-*
-bugprone-signed-char-misuse
-bugprone-easily-swappable-parameters
-bugprone-implicit-widening-of-multiplication-result
-bugprone-macro-parentheses
-bugprone-macro-parentheses
-bugprone-signed-char-misuse
# Disable the aliased reserved identifiers
# Disable the aliased reserved identifiers
-cert-dcl37-c
-cert-dcl37-c
-cert-dcl51-cpp
-cert-dcl51-cpp
-cert-err33-c
-cert-str34-c
-cert-str34-c
# Disable all alpha checks by default
# Disable all alpha checks by default
-clang-analyzer-alpha*
-clang-analyzer-alpha*
...
@@ -127,6 +130,7 @@ rocm_enable_clang_tidy(
...
@@ -127,6 +130,7 @@ rocm_enable_clang_tidy(
-cppcoreguidelines-pro-type-union-access
-cppcoreguidelines-pro-type-union-access
-cppcoreguidelines-pro-type-vararg
-cppcoreguidelines-pro-type-vararg
-cppcoreguidelines-special-member-functions
-cppcoreguidelines-special-member-functions
-cppcoreguidelines-virtual-class-destructor
-google-readability-*
-google-readability-*
-google-runtime-int
-google-runtime-int
-google-runtime-references
-google-runtime-references
...
@@ -144,8 +148,10 @@ rocm_enable_clang_tidy(
...
@@ -144,8 +148,10 @@ rocm_enable_clang_tidy(
-readability-convert-member-functions-to-static
-readability-convert-member-functions-to-static
-readability-else-after-return
-readability-else-after-return
-readability-function-cognitive-complexity
-readability-function-cognitive-complexity
-readability-identifier-length
-readability-named-parameter
-readability-named-parameter
-readability-redundant-string-init
-readability-redundant-string-init
-readability-suspicious-call-argument
-readability-uppercase-literal-suffix
-readability-uppercase-literal-suffix
-*-avoid-c-arrays
-*-avoid-c-arrays
-*-explicit-constructor
-*-explicit-constructor
...
@@ -178,7 +184,7 @@ rocm_enable_cppcheck(
...
@@ -178,7 +184,7 @@ rocm_enable_cppcheck(
style
style
performance
performance
portability
portability
SUPPRESS
SUPPRESS
ConfigurationNotChecked
ConfigurationNotChecked
unmatchedSuppression
unmatchedSuppression
unusedFunction
unusedFunction
...
@@ -216,6 +222,7 @@ rocm_enable_cppcheck(
...
@@ -216,6 +222,7 @@ rocm_enable_cppcheck(
CPPCHECK=1
CPPCHECK=1
__device__=
__device__=
__host__=
__host__=
__global__=
)
)
enable_testing
()
enable_testing
()
...
...
Dockerfile
View file @
0a8342b8
FROM
ubuntu:
18
.04
FROM
ubuntu:
20
.04
ARG
PREFIX=/usr/local
ARG
PREFIX=/usr/local
...
@@ -6,7 +6,7 @@ ARG PREFIX=/usr/local
...
@@ -6,7 +6,7 @@ ARG PREFIX=/usr/local
RUN
dpkg
--add-architecture
i386
RUN
dpkg
--add-architecture
i386
# Add rocm repository
# Add rocm repository
RUN
sh
-c
'echo deb [arch=amd64 trusted=yes] http://repo.radeon.com/rocm/apt/
4.5
/ ubuntu main > /etc/apt/sources.list.d/rocm.list'
RUN
sh
-c
'echo deb [arch=amd64 trusted=yes] http://repo.radeon.com/rocm/apt/
5.0.2
/ ubuntu main > /etc/apt/sources.list.d/rocm.list'
# Install dependencies
# Install dependencies
RUN
apt-get update
&&
DEBIAN_FRONTEND
=
noninteractive apt-get
install
-y
--allow-unauthenticated
\
RUN
apt-get update
&&
DEBIAN_FRONTEND
=
noninteractive apt-get
install
-y
--allow-unauthenticated
\
...
@@ -16,16 +16,12 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-
...
@@ -16,16 +16,12 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-
cmake
\
cmake
\
curl
\
curl
\
doxygen
\
doxygen
\
g++-5
\
g++-7
\
g++-7
\
gdb
\
gdb
\
git
\
git
\
lcov
\
lcov
\
locales
\
locales
\
pkg-config
\
pkg-config
\
python
\
python-dev
\
python-pip
\
python3
\
python3
\
python3-dev
\
python3-dev
\
python3-pip
\
python3-pip
\
...
...
cmake/Embed.cmake
View file @
0a8342b8
...
@@ -94,5 +94,6 @@ function(add_embed_library EMBED_NAME)
...
@@ -94,5 +94,6 @@ function(add_embed_library EMBED_NAME)
generate_embed_source
(
${
EMBED_NAME
}
SRC
${
SRC_FILE
}
HEADER
${
HEADER_FILE
}
OBJECTS
${
OUTPUT_FILES
}
SYMBOLS
${
SYMBOLS
}
)
generate_embed_source
(
${
EMBED_NAME
}
SRC
${
SRC_FILE
}
HEADER
${
HEADER_FILE
}
OBJECTS
${
OUTPUT_FILES
}
SYMBOLS
${
SYMBOLS
}
)
add_library
(
${
EMBED_NAME
}
STATIC
${
OUTPUT_FILES
}
"
${
SRC_FILE
}
"
)
add_library
(
${
EMBED_NAME
}
STATIC
${
OUTPUT_FILES
}
"
${
SRC_FILE
}
"
)
target_include_directories
(
${
EMBED_NAME
}
PUBLIC
"
${
EMBED_DIR
}
/include"
)
target_include_directories
(
${
EMBED_NAME
}
PUBLIC
"
${
EMBED_DIR
}
/include"
)
target_compile_options
(
${
EMBED_NAME
}
PRIVATE -Wno-reserved-identifier
)
set_target_properties
(
${
EMBED_NAME
}
PROPERTIES POSITION_INDEPENDENT_CODE On
)
set_target_properties
(
${
EMBED_NAME
}
PROPERTIES POSITION_INDEPENDENT_CODE On
)
endfunction
()
endfunction
()
cmake/EnableCompilerWarnings.cmake
View file @
0a8342b8
...
@@ -96,6 +96,7 @@ else()
...
@@ -96,6 +96,7 @@ else()
-Wno-gnu-zero-variadic-macro-arguments
-Wno-gnu-zero-variadic-macro-arguments
-Wno-missing-prototypes
-Wno-missing-prototypes
-Wno-nested-anon-types
-Wno-nested-anon-types
-Wno-option-ignored
-Wno-padded
-Wno-padded
-Wno-shorten-64-to-32
-Wno-shorten-64-to-32
-Wno-sign-conversion
-Wno-sign-conversion
...
...
hip-clang.docker
View file @
0a8342b8
FROM
ubuntu:
18
.04
FROM
ubuntu:
20
.04
ARG
PREFIX=/usr/local
ARG
PREFIX=/usr/local
...
@@ -6,7 +6,7 @@ ARG PREFIX=/usr/local
...
@@ -6,7 +6,7 @@ ARG PREFIX=/usr/local
RUN
dpkg
--add-architecture
i386
RUN
dpkg
--add-architecture
i386
# Add rocm repository
# Add rocm repository
RUN
sh
-c
'echo deb [arch=amd64 trusted=yes] http://repo.radeon.com/rocm/apt/
4.5
/ ubuntu main > /etc/apt/sources.list.d/rocm.list'
RUN
sh
-c
'echo deb [arch=amd64 trusted=yes] http://repo.radeon.com/rocm/apt/
5.0.2
/ ubuntu main > /etc/apt/sources.list.d/rocm.list'
# Install dependencies
# Install dependencies
RUN
apt-get update
&&
DEBIAN_FRONTEND
=
noninteractive apt-get
install
-y
--allow-unauthenticated
\
RUN
apt-get update
&&
DEBIAN_FRONTEND
=
noninteractive apt-get
install
-y
--allow-unauthenticated
\
...
@@ -20,9 +20,6 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-
...
@@ -20,9 +20,6 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-
git
\
git
\
lcov
\
lcov
\
pkg-config
\
pkg-config
\
python
\
python-dev
\
python-pip
\
python3
\
python3
\
python3-dev
\
python3-dev
\
python3-pip
\
python3-pip
\
...
...
src/api/include/migraphx/migraphx.hpp
View file @
0a8342b8
...
@@ -314,6 +314,7 @@ struct interface_base : Base
...
@@ -314,6 +314,7 @@ struct interface_base : Base
T
**
y
=
reinterpret_cast
<
T
**>
(
out
);
T
**
y
=
reinterpret_cast
<
T
**>
(
out
);
T
*
x
=
reinterpret_cast
<
T
*>
(
input
);
T
*
x
=
reinterpret_cast
<
T
*>
(
input
);
assert
(
x
!=
nullptr
and
y
!=
nullptr
and
*
y
==
nullptr
);
assert
(
x
!=
nullptr
and
y
!=
nullptr
and
*
y
==
nullptr
);
// cppcheck-suppress useSmartPointer
*
y
=
new
T
(
*
x
);
// NOLINT
*
y
=
new
T
(
*
x
);
// NOLINT
});
});
};
};
...
@@ -339,6 +340,7 @@ struct interface_base : Base
...
@@ -339,6 +340,7 @@ struct interface_base : Base
template
<
class
T
,
class
Setter
,
class
F
>
template
<
class
T
,
class
Setter
,
class
F
>
void
set_auto_fp
(
Setter
setter
,
F
f
)
void
set_auto_fp
(
Setter
setter
,
F
f
)
{
{
// cppcheck-suppress constParameter
return
set_fp
<
T
>
(
setter
,
[
=
](
T
&
obj
,
auto
out
,
auto
...
xs
)
{
return
set_fp
<
T
>
(
setter
,
[
=
](
T
&
obj
,
auto
out
,
auto
...
xs
)
{
auto_invoke
(
f
,
out
,
obj
,
auto_convert_param
(
rank
<
2
>
{},
xs
)...);
auto_invoke
(
f
,
out
,
obj
,
auto_convert_param
(
rank
<
2
>
{},
xs
)...);
});
});
...
...
src/driver/marker_roctx.cpp
View file @
0a8342b8
...
@@ -17,7 +17,7 @@ class marker_roctx
...
@@ -17,7 +17,7 @@ class marker_roctx
std
::
function
<
int
(
const
char
*
)
>
sym_roctx_range_push
;
std
::
function
<
int
(
const
char
*
)
>
sym_roctx_range_push
;
std
::
function
<
int
()
>
sym_roctx_range_pop
;
std
::
function
<
int
()
>
sym_roctx_range_pop
;
uint64_t
range_id
;
uint64_t
range_id
=
0
;
public:
public:
marker_roctx
()
marker_roctx
()
...
...
src/include/migraphx/filesystem.hpp
View file @
0a8342b8
...
@@ -3,7 +3,10 @@
...
@@ -3,7 +3,10 @@
#include <migraphx/config.hpp>
#include <migraphx/config.hpp>
#if defined(__has_include) && !defined(CPPCHECK)
#if defined(CPPCHECK)
#define MIGRAPHX_HAS_FILESYSTEM 1
#define MIGRAPHX_HAS_FILESYSTEM_TS 1
#elif defined(__has_include)
#if __has_include(<filesystem>) && __cplusplus >= 201703L
#if __has_include(<filesystem>) && __cplusplus >= 201703L
#define MIGRAPHX_HAS_FILESYSTEM 1
#define MIGRAPHX_HAS_FILESYSTEM 1
#else
#else
...
...
src/include/migraphx/optional.hpp
View file @
0a8342b8
...
@@ -3,7 +3,10 @@
...
@@ -3,7 +3,10 @@
#include <migraphx/config.hpp>
#include <migraphx/config.hpp>
#if defined(__has_include) && !defined(CPPCHECK)
#if defined(CPPCHECK)
#define MIGRAPHX_HAS_OPTIONAL 1
#define MIGRAPHX_HAS_OPTIONAL_TS 1
#elif defined(__has_include)
#if __has_include(<optional>) && __cplusplus >= 201703L
#if __has_include(<optional>) && __cplusplus >= 201703L
#define MIGRAPHX_HAS_OPTIONAL 1
#define MIGRAPHX_HAS_OPTIONAL 1
#else
#else
...
...
src/onnx/parse_mean.cpp
View file @
0a8342b8
...
@@ -24,14 +24,17 @@ struct parse_mean : op_parser<parse_mean>
...
@@ -24,14 +24,17 @@ struct parse_mean : op_parser<parse_mean>
auto
divisor
=
info
.
add_literal
(
auto
divisor
=
info
.
add_literal
(
migraphx
::
literal
{
migraphx
::
shape
{
args
[
0
]
->
get_shape
().
type
()},
{
num_data
}});
migraphx
::
literal
{
migraphx
::
shape
{
args
[
0
]
->
get_shape
().
type
()},
{
num_data
}});
return
std
::
accumulate
(
args
.
begin
(),
args
.
end
(),
args
[
0
],
[
&
](
auto
&
mean
,
auto
&
data_i
)
{
// TODO: Only divide when using floating-point
// Pre-divide each tensor element-wise by n to reduce risk of overflow during summation
return
std
::
accumulate
(
args
.
begin
()
+
1
,
data_i
=
info
.
add_broadcastable_binary_op
(
"div"
,
data_i
,
divisor
);
args
.
end
(),
info
.
add_broadcastable_binary_op
(
"div"
,
args
[
0
],
divisor
),
if
(
data_i
!=
args
[
0
])
[
&
](
auto
mean
,
auto
data_i
)
{
return
info
.
add_broadcastable_binary_op
(
"add"
,
mean
,
data_i
);
// Pre-divide each tensor element-wise by n to reduce risk of
return
data_i
;
// overflow during summation
});
auto
div
=
info
.
add_broadcastable_binary_op
(
"div"
,
data_i
,
divisor
);
return
info
.
add_broadcastable_binary_op
(
"add"
,
mean
,
div
);
});
}
}
};
};
...
...
src/simplify_algebra.cpp
View file @
0a8342b8
...
@@ -995,7 +995,7 @@ struct find_split_transpose
...
@@ -995,7 +995,7 @@ struct find_split_transpose
auto
axis
=
any_cast
<
op
::
slice
>
(
slc
->
get_operator
()).
axes
.
front
();
auto
axis
=
any_cast
<
op
::
slice
>
(
slc
->
get_operator
()).
axes
.
front
();
auto
it
=
std
::
find
(
perm
.
begin
(),
perm
.
end
(),
axis
);
auto
it
=
std
::
find
(
perm
.
begin
(),
perm
.
end
(),
axis
);
assert
(
it
!=
perm
.
end
());
assert
(
it
!=
perm
.
end
());
auto
axis_new
=
static_cast
<
int64_t
>
(
std
::
distance
(
perm
.
begin
(),
it
)
)
;
int64_t
axis_new
=
std
::
distance
(
perm
.
begin
(),
it
);
for
(
auto
in
:
split_outputs
)
for
(
auto
in
:
split_outputs
)
{
{
...
...
src/targets/cpu/include/migraphx/cpu/parallel.hpp
View file @
0a8342b8
...
@@ -7,7 +7,16 @@
...
@@ -7,7 +7,16 @@
#ifdef MIGRAPHX_DISABLE_OMP
#ifdef MIGRAPHX_DISABLE_OMP
#include <migraphx/par_for.hpp>
#include <migraphx/par_for.hpp>
#else
#else
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wreserved-identifier"
#endif
#include <omp.h>
#include <omp.h>
#ifdef __clang__
#pragma clang diagnostic pop
#endif
#endif
#endif
namespace
migraphx
{
namespace
migraphx
{
...
...
src/targets/cpu/include/migraphx/cpu/pointwise.hpp
View file @
0a8342b8
...
@@ -319,7 +319,7 @@ struct cpu_unary : reduce_dims_base, auto_register_op<cpu_unary<Op>>
...
@@ -319,7 +319,7 @@ struct cpu_unary : reduce_dims_base, auto_register_op<cpu_unary<Op>>
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
{
check_shapes
{
inputs
,
*
this
}.
has
(
2
);
check_shapes
{
inputs
,
*
this
}.
has
(
2
);
auto
s
=
inputs
.
at
(
0
);
const
auto
&
s
=
inputs
.
at
(
0
);
return
{
s
.
type
(),
s
.
lens
()};
return
{
s
.
type
(),
s
.
lens
()};
}
}
argument
argument
...
@@ -357,7 +357,7 @@ struct cpu_binary : reduce_dims_base, auto_register_op<cpu_binary<Op>>
...
@@ -357,7 +357,7 @@ struct cpu_binary : reduce_dims_base, auto_register_op<cpu_binary<Op>>
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
{
check_shapes
{
inputs
,
*
this
}.
has
(
3
);
check_shapes
{
inputs
,
*
this
}.
has
(
3
);
auto
s
=
inputs
.
at
(
0
);
const
auto
&
s
=
inputs
.
at
(
0
);
return
{
s
.
type
(),
s
.
lens
()};
return
{
s
.
type
(),
s
.
lens
()};
}
}
...
...
src/targets/cpu/lowering.cpp
View file @
0a8342b8
...
@@ -223,7 +223,7 @@ struct cpu_unary2 : auto_register_op<cpu_unary2<Op>>
...
@@ -223,7 +223,7 @@ struct cpu_unary2 : auto_register_op<cpu_unary2<Op>>
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
{
check_shapes
{
inputs
,
*
this
}.
has
(
1
);
check_shapes
{
inputs
,
*
this
}.
has
(
1
);
auto
s
=
inputs
.
at
(
0
);
const
auto
&
s
=
inputs
.
at
(
0
);
return
{
s
.
type
(),
s
.
lens
()};
return
{
s
.
type
(),
s
.
lens
()};
}
}
...
...
src/targets/gpu/CMakeLists.txt
View file @
0a8342b8
...
@@ -93,7 +93,7 @@ add_library(migraphx_device
...
@@ -93,7 +93,7 @@ add_library(migraphx_device
)
)
add_library
(
compile_for_gpu INTERFACE
)
add_library
(
compile_for_gpu INTERFACE
)
target_compile_options
(
compile_for_gpu INTERFACE -std=c++17 -fno-gpu-rdc -Wno-cuda-compat -Wno-unused-command-line-argument -Xclang -fallow-half-arguments-and-returns
)
target_compile_options
(
compile_for_gpu INTERFACE -std=c++17 -fno-gpu-rdc -Wno-cuda-compat -Wno-unused-command-line-argument -Xclang -fallow-half-arguments-and-returns
)
target_link_libraries
(
compile_for_gpu INTERFACE hip::device -fno-gpu-rdc -Wno-invalid-command-line-argument -Wno-unused-command-line-argument
)
target_link_libraries
(
compile_for_gpu INTERFACE hip::device -fno-gpu-rdc -Wno-invalid-command-line-argument -Wno-unused-command-line-argument
-Wno-option-ignored
)
check_cxx_compiler_flag
(
"--cuda-host-only -fhip-lambda-host-device -x hip"
HAS_HIP_LAMBDA_HOST_DEVICE
)
check_cxx_compiler_flag
(
"--cuda-host-only -fhip-lambda-host-device -x hip"
HAS_HIP_LAMBDA_HOST_DEVICE
)
if
(
HAS_HIP_LAMBDA_HOST_DEVICE
)
if
(
HAS_HIP_LAMBDA_HOST_DEVICE
)
message
(
STATUS
"Enable -fhip-lambda-host-device"
)
message
(
STATUS
"Enable -fhip-lambda-host-device"
)
...
...
src/targets/gpu/compile_hip.cpp
View file @
0a8342b8
...
@@ -133,6 +133,7 @@ struct hiprtc_program
...
@@ -133,6 +133,7 @@ struct hiprtc_program
std
::
vector
<
char
>
buffer
(
n
);
std
::
vector
<
char
>
buffer
(
n
);
MIGRAPHX_HIPRTC
(
hiprtcGetProgramLog
(
prog
.
get
(),
buffer
.
data
()));
MIGRAPHX_HIPRTC
(
hiprtcGetProgramLog
(
prog
.
get
(),
buffer
.
data
()));
assert
(
buffer
.
back
()
==
0
);
assert
(
buffer
.
back
()
==
0
);
// cppcheck-suppress returnDanglingLifetime
return
{
buffer
.
begin
(),
buffer
.
end
()
-
1
};
return
{
buffer
.
begin
(),
buffer
.
end
()
-
1
};
}
}
...
...
src/targets/gpu/jit/scatternd.cpp
View file @
0a8342b8
...
@@ -52,9 +52,8 @@ struct scatternd_compiler : compiler<scatternd_compiler>
...
@@ -52,9 +52,8 @@ struct scatternd_compiler : compiler<scatternd_compiler>
{
{
hip_compile_options
options
;
hip_compile_options
options
;
options
.
set_launch_params
(
v
,
compute_global_for
(
ctx
,
inputs
.
at
(
1
).
elements
()));
options
.
set_launch_params
(
v
,
compute_global_for
(
ctx
,
inputs
.
at
(
1
).
elements
()));
auto
out_s
=
inputs
.
back
();
options
.
inputs
=
inputs
;
options
.
inputs
=
inputs
;
options
.
output
=
out_s
;
options
.
output
=
inputs
.
back
()
;
options
.
kernel_name
=
"scatternd_kernel"
;
options
.
kernel_name
=
"scatternd_kernel"
;
options
.
virtual_inputs
=
inputs
;
options
.
virtual_inputs
=
inputs
;
auto
reduction
=
"assign_"
+
v
.
get
(
"reduction"
,
std
::
string
{
"none"
});
auto
reduction
=
"assign_"
+
v
.
get
(
"reduction"
,
std
::
string
{
"none"
});
...
...
src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp
View file @
0a8342b8
...
@@ -118,15 +118,13 @@ constexpr roalign_settings<Ts...> make_roalign_settings(Ts... xs)
...
@@ -118,15 +118,13 @@ constexpr roalign_settings<Ts...> make_roalign_settings(Ts... xs)
}
}
template
<
class
T
,
class
U
,
class
V
,
class
W
,
class
Settings
>
template
<
class
T
,
class
U
,
class
V
,
class
W
,
class
Settings
>
__device__
void
roialign
(
const
T
&
x_t
,
const
U
&
rois_t
,
const
V
&
ind_t
,
const
W
&
y_t
,
Settings
s
)
__device__
void
roialign
(
const
T
&
x_t
,
const
U
&
rois_t
,
const
V
&
ind_t
,
W
&
y_t
,
Settings
s
)
{
{
auto
index
=
make_index
();
auto
index
=
make_index
();
const
auto
x
=
x_t
.
begin
();
const
auto
x
=
x_t
.
begin
();
const
auto
rois
=
rois_t
.
begin
();
const
auto
rois
=
rois_t
.
begin
();
const
auto
ind
=
ind_t
.
begin
();
const
auto
ind
=
ind_t
.
begin
();
auto
out_ptr
=
y_t
.
begin
();
// input shape
// input shape
auto
x_lens
=
x_t
.
get_shape
().
lens
;
auto
x_lens
=
x_t
.
get_shape
().
lens
;
auto
channel_num
=
x_lens
[
1
];
auto
channel_num
=
x_lens
[
1
];
...
@@ -176,25 +174,25 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W&
...
@@ -176,25 +174,25 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W&
const
auto
offset_x
=
x
+
((
batch_ind
*
channel_num
+
c
)
*
in_dims
[
0
]
*
in_dims
[
1
]);
const
auto
offset_x
=
x
+
((
batch_ind
*
channel_num
+
c
)
*
in_dims
[
0
]
*
in_dims
[
1
]);
if
constexpr
(
s
.
is_avg_pooling
)
if
constexpr
(
s
.
is_avg_pooling
)
{
{
out_ptr
[
i
]
=
calc_pooling
(
offset_x
,
y_t
[
i
]
=
calc_pooling
(
offset_x
,
roi_starts
,
roi_starts
,
bin_size
,
bin_size
,
{
ph
,
pw
},
{
ph
,
pw
},
bin_grid_size
,
bin_grid_size
,
in_dims
,
in_dims
,
s
.
roi_offset
,
s
.
roi_offset
,
avg_pool
{});
avg_pool
{});
}
}
else
else
{
{
out_ptr
[
i
]
=
calc_pooling
(
offset_x
,
y_t
[
i
]
=
calc_pooling
(
offset_x
,
roi_starts
,
roi_starts
,
bin_size
,
bin_size
,
{
ph
,
pw
},
{
ph
,
pw
},
bin_grid_size
,
bin_grid_size
,
in_dims
,
in_dims
,
s
.
roi_offset
,
s
.
roi_offset
,
max_pool
{});
max_pool
{});
}
}
}
}
}
}
...
...
src/targets/ref/lowering.cpp
View file @
0a8342b8
...
@@ -505,7 +505,7 @@ struct ref_unary : auto_register_op<ref_unary<Op>>
...
@@ -505,7 +505,7 @@ struct ref_unary : auto_register_op<ref_unary<Op>>
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
{
check_shapes
{
inputs
,
*
this
}.
has
(
1
);
check_shapes
{
inputs
,
*
this
}.
has
(
1
);
auto
s
=
inputs
.
at
(
0
);
const
auto
&
s
=
inputs
.
at
(
0
);
return
{
s
.
type
(),
s
.
lens
()};
return
{
s
.
type
(),
s
.
lens
()};
}
}
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment