GitLab · gaoqiong / MIGraphX · Commits

Commit 30a0b238, authored Nov 16, 2022 by Khalique Ahmed

    manual merge

Parents: 72aabeb5, 0b6b33bc

Changes: 38 files in total; this page shows 20 changed files with 476 additions and 135 deletions (+476 -135).
.github/workflows/ci.yaml                           +33 -23
Dockerfile                                           +2  -1
src/api/include/migraphx/migraphx.hpp                +1  -0
src/common.cpp                                      +97 -14
src/include/migraphx/common.hpp                      +3  -0
src/include/migraphx/op/binary.hpp                  +14  -4
src/include/migraphx/op/broadcast.hpp               +90 -33
src/include/migraphx/op/multibroadcast.hpp          +66 -25
src/include/migraphx/shape.hpp                      +20  -1
src/onnx/parse_batchnorm.cpp                         +1  -1
src/onnx/parse_binary_op.cpp                         +6  -0
src/onnx/parse_split.cpp                            +18  -6
src/pass_manager.cpp                                 +8  -0
src/shape.cpp                                       +43  -7
src/targets/gpu/compile_miopen.cpp                   +1  -1
src/targets/gpu/compile_ops.cpp                     +10  -3
src/targets/gpu/fuse_ops.cpp                        +33  -3
src/targets/gpu/gemm_impl.cpp                        +1  -1
src/targets/gpu/include/migraphx/gpu/rocblas.hpp     +1  -1
src/targets/gpu/jit/concat.cpp                      +28 -11
File: .github/workflows/ci.yaml

@@ -7,7 +7,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
     - name: Cancel Previous Runs
-      uses: styfle/cancel-workflow-action@0.6.0
+      uses: styfle/cancel-workflow-action@0.11.0
       with:
         access_token: ${{ github.token }}

   tidy:
@@ -15,9 +15,19 @@ jobs:
     steps:
     - name: Free space
-      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android
-    - uses: actions/checkout@v2
+      run: |
+        sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android /usr/local/graalvm /usr/local/aws* /usr/local/lib/heroku
+        du . --max-depth=1 -h
+        ls -la
+        cd /usr/local
+        du . --max-depth=1 -h
+        ls -la
+        cd /usr/local/lib
+        echo $(pwd)
+        du . --max-depth=1 -h
+        ls -la
+    - uses: actions/checkout@v3
     # In this step, this action saves a list of existing images,
     # the cache is created without them in the post run.
@@ -34,7 +44,7 @@ jobs:
         message("::set-output name=timestamp::${current_date}")
     - name: Cache files for tidy
-      uses: pat-s/always-upload-cache@v2.1.3
+      uses: pat-s/always-upload-cache@v3.0.11
       with:
         path: tidy-cache
         key: tidy-cache-${{ steps.cache_timestamp.outputs.timestamp }}
@@ -65,8 +75,8 @@ jobs:
     steps:
     - name: Free space
-      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android
-    - uses: actions/checkout@v2
+      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android /usr/local/graalvm /usr/local/aws* /usr/local/lib/heroku
+    - uses: actions/checkout@v3
     # In this step, this action saves a list of existing images,
     # the cache is created without them in the post run.
@@ -110,8 +120,8 @@ jobs:
     steps:
     - name: Free space
-      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android
-    - uses: actions/checkout@v2
+      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android /usr/local/graalvm /usr/local/aws* /usr/local/lib/heroku
+    - uses: actions/checkout@v3
     # In this step, this action saves a list of existing images,
     # the cache is created without them in the post run.
@@ -146,10 +156,10 @@ jobs:
     steps:
     - name: Free space
-      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android
-    - uses: actions/checkout@v2
+      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android /usr/local/graalvm /usr/local/aws* /usr/local/lib/heroku
+    - uses: actions/checkout@v3
     - name: Set up Python
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v4
       with:
         python-version: 3.8
     - name: Install pyflakes
@@ -167,10 +177,10 @@ jobs:
     steps:
     - name: Free space
-      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android
-    - uses: actions/checkout@v2
+      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android /usr/local/graalvm /usr/local/aws* /usr/local/lib/heroku
+    - uses: actions/checkout@v3
     - name: Set up Python
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v4
       with:
         python-version: 3.8
     - name: run License Check
@@ -198,16 +208,16 @@ jobs:
     steps:
     - name: Free space
-      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android
-    - uses: actions/checkout@v2
+      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android /usr/local/graalvm /usr/local/aws* /usr/local/lib/heroku
+    - uses: actions/checkout@v3
     - name: Set up Python
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v4
       with:
         python-version: 3.7
     - name: Cache dependencies
       # Ignore the failure of a step and avoid terminating the job.
       continue-on-error: true
-      uses: actions/cache@v2
+      uses: actions/cache@v3
       with:
         # This path is specific to Ubuntu
         path: ${{ github.workspace }}/cget
@@ -294,16 +304,16 @@ jobs:
     steps:
     - name: Free space
-      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android
-    - uses: actions/checkout@v2
+      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android /usr/local/graalvm /usr/local/aws* /usr/local/lib/heroku
+    - uses: actions/checkout@v3
     - name: Set up Python
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v4
       with:
         python-version: 3.7
     - name: Cache dependencies
       # Ignore the failure of a step and avoid terminating the job.
       continue-on-error: true
-      uses: actions/cache@v2
+      uses: actions/cache@v3
       with:
         # This path is specific to Ubuntu
         path: ${{ github.workspace }}/cget
File: Dockerfile

@@ -74,7 +74,8 @@ RUN cget -p $PREFIX install facebook/zstd@v1.4.5 -X subdir -DCMAKE_DIR=build/cma
 RUN cget -p $PREFIX install ccache@v4.1 -DENABLE_TESTING=OFF

 # Install newer cmake for onnx runtime
-RUN cget -p /opt/cmake install kitware/cmake@v3.13.4
+ARG CMAKE_VERSION=3.24.2
+RUN cget -p /opt/cmake install -X binary https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-Linux-x86_64.tar.gz

 ARG ONNXRUNTIME_REPO=https://github.com/Microsoft/onnxruntime
 ARG ONNXRUNTIME_BRANCH=main
File: src/api/include/migraphx/migraphx.hpp

@@ -32,6 +32,7 @@
 #include <memory>
 #include <numeric>
 #include <exception>
+#include <array>
 #include <vector>
 #include <cassert>
 #include <iostream>
File: src/common.cpp

@@ -27,6 +27,7 @@
 #include <migraphx/algorithm.hpp>
 #include <migraphx/stringutils.hpp>
 #include <migraphx/instruction.hpp>
+#include <migraphx/ranges.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

@@ -43,6 +44,7 @@ inline namespace MIGRAPHX_INLINE_NS {
 // In this case we need to broadcast the (:,:,1:,:) axis
 // of s0 plus the 1st dimension of s1 giving
 // output_lens = (3,2,7,5)
+//
 std::vector<std::size_t> compute_broadcasted_lens(std::vector<std::size_t> s0,
                                                   std::vector<std::size_t> s1)
 {
@@ -50,25 +52,63 @@ std::vector<std::size_t> compute_broadcasted_lens(std::vector<std::size_t> s0,
         return s0;
     if(s0.size() > s1.size())
         s0.swap(s1);
     std::vector<std::size_t> out_lens(s1);
     auto offset = s1.size() - s0.size();
     std::transform(s0.begin(),
                    s0.end(),
                    s1.begin() + offset,
                    out_lens.begin() + offset,
                    [&](auto a, auto b) {
                        if(a != b and a != 1 and b != 1)
                        {
-                           MIGRAPHX_THROW("COMPUTE_BROADCASTLEN: shape {" + to_string_range(s0) +
-                                          "} and {" + to_string_range(s1) + "} mismatch!");
+                           MIGRAPHX_THROW("COMPUTE_BROADCASTLEN: shape {" +
+                                          migraphx::to_string_range(s0) + "} and {" +
+                                          migraphx::to_string_range(s1) + "} mismatch!");
                        }
                        return std::max(a, b);
                    });
     return out_lens;
 }

+std::vector<shape::dynamic_dimension> compute_broadcasted_dyn_dims(shape s0, shape s1)
+{
+    // change both shapes to dynamic_dimension representation
+    s0 = s0.to_dynamic();
+    s1 = s1.to_dynamic();
+    if(s0.ndim() > s1.ndim())
+    {
+        std::swap(s0, s1);
+    }
+    auto offset = s1.ndim() - s0.ndim();
+    std::vector<shape::dynamic_dimension> out_dims(s1.dyn_dims());
+    shape::dynamic_dimension one_dyn_dim{1, 1, 0};
+    std::transform(s0.dyn_dims().cbegin(),
+                   s0.dyn_dims().cend(),
+                   s1.dyn_dims().cbegin() + offset,
+                   out_dims.begin() + offset,
+                   [&](auto a, auto b) {
+                       if(a == b)
+                       {
+                           return a;
+                       }
+                       else if(a == one_dyn_dim or b == one_dyn_dim)
+                       {
+                           // setting opt to 0, may need to be changed
+                           return shape::dynamic_dimension{
+                               std::max(a.min, b.min), std::max(a.max, b.max), 0};
+                       }
+                       else
+                       {
+                           MIGRAPHX_THROW("COMPUTE_BROADCASTED_DYN_DIMS: dynamic shapes {" +
+                                          migraphx::to_string_range(s0.dyn_dims()) + "} and {" +
+                                          migraphx::to_string_range(s1.dyn_dims()) +
+                                          "} mismatch!");
+                       }
+                   });
+    return out_dims;
+}
+
+// Compute the common (broadcasted) dimensions of a list of fixed shapes
 std::vector<std::size_t> compute_common_lens(const std::vector<shape>& shapes)
 {
     assert(not shapes.empty());
+    assert(std::none_of(
+        shapes.cbegin(), shapes.cend(), [](auto shape) { return shape.dynamic(); }));
     return transform_accumulate(shapes.begin() + 1,
                                 shapes.end(),
                                 shapes.front().lens(),
@@ -114,6 +154,48 @@ instruction_ref insert_common_op(module& m,
                                  const operation& op,
                                  std::vector<instruction_ref> inputs)
 {
+    if(std::any_of(inputs.cbegin(), inputs.cend(), [](auto input) {
+           return input->get_shape().dynamic();
+       }))
+    {
+        // currently only handles the binary case
+        if(inputs.size() != 2)
+        {
+            MIGRAPHX_THROW("INSERT_COMMON_OP: not handled; " +
+                           migraphx::to_string(inputs.size()) +
+                           "inputs, only handle two inputs if any are dynamic shape");
+        }
+        auto c_type     = compute_common_types(to_shapes(inputs));
+        auto c_dyn_dims =
+            compute_broadcasted_dyn_dims(inputs[0]->get_shape(), inputs[1]->get_shape());
+        // following should work for a static or dynamic shape
+        if(inputs[0]->get_shape().dyn_dims() != c_dyn_dims)
+        {
+            inputs[0] = m.insert_instruction(
+                ins,
+                make_op("multibroadcast", {{"out_dyn_dims", to_value(c_dyn_dims)}}),
+                inputs[0],
+                inputs[1]);
+        }
+        if(inputs[1]->get_shape().dyn_dims() != c_dyn_dims)
+        {
+            inputs[1] = m.insert_instruction(
+                ins,
+                make_op("multibroadcast", {{"out_dyn_dims", to_value(c_dyn_dims)}}),
+                inputs[1],
+                inputs[0]);
+        }
+        std::transform(inputs.begin(), inputs.end(), inputs.begin(), [&](auto input) {
+            if(input->get_shape().type() != c_type)
+            {
+                input = m.insert_instruction(
+                    ins, make_op("convert", {{"target_type", c_type}}), input);
+            }
+            return input;
+        });
+    }
+    else
+    {
+        auto common = common_shape(to_shapes(inputs));
+        std::transform(inputs.begin(), inputs.end(), inputs.begin(), [&](auto input) {
+            if(input->get_shape().lens() != common.lens())
@@ -128,6 +210,7 @@ instruction_ref insert_common_op(module& m,
             }
             return input;
         });
+    }
     return m.insert_instruction(ins, op, inputs);
 }
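Note: the comment above compute_broadcasted_lens describes the NumPy-style broadcasting rule: shapes are right-aligned and a dimension of 1 stretches to match the other shape. A minimal standalone sketch of that rule (illustrative names only, not MIGraphX's API):

// Minimal sketch of the NumPy-style broadcasting rule that
// compute_broadcasted_lens implements; names here are illustrative only.
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <stdexcept>
#include <vector>

std::vector<std::size_t> broadcast_lens(std::vector<std::size_t> s0, std::vector<std::size_t> s1)
{
    if(s0.size() > s1.size())
        s0.swap(s1); // make s0 the shorter (or equal-rank) shape
    std::vector<std::size_t> out(s1);
    auto offset = s1.size() - s0.size(); // right-align the shorter shape
    std::transform(s0.begin(), s0.end(), s1.begin() + offset, out.begin() + offset,
                   [](std::size_t a, std::size_t b) {
                       if(a != b and a != 1 and b != 1)
                           throw std::runtime_error("shapes do not broadcast");
                       return std::max(a, b); // a 1 stretches to the other length
                   });
    return out;
}

int main()
{
    // Example from the source comment: (3,2,1,5) with (2,7,5) -> (3,2,7,5)
    for(auto d : broadcast_lens({3, 2, 1, 5}, {2, 7, 5}))
        std::cout << d << ' ';
}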
File: src/include/migraphx/common.hpp

@@ -36,6 +36,9 @@ struct operation;
 std::vector<std::size_t> compute_broadcasted_lens(std::vector<std::size_t> s0,
                                                   std::vector<std::size_t> s1);

+std::vector<shape::dynamic_dimension> compute_broadcasted_dyn_dims(shape s0, shape s1);
+
 shape common_shape(const std::vector<shape>& shapes);

 instruction_ref insert_common_op(module& m,
File: src/include/migraphx/op/binary.hpp

@@ -28,6 +28,7 @@
 #include <migraphx/check_shapes.hpp>
 #include <migraphx/argument.hpp>
 #include <migraphx/value.hpp>
+#include <migraphx/dyn_output.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

@@ -60,10 +61,19 @@ struct binary : op_name<Derived>
     value attributes() const { return base_attributes(); }
     shape compute_shape(std::vector<shape> inputs) const
     {
-        check_shapes{inputs, static_cast<const Derived&>(*this)}.has(2).same_type().same_dims();
+        check_shapes{inputs, static_cast<const Derived&>(*this), true}
+            .has(2)
+            .same_type()
+            .same_dims();
         auto s0 = inputs.at(0);
         auto s1 = inputs.at(1);
-        if(s0 == s1 and s0.packed())
+        if(s0.dynamic() or s1.dynamic())
+        {
+            if(s0 == s1)
+                return s0;
+            MIGRAPHX_THROW("BINARY: " + point_function() + ": fixed-dyn shape for inputs");
+        }
+        else if(s0 == s1 and s0.packed())
         {
             return s0;
         }

@@ -81,9 +91,9 @@ struct binary : op_name<Derived>
         }
     }

-    argument compute(const shape& output_shape, std::vector<argument> args) const
+    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
     {
-        argument result{output_shape};
+        argument result{dyn_out.computed_shape};
         visit_all(result, args[0], args[1])([&](auto output, auto input1, auto input2) {
             std::transform(input1.begin(),
                            input1.end(),
File: src/include/migraphx/op/broadcast.hpp

@@ -27,23 +27,30 @@
 #include <migraphx/check_shapes.hpp>
 #include <migraphx/argument.hpp>
 #include <migraphx/config.hpp>
+#include <migraphx/dyn_output.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace op {

-/// The broadcast operator performs the numpy-style broadcasting of an axis of a given tensor. This
-/// is achieved primarily by setting the stride of the broadcasted axis to zero. Linear indicies are
-/// computed from multi-indicies by computing the inner product on the multi-index with the strides.
-/// For example, if we have a tensor A(2,3) it has lengths of (2,3) and strides of (3,1). If we want
-/// to compute the linear offset that corresponds to the element on the 2nd row (i = 1) and 3rd
-/// column (j = 2), we compute the following inner product (1,2) dot (3, 1) = 1*3 + 2*1 = 5. It is
-/// obvious from there that we can negate the effects of a given axis by setting the stride of that
-/// axis to zero.
+/**
+ * 1 input version:
+ * Broadcasts a tensor from the original shape to the broadcast_lens by setting the stride of
+ * broadcasted dimensions to zero. `axis` attribute for a 1D input shape is the output dimension
+ * that stays the same. ex: broadcasting shape [1024] -> [4, 1024, 3] has axis = 1. For higher rank
+ * input shapes, axis is an offset parameter for the broadcasting, such that this operator works
+ * in the opposite direction of NumPy broadcasting. ex: broadcasting shape [2, 2] -> [2, 2, 3]
+ * with axis = 0.
+ *
+ * 2 input version:
+ * Broadcasts the first input 1D shape into the second input shape based on the axis parameter.
+ * Handles broadcasting a 1D static shape into a higher rank dynamic shape.
+ * broadcast_lens is not used.
+ */
 struct broadcast
 {
     uint64_t axis = 0;
-    std::vector<std::size_t> broadcast_lens;
+    std::vector<std::size_t> broadcast_lens = {};

     template <class Self, class F>
     static auto reflect(Self& self, F f)

@@ -54,36 +61,86 @@ struct broadcast
     std::string name() const { return "broadcast"; }

     shape compute_shape(std::vector<shape> inputs) const
     {
-        auto input = inputs.at(0);
-        auto t     = input.type();
-        std::vector<size_t> bcast_strides(broadcast_lens.size(), 0);
-        // the broacast op is deprecated now, so not handling the negative
-        // value of axis anymore
-        if(axis >= broadcast_lens.size())
-        {
-            MIGRAPHX_THROW("BROADCAST : axis is out of range");
-        }
-        if(broadcast_lens.size() - axis < input.lens().size())
-        {
-            MIGRAPHX_THROW("BROADCAST: (broadcast ndims - axis) is less than input ndims");
-        }
-        if(not std::equal(input.lens().begin(), input.lens().end(), broadcast_lens.begin() + axis))
-        {
-            MIGRAPHX_THROW("BROADCAST: when broadcasting, succeeding sizes must match");
-        }
-        std::copy(input.strides().begin(), input.strides().end(), bcast_strides.begin() + axis);
-        shape output{t, broadcast_lens, std::move(bcast_strides)};
-        if(output.elements() < input.elements())
-            MIGRAPHX_THROW("BROADCAST: output size must be greater than or equal to input size");
-        return output;
+        check_shapes{inputs, *this, true}.has(1, 2);
+        auto s0 = inputs.at(0);
+        auto t  = s0.type();
+        if(inputs.size() == 1)
+        {
+            // the ONNX broadcast op is deprecated now, so not handling the negative
+            // value of axis anymore
+            if(axis >= broadcast_lens.size())
+            {
+                MIGRAPHX_THROW("BROADCAST : axis " + migraphx::to_string(axis) +
+                               " is out of range");
+            }
+            if(broadcast_lens.size() - axis < s0.lens().size())
+            {
+                MIGRAPHX_THROW("BROADCAST: (broadcast ndims - axis) is less than s0 ndims");
+            }
+            if(not std::equal(s0.lens().begin(), s0.lens().end(), broadcast_lens.begin() + axis))
+            {
+                MIGRAPHX_THROW("BROADCAST: when broadcasting, succeeding sizes must match");
+            }
+            std::vector<size_t> bcast_strides(broadcast_lens.size(), 0);
+            std::copy(s0.strides().begin(), s0.strides().end(), bcast_strides.begin() + axis);
+            shape output{t, broadcast_lens, std::move(bcast_strides)};
+            if(output.elements() < s0.elements())
+            {
+                // don't think this can occur?
+                MIGRAPHX_THROW("BROADCAST: output size must be greater than or equal to s0 size");
+            }
+            return output;
+        }
+        else
+        {
+            // two inputs
+            auto s1 = inputs.at(1);
+            if(s0.dynamic())
+            {
+                MIGRAPHX_THROW("BROADCAST_2in: s0 is a dynamic shape, does not handle broadcasting "
+                               "a dynamic shape");
+            }
+            if(s0.ndim() != 1)
+            {
+                MIGRAPHX_THROW("BROADCAST_2in: s0 has ndim " + migraphx::to_string(s0.ndim()) +
+                               ", only handle ndim = 1");
+            }
+            if(axis >= s1.ndim())
+            {
+                MIGRAPHX_THROW("BROADCAST_2in: axis " + migraphx::to_string(axis) +
+                               " is out of range");
+            }
+            if(s1.dynamic())
+            {
+                s0 = s0.to_dynamic();
+                if(s0.dyn_dims()[0] != s1.dyn_dims()[axis])
+                {
+                    MIGRAPHX_THROW("BROADCAST_2in: s0 length doesn't match with dynamic s1 axis "
+                                   "dimension length (" +
+                                   migraphx::to_string(s0.dyn_dims()[0]) + " != " +
+                                   migraphx::to_string(s1.dyn_dims()[axis]) + ")");
+                }
+                return s1;
+            }
+            if(s0.lens()[0] != s1.lens()[axis])
+            {
+                MIGRAPHX_THROW("BROADCAST_2in: s0 length doesn't match with static s1 axis "
+                               "dimension length (" +
+                               migraphx::to_string(s0.lens()[0]) + " != " +
+                               migraphx::to_string(s1.lens()[axis]) + ")");
+            }
+            std::vector<size_t> bcast_strides(s1.ndim(), 0);
+            std::copy(s0.strides().begin(), s0.strides().end(), bcast_strides.begin() + axis);
+            shape output{t, s1.lens(), std::move(bcast_strides)};
+            return output;
+        }
     }

-    argument compute(shape output_shape, std::vector<argument> args) const
+    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
     {
-        return args[0].reshape(output_shape);
+        return args[0].reshape(dyn_out.computed_shape);
     }
     std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 0; }
 };
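Note: the old doc comment removed above explains the stride trick this operator relies on: a linear offset is the inner product of a multi-index with the strides, so a stride of zero makes an axis repeat the same data. A small sketch of that arithmetic (standalone, not MIGraphX code):

// Sketch of the stride trick from the old doc comment: the linear offset is
// the inner product of the multi-index with the strides, so a stride of zero
// makes the tensor's data repeat along that axis.
#include <cstddef>
#include <iostream>
#include <numeric>
#include <vector>

std::size_t linear_offset(const std::vector<std::size_t>& idx,
                          const std::vector<std::size_t>& strides)
{
    return std::inner_product(idx.begin(), idx.end(), strides.begin(), std::size_t{0});
}

int main()
{
    // A 2x3 tensor has strides (3,1): element (1,2) lives at 1*3 + 2*1 = 5.
    std::cout << linear_offset({1, 2}, {3, 1}) << '\n'; // 5
    // Broadcasting it to 4x2x3 gives the new axis stride zero, so every
    // slice along axis 0 maps to the same underlying elements.
    std::cout << linear_offset({3, 1, 2}, {0, 3, 1}) << '\n'; // still 5
}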
File: src/include/migraphx/op/multibroadcast.hpp

@@ -26,64 +26,105 @@
 #include <migraphx/check_shapes.hpp>
 #include <migraphx/argument.hpp>
+#include <migraphx/dyn_output.hpp>
+#include <migraphx/common.hpp>
 #include <migraphx/config.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace op {

+/**
+ * Broadcast multiple dimensions between two tensors.
+ * Two versions of this operator: one input and two inputs.
+ * One input version uses output_lens attribute and broadcasts to it.
+ * Two inputs version broadcasts both inputs to the common shape at evaluation time.
+ */
 struct multibroadcast
 {
-    std::vector<std::size_t> output_lens;
+    std::vector<std::size_t> output_lens = {};
+    // optional attribute
+    std::vector<shape::dynamic_dimension> output_dyn_dims = {};

     template <class Self, class F>
     static auto reflect(Self& self, F f)
     {
-        return pack(f(self.output_lens, "out_lens"));
+        return pack(f(self.output_lens, "out_lens"), f(self.output_dyn_dims, "out_dyn_dims"));
     }

     std::string name() const { return "multibroadcast"; }

     shape compute_shape(std::vector<shape> inputs) const
     {
-        check_shapes{inputs, *this}.has(1);
+        check_shapes{inputs, *this, true}.has(1, 2);
         auto t  = inputs.at(0).type();
-        auto input = inputs.at(0);
+        auto s0 = inputs.at(0);

-        if(input.lens().empty())
+        if(s0.max_lens().empty())
         {
-            MIGRAPHX_THROW("MULTIBROADCAST: inputs dimensions should be > 0");
+            MIGRAPHX_THROW("MULTIBROADCAST: input dimensions should be > 0");
         }
-        if(input.lens().size() > output_lens.size())
-        {
-            MIGRAPHX_THROW("MULTIBROADCAST: inputs dimensions should <= output size");
-        }
-        auto offset = output_lens.size() - input.lens().size();
-        for(std::ptrdiff_t i = input.lens().size() - 1; i >= 0; i--)
-        {
-            if(output_lens[i + offset] != input.lens()[i] and input.lens()[i] != 1)
-            {
-                MIGRAPHX_THROW("MULTIBROADCAST: input shape {" + to_string_range(input.lens()) +
-                               "} cannot be broadcasted to {" + to_string_range(output_lens) +
-                               "}!");
-            }
-        }
-        std::vector<size_t> bcast_strides(output_lens.size(), 0);
-        for(std::ptrdiff_t i = input.lens().size() - 1; i >= 0; i--)
-        {
-            if(output_lens[i + offset] == input.lens()[i])
-            {
-                bcast_strides[i + offset] = input.strides()[i];
-            }
-        }
-        return {t, output_lens, bcast_strides};
+        auto make_bcast_strides = [&](std::vector<std::size_t> bcast_lens, std::size_t offset) {
+            std::vector<size_t> bcast_strides(bcast_lens.size(), 0);
+            for(std::ptrdiff_t i = s0.lens().size() - 1; i >= 0; i--)
+            {
+                if(bcast_lens[i + offset] == s0.lens()[i])
+                {
+                    bcast_strides[i + offset] = s0.strides()[i];
+                }
+            }
+            return bcast_strides;
+        };
+
+        if(inputs.size() == 1)
+        {
+            if(s0.lens().size() > output_lens.size())
+            {
+                MIGRAPHX_THROW("MULTIBROADCAST: input dimensions should <= output size");
+            }
+            auto offset = output_lens.size() - s0.lens().size();
+            for(std::ptrdiff_t i = s0.lens().size() - 1; i >= 0; i--)
+            {
+                if(output_lens[i + offset] != s0.lens()[i] and s0.lens()[i] != 1)
+                {
+                    MIGRAPHX_THROW("MULTIBROADCAST: input shape {" + to_string_range(s0.lens()) +
+                                   "} cannot be broadcasted to {" + to_string_range(output_lens) +
+                                   "}!");
+                }
+            }
+            auto bcast_strides = make_bcast_strides(output_lens, offset);
+            return {t, output_lens, std::move(bcast_strides)};
+        }
+        else
+        {
+            // two inputs
+            auto s1 = inputs.at(1);
+            if(s0.dynamic() or s1.dynamic())
+            {
+                if(not output_dyn_dims.empty())
+                {
+                    return {t, output_dyn_dims};
+                }
+                return {t, compute_broadcasted_dyn_dims(s0, s1)};
+            }
+            else
+            {
+                auto bcast_lens    = compute_broadcasted_lens(s0.lens(), s1.lens());
+                auto offset        = bcast_lens.size() - s0.lens().size();
+                auto bcast_strides = make_bcast_strides(bcast_lens, offset);
+                return {t, std::move(bcast_lens), std::move(bcast_strides)};
+            }
+        }
     }

-    argument compute(shape output_shape, std::vector<argument> args) const
+    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
     {
-        return args[0].reshape(output_shape);
+        return args[0].reshape(dyn_out.computed_shape);
     }
     std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 0; }
 };
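Note: the change factors the stride computation into a make_bcast_strides helper: output dimensions that match an input dimension keep that input's stride, broadcast dimensions get stride zero. A rough standalone version, assuming fixed (non-dynamic, non-empty) shapes:

// Rough standalone version of the make_bcast_strides helper factored out in
// this change; not MIGraphX's actual code.
#include <cstddef>
#include <vector>

std::vector<std::size_t> make_bcast_strides(const std::vector<std::size_t>& in_lens,
                                            const std::vector<std::size_t>& in_strides,
                                            const std::vector<std::size_t>& out_lens)
{
    std::vector<std::size_t> bcast(out_lens.size(), 0);
    auto offset = out_lens.size() - in_lens.size(); // input is right-aligned
    for(std::ptrdiff_t i = in_lens.size() - 1; i >= 0; i--)
    {
        if(out_lens[i + offset] == in_lens[i])
            bcast[i + offset] = in_strides[i]; // dimension kept: keep its stride
        // otherwise in_lens[i] == 1 and the stride stays zero (broadcast)
    }
    return bcast;
}

int main()
{
    // Broadcast a (2,3) tensor with strides (3,1) to (4,2,3): strides (0,3,1).
    auto s = make_bcast_strides({2, 3}, {3, 1}, {4, 2, 3});
    return static_cast<int>(s[0]); // 0
}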
File: src/include/migraphx/shape.hpp

@@ -30,6 +30,7 @@
 #include <numeric>
 #include <memory>
 #include <migraphx/functional.hpp>
+#include <migraphx/errors.hpp>
 #include <migraphx/half.hpp>
 #include <migraphx/config.hpp>

@@ -89,7 +90,10 @@ struct shape
         std::size_t opt = 0;

         template <class Self, class F>
-        static auto reflect(Self& self, F f);
+        static auto reflect(Self& self, F f)
+        {
+            return pack(f(self.min, "min"), f(self.max, "max"), f(self.opt, "opt"));
+        }

         bool is_fixed() const;
         bool has_optimal() const;

@@ -115,6 +119,12 @@ struct shape
     shape(type_t t, std::vector<dynamic_dimension> dims);

+    // Construct a dynamic shape from three sets of lengths (of the same rank)
+    shape(type_t t,
+          std::vector<std::size_t> mins,
+          std::vector<std::size_t> maxes,
+          std::vector<std::size_t> opts);
+
     template <class Range>
     shape(type_t t, const Range& l) : shape(t, std::vector<std::size_t>(l.begin(), l.end()))
     {

@@ -136,6 +146,12 @@ struct shape
     const std::vector<std::size_t>& lens() const;
     const std::vector<std::size_t>& strides() const;

+    /*!
+     * The number of dimensions in the shape.
+     * Same as the number of indices required to get a data value.
+     */
+    std::size_t ndim() const;
+
     /*!
      * Return the number of elements in the tensor.
      */

@@ -221,6 +237,9 @@ struct shape
     shape with_type(type_t t) const;

+    // convert the shape to an equivalent dynamic shape
+    shape to_dynamic() const;
+
     friend bool operator==(const shape& x, const shape& y);
     friend bool operator!=(const shape& x, const shape& y);
     friend std::ostream& operator<<(std::ostream& os, const shape& x);
File: src/onnx/parse_batchnorm.cpp

@@ -44,7 +44,7 @@ struct parse_batchnorm : op_parser<parse_batchnorm>
     {
         epsilon = parser.parse_value(info.attributes.at("epsilon")).at<float>();
     }
-    auto x_lens = args[0]->get_shape().lens();
+    auto x_lens = args[0]->get_shape().max_lens();
     auto x_type = args[0]->get_shape().type();
     if(std::any_of(args.cbegin() + 1, args.cend(), [](auto a) {
File: src/onnx/parse_binary_op.cpp

@@ -57,6 +57,12 @@ struct parse_binary_op : op_parser<parse_binary_op>
             parser.parse_value(info.attributes.at("broadcast")).at<uint64_t>();
         if(broadcasted != 0)
         {
+            if(std::any_of(args.cbegin(), args.cend(), [](auto a) {
+                   return a->get_shape().dynamic();
+               }))
+            {
+                MIGRAPHX_THROW("Binary op broadcast attribute not supported for dynamic input shapes");
+            }
             uint64_t axis = parser.parse_value(info.attributes.at("axis")).at<uint64_t>();
             auto l        = info.add_instruction(make_op("broadcast",
File: src/onnx/parse_split.cpp

@@ -26,6 +26,9 @@
 #include <migraphx/ranges.hpp>
 #include <migraphx/make_op.hpp>
 #include <migraphx/tune_axis.hpp>
+#include <migraphx/onnx/checks.hpp>
+#include <migraphx/stringutils.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

@@ -55,12 +58,12 @@ struct parse_split : op_parser<parse_split>
         {
             literal s = parser.parse_value(info.attributes.at("split"));
             s.visit([&](auto v) { vec_splits.assign(v.begin(), v.end()); });
-            if(std::accumulate(vec_splits.begin(), vec_splits.end(), int64_t(0)) !=
-               static_cast<int64_t>(lens[tuned_axis]))
-            {
-                MIGRAPHX_THROW("PARSE_SPLIT: sum of split attribute unequal to dim size of axis!");
-            }
         }
+        else if(args.size() == 2)
+        {
+            auto s = args[1]->eval();
+            check_arg_empty(s, "Split: dynamic shape is not supported");
+            s.visit([&](auto v) { vec_splits.assign(v.begin(), v.end()); });
+        }
         // no split attribute, input is equally divided
         else

@@ -74,6 +77,15 @@ struct parse_split : op_parser<parse_split>
             vec_splits.resize(info.num_outputs, dl);
         }

+        if(std::accumulate(vec_splits.begin(), vec_splits.end(), int64_t(0)) !=
+           static_cast<int64_t>(lens[tuned_axis]))
+        {
+            MIGRAPHX_THROW(
+                "PARSE_SPLIT: sum of split attribute unequal to dim size of axis! tuned axis:" +
+                std::to_string(lens[tuned_axis]) + " Output " + to_string_range(vec_splits) +
+                " Rank " + std::to_string(n_rank) + " Len outs " + to_string_range(lens));
+        }
+
         std::vector<instruction_ref> ret_ins;
         int64_t start = 0;
         for(auto sl : vec_splits)
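Note: the validation is now applied once, after the splits are gathered from any of the three sources (the split attribute, a second input, or an even division): their sum must equal the length of the split axis. A small sketch of that check, with illustrative names:

// Sketch of the split-sum validation; names are illustrative only.
#include <cstdint>
#include <numeric>
#include <stdexcept>
#include <vector>

void check_splits(const std::vector<int64_t>& splits, int64_t axis_len)
{
    if(std::accumulate(splits.begin(), splits.end(), int64_t{0}) != axis_len)
        throw std::runtime_error("sum of split attribute unequal to dim size of axis");
}

int main()
{
    check_splits({2, 3, 5}, 10); // ok: 2+3+5 == 10
    // check_splits({2, 3}, 10); // would throw
}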
File: src/pass_manager.cpp

@@ -94,11 +94,19 @@ struct module_pm : module_pass_manager
     virtual void run_pass(const pass& p) override
     {
         assert(mod);
+        timer ts{};
+        using seconds = std::chrono::duration<double>;
         trace("Module: ", mod->name(), ", Pass: ", p.name());
+        const double t1 = ts.record<seconds>();
         assert(mod->validate() == mod->end());
         p.apply(*this);
         trace(*mod);
         validate_pass(*mod, p, *t);
+        const double t2 = ts.record<seconds>();
+        trace("Pass: ", p.name(), " completed in (s): ", (t2 - t1));
     }
 };
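Note: the pattern added here is to record a start and an end time around each pass and trace the difference as a double-seconds duration. A minimal sketch of the same idea with the standard clock (MIGraphX's own timer utility is a stand-in here):

// Minimal sketch of timing a pass; std::chrono stands in for the timer type.
#include <chrono>
#include <iostream>

int main()
{
    using seconds = std::chrono::duration<double>;
    const auto t1 = std::chrono::steady_clock::now();
    // ... run the pass ...
    const auto t2 = std::chrono::steady_clock::now();
    std::cout << "Pass completed in (s): " << seconds(t2 - t1).count() << '\n';
}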
File: src/shape.cpp

@@ -71,6 +71,19 @@ struct shape_impl
     {
     }

+    shape_impl(shape::type_t t,
+               std::vector<std::size_t> mins,
+               std::vector<std::size_t> maxes,
+               std::vector<std::size_t> opts)
+        : m_type(t)
+    {
+        assert(mins.size() == maxes.size() and maxes.size() == opts.size());
+        for(size_t i = 0; i < mins.size(); ++i)
+        {
+            m_dyn_dims.push_back(shape::dynamic_dimension{mins[i], maxes[i], opts[i]});
+        }
+    }
+
     shape_impl(const std::vector<shape>& subs) : m_type(shape::tuple_type), m_shapes(subs) {}

     shape::type_t m_type;

@@ -224,6 +237,14 @@ shape::shape(type_t t, std::vector<shape::dynamic_dimension> dims)
 {
 }

+shape::shape(type_t t,
+             std::vector<std::size_t> mins,
+             std::vector<std::size_t> maxes,
+             std::vector<std::size_t> opts)
+    : impl(std::make_shared<shape_impl>(t, std::move(mins), std::move(maxes), std::move(opts)))
+{
+}
+
 shape::shape(const std::vector<shape>& subs) : impl(std::make_shared<shape_impl>(subs)) {}

 shape::shape(std::shared_ptr<shape_impl> pimpl) : impl(std::move(pimpl)) {}

@@ -244,6 +265,15 @@ const std::vector<std::size_t>& shape::lens() const { return impl->m_lens; }
 const std::vector<std::size_t>& shape::strides() const { return impl->m_strides; }

+std::size_t shape::ndim() const
+{
+    if(this->dynamic())
+    {
+        return dyn_dims().size();
+    }
+    return lens().size();
+}
+
 std::size_t shape::elements() const { return impl->elements(); }

 std::size_t shape::bytes() const

@@ -437,6 +467,16 @@ shape shape::with_type(type_t t) const
     return {c};
 }

+shape shape::to_dynamic() const
+{
+    if(this->dynamic())
+    {
+        return *this;
+    }
+    std::vector<std::size_t> zeroes(this->ndim(), 0);
+    return {type(), lens(), lens(), zeroes};
+}
+
 std::size_t shape::element_space() const { return impl->element_space(); }

 std::string shape::type_string() const { return name(this->type()); }

@@ -464,15 +504,11 @@ bool shape::dynamic_dimension::is_fixed() const { return this->min == this->max;
 bool shape::dynamic_dimension::has_optimal() const { return opt != 0; }

-template <class Self, class F>
-auto shape::dynamic_dimension::reflect(Self& self, F f)
-{
-    return pack(f(self.min, "min"), f(self.max, "max"), f(self.opt, "opt"));
-}
-
 bool operator==(const shape::dynamic_dimension& x, const shape::dynamic_dimension& y)
 {
-    return (x.min == y.min and x.max == y.max and x.opt == y.opt);
+    // don't check opt if both are fixed
+    return (x.min == y.min and x.max == y.max and
+            ((x.is_fixed() and y.is_fixed()) or (x.opt == y.opt)));
 }

 bool operator!=(const shape::dynamic_dimension& x, const shape::dynamic_dimension& y)
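Note: the new operator== relaxes dynamic_dimension equality: when both dimensions are fixed (min == max), the optimal value no longer participates in the comparison. An illustration with a simplified struct (not MIGraphX's actual type):

// Illustration of the relaxed equality; simplified stand-in struct.
#include <cstddef>

struct dyn_dim
{
    std::size_t min, max, opt;
    bool is_fixed() const { return min == max; }
};

bool operator==(const dyn_dim& x, const dyn_dim& y)
{
    // don't check opt if both are fixed
    return x.min == y.min and x.max == y.max and
           ((x.is_fixed() and y.is_fixed()) or x.opt == y.opt);
}

int main()
{
    dyn_dim a{4, 4, 0};
    dyn_dim b{4, 4, 4};
    return (a == b) ? 0 : 1; // equal: both fixed, so opt is ignored
}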
File: src/targets/gpu/compile_miopen.cpp

@@ -64,7 +64,7 @@ std::size_t compile_miopen::compile(operation& op, instruction_ref ins, bool for
 {
     op.from_value({{"int8_x4_format", format}});
     auto v = op.compile(*ctx, ins->get_shape(), to_shapes(ins->inputs()));
-    return v.get("workspace", 0);
+    return v.get<std::size_t>("workspace", 0);
 }

 void compile_miopen::apply(module& m) const
File: src/targets/gpu/compile_ops.cpp

@@ -40,18 +40,25 @@ MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_COMPILE_PARALLEL);

 struct precompile_op
 {
     operation op = op::identity{};
+    std::size_t additional_args = 1;
+    bool ignore_modules         = false;

     template <class Self, class F>
     static auto reflect(Self& self, F f)
     {
-        return pack(f(self.op, "op"));
+        return pack(f(self.op, "op"),
+                    f(self.additional_args, "additional_args"),
+                    f(self.ignore_modules, "ignore_modules"));
     }

     std::string name() const { return "gpu::precompile_op"; }

     shape compute_shape(std::vector<shape> inputs, const std::vector<module_ref>& mods) const
     {
-        inputs.pop_back();
+        // Pop off additional args
+        inputs.resize(inputs.size() - additional_args);
+        if(ignore_modules)
+            return op.compute_shape(inputs);
         return op.compute_shape(inputs, mods);
     }
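Note: the compute_shape change generalizes the old behavior: instead of always popping one trailing output allocation, the op now drops `additional_args` trailing arguments before delegating. A tiny illustration of that trimming (plain ints stand in for the real shape inputs):

// Tiny illustration of trimming a configurable number of trailing args.
#include <cassert>
#include <cstddef>
#include <vector>

int main()
{
    std::vector<int> inputs     = {1, 2, 3, 4}; // last two are extra allocation args
    std::size_t additional_args = 2;
    inputs.resize(inputs.size() - additional_args);
    assert(inputs.size() == 2);
}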
File: src/targets/gpu/fuse_ops.cpp

@@ -772,11 +772,9 @@ struct find_layernorm_pointwise
     {
         auto ins       = r.result;
         auto layernorm = r.instructions["layernorm"];
-        auto* pm       = ins->module_inputs().front();
-
         if(not layernorm->module_inputs().empty())
             return;
+        auto* pm    = ins->module_inputs().front();
         auto inputs = layernorm->inputs();
         inputs.pop_back();
         inputs.insert(inputs.end(), ins->inputs().begin() + 1, ins->inputs().end());

@@ -785,6 +783,37 @@ struct find_layernorm_pointwise
     }
 };

+struct find_concat_pointwise
+{
+    auto matcher() const
+    {
+        return precompile_name("pointwise")(
+            match::arg(0)(precompile_name("concat").bind("concat")));
+    }
+
+    void apply(module& m, const match::matcher_result& r) const
+    {
+        auto ins    = r.result;
+        auto concat = r.instructions["concat"];
+        if(not concat->module_inputs().empty())
+            return;
+        // TODO: Handle type conversions
+        if(ins->get_shape().type() != concat->get_shape().type())
+            return;
+
+        auto* pm    = ins->module_inputs().front();
+        auto inputs = concat->inputs();
+        inputs.pop_back();
+        inputs.insert(inputs.end(), ins->inputs().begin() + 1, ins->inputs().end());
+
+        auto op = concat->get_operator();
+        op.from_value({{"additional_args", ins->inputs().size() - 1}, {"ignore_modules", true}});
+
+        m.replace_instruction(ins, op, inputs, {pm});
+    }
+};
+
 void fuse_ops::apply(module& m) const
 {
     match::find_matches(m, find_contiguous_pointwise{});

@@ -793,6 +822,7 @@ void fuse_ops::apply(module& m) const
     run_passes(m, {dead_code_elimination{}});
     match::find_matches(m,
                         find_layernorm_pointwise{},
+                        find_concat_pointwise{},
                         find_gemm_pointwise{},
                         find_contiguous_tranpose_gemm{},
                         find_commutative_broadcast{});
File: src/targets/gpu/gemm_impl.cpp

@@ -21,7 +21,7 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
-#include <rocblas.h>
+#include <rocblas/rocblas.h>
 #include <migraphx/gpu/gemm_impl.hpp>
 #include <migraphx/reduce_dims.hpp>
 #include <migraphx/permutation.hpp>
File: src/targets/gpu/include/migraphx/gpu/rocblas.hpp

@@ -25,7 +25,7 @@
 #define MIGRAPHX_GUARD_MIGRAPHLIB_ROCBLAS_HPP
 #include <migraphx/manage_ptr.hpp>
 #include <migraphx/config.hpp>
-#include <rocblas.h>
+#include <rocblas/rocblas.h>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
File: src/targets/gpu/jit/concat.cpp

@@ -38,16 +38,19 @@ using namespace migraphx::gpu::gen; // NOLINT

 static const char* const concat_kernel = R"__migraphx__(
 #include <migraphx/kernels/concat.hpp>
 #include <migraphx/kernels/vectorize.hpp>
+#include <migraphx/kernels/ops.hpp>
 #include <args.hpp>

 namespace migraphx {

+${preamble}
+
 extern "C" {
 __global__ void ${kernel}(${params})
 {
-    transform_args(make_tensors(), rotate_last(), ${transformers})(${args})([](auto y, auto... xs) {
-        concat<${axis}>(y, xs...);
+    transform_args(make_tensors(), rotate_last(), ${transformers})(${args})([](auto y,
+                                                                               ${concat_params},
+                                                                               auto... xs) {
+        concat<${axis}>(${concat_args})(${post},
+                                        y, xs...);
     });
 }

@@ -68,28 +71,42 @@ struct concat_compiler : compiler<concat_compiler>
     operation compile_op(context& ctx, const std::vector<shape>& inputs, const value& v) const
     {
         // TODO: Use reduce_dims
+        auto num_of_concat_inputs = v.get("concat_inputs", inputs.size() - 1);
         hip_compile_options options;
         options.inputs = inputs;
         options.output = inputs.back();
         options.params = "-Wno-float-equal";
-        options.kernel_name = v.get("kernel", "concat_kernel");
         auto axis = find_fast_axis(options.inputs);
         auto vec  = vectorize::elements(ctx, axis, options.inputs);
+        options.kernel_name = v.get("kernel", "concat_kernel");
         options.set_launch_params(
             v, compute_global_for(ctx, get_concat_elements(options.inputs) / vec.size, 256));
-        auto src = interpolate_string(concat_kernel,
-                                      {{"kernel", options.kernel_name},
-                                       {"params", enum_params(inputs.size(), "void * private_p")},
-                                       {"args", enum_params(inputs.size(), "private_p")},
-                                       {"transformers", make_transformer_args(vec)},
-                                       {"axis", v.at("axis").to<std::string>()}});
+        auto src = interpolate_string(
+            concat_kernel,
+            {{"kernel", options.kernel_name},
+             {"params", enum_params(inputs.size(), "void * private_p")},
+             {"args", enum_params(inputs.size(), "private_p")},
+             {"concat_params", enum_params(num_of_concat_inputs, "auto concat_x")},
+             {"concat_args", enum_params(num_of_concat_inputs, "concat_x")},
+             {"post", v.get("post", std::string{"op::id{}"})},
+             {"transformers", make_transformer_args(vec)},
+             {"preamble", v.get("preamble", std::string{})},
+             {"axis", v.at("axis").to<std::string>()}});
         return compile_hip_code_object(src, options);
     }

     compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const
     {
-        return replace(compile_op(ctx, to_shapes(ins->inputs()), op.to_value()));
+        auto v = op.to_value();
+        if(not ins->module_inputs().empty())
+        {
+            auto* pm           = ins->module_inputs().front();
+            v["concat_inputs"] = ins->inputs().size() - pm->get_parameter_names().size();
+            v["preamble"]      = generate_pointwise(*pm, "post_concat");
+            v["post"]          = "MIGRAPHX_LIFT(post_concat)";
+            v["kernel"]        = "concat_" + generate_name_from_ops(*pm) + "_kernel";
+        }
+        return replace(compile_op(ctx, to_shapes(ins->inputs()), v));
     }
 };
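Note: the kernel source here is a template whose ${name} placeholders (kernel, params, preamble, post, ...) are filled from a map before the HIP source is compiled. A simplified stand-in for that interpolation mechanism (not MIGraphX's actual interpolate_string implementation):

// Simplified stand-in for the ${name} placeholder substitution used above.
#include <cstddef>
#include <iostream>
#include <map>
#include <string>

std::string interpolate(std::string text, const std::map<std::string, std::string>& vars)
{
    for(const auto& [key, value] : vars)
    {
        const std::string token = "${" + key + "}";
        for(std::size_t pos = text.find(token); pos != std::string::npos;
            pos             = text.find(token, pos + value.size()))
            text.replace(pos, token.size(), value);
    }
    return text;
}

int main()
{
    std::cout << interpolate("__global__ void ${kernel}(${params})",
                             {{"kernel", "concat_kernel"}, {"params", "void* p0, void* p1"}});
}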