gaoqiong / MIGraphX · Commits

Commit 30a0b238, authored Nov 16, 2022 by Khalique Ahmed

    manual merge

Parents: 72aabeb5, 0b6b33bc

Changes: 38 files in this commit. Showing 20 changed files with 476 additions and 135 deletions (+476 −135) on this page.
.github/workflows/ci.yaml                           +33  −23
Dockerfile                                          +2   −1
src/api/include/migraphx/migraphx.hpp               +1   −0
src/common.cpp                                      +97  −14
src/include/migraphx/common.hpp                     +3   −0
src/include/migraphx/op/binary.hpp                  +14  −4
src/include/migraphx/op/broadcast.hpp               +90  −33
src/include/migraphx/op/multibroadcast.hpp          +66  −25
src/include/migraphx/shape.hpp                      +20  −1
src/onnx/parse_batchnorm.cpp                        +1   −1
src/onnx/parse_binary_op.cpp                        +6   −0
src/onnx/parse_split.cpp                            +18  −6
src/pass_manager.cpp                                +8   −0
src/shape.cpp                                       +43  −7
src/targets/gpu/compile_miopen.cpp                  +1   −1
src/targets/gpu/compile_ops.cpp                     +10  −3
src/targets/gpu/fuse_ops.cpp                        +33  −3
src/targets/gpu/gemm_impl.cpp                       +1   −1
src/targets/gpu/include/migraphx/gpu/rocblas.hpp    +1   −1
src/targets/gpu/jit/concat.cpp                      +28  −11
.github/workflows/ci.yaml

@@ -7,7 +7,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
     - name: Cancel Previous Runs
-      uses: styfle/cancel-workflow-action@0.6.0
+      uses: styfle/cancel-workflow-action@0.11.0
       with:
         access_token: ${{ github.token }}

   tidy:
@@ -15,9 +15,19 @@ jobs:
     steps:
     - name: Free space
-      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android
-    - uses: actions/checkout@v2
+      run: |
+        sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android /usr/local/graalvm /usr/local/aws* /usr/local/lib/heroku
+        du . --max-depth=1 -h
+        ls -la
+        cd /usr/local
+        du . --max-depth=1 -h
+        ls -la
+        cd /usr/local/lib
+        echo $(pwd)
+        du . --max-depth=1 -h
+        ls -la
+    - uses: actions/checkout@v3
     # In this step, this action saves a list of existing images,
     # the cache is created without them in the post run.
@@ -34,7 +44,7 @@ jobs:
           message("::set-output name=timestamp::${current_date}")
     - name: Cache files for tidy
-      uses: pat-s/always-upload-cache@v2.1.3
+      uses: pat-s/always-upload-cache@v3.0.11
       with:
         path: tidy-cache
         key: tidy-cache-${{ steps.cache_timestamp.outputs.timestamp }}
@@ -65,8 +75,8 @@ jobs:
     steps:
     - name: Free space
-      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android
-    - uses: actions/checkout@v2
+      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android /usr/local/graalvm /usr/local/aws* /usr/local/lib/heroku
+    - uses: actions/checkout@v3
     # In this step, this action saves a list of existing images,
     # the cache is created without them in the post run.
@@ -110,8 +120,8 @@ jobs:
     steps:
     - name: Free space
-      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android
-    - uses: actions/checkout@v2
+      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android /usr/local/graalvm /usr/local/aws* /usr/local/lib/heroku
+    - uses: actions/checkout@v3
     # In this step, this action saves a list of existing images,
     # the cache is created without them in the post run.
@@ -146,10 +156,10 @@ jobs:
     steps:
     - name: Free space
-      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android
-    - uses: actions/checkout@v2
+      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android /usr/local/graalvm /usr/local/aws* /usr/local/lib/heroku
+    - uses: actions/checkout@v3
     - name: Set up Python
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v4
       with:
         python-version: 3.8
     - name: Install pyflakes
@@ -167,10 +177,10 @@ jobs:
     steps:
     - name: Free space
-      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android
-    - uses: actions/checkout@v2
+      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android /usr/local/graalvm /usr/local/aws* /usr/local/lib/heroku
+    - uses: actions/checkout@v3
     - name: Set up Python
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v4
       with:
         python-version: 3.8
     - name: run License Check
@@ -198,16 +208,16 @@ jobs:
     steps:
     - name: Free space
-      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android
-    - uses: actions/checkout@v2
+      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android /usr/local/graalvm /usr/local/aws* /usr/local/lib/heroku
+    - uses: actions/checkout@v3
     - name: Set up Python
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v4
       with:
         python-version: 3.7
     - name: Cache dependencies
       # Ignore the failure of a step and avoid terminating the job.
       continue-on-error: true
-      uses: actions/cache@v2
+      uses: actions/cache@v3
       with:
         # This path is specific to Ubuntu
         path: ${{ github.workspace }}/cget
@@ -294,16 +304,16 @@ jobs:
     steps:
     - name: Free space
-      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android
-    - uses: actions/checkout@v2
+      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android /usr/local/graalvm /usr/local/aws* /usr/local/lib/heroku
+    - uses: actions/checkout@v3
     - name: Set up Python
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v4
       with:
         python-version: 3.7
     - name: Cache dependencies
       # Ignore the failure of a step and avoid terminating the job.
       continue-on-error: true
-      uses: actions/cache@v2
+      uses: actions/cache@v3
       with:
         # This path is specific to Ubuntu
        path: ${{ github.workspace }}/cget
Dockerfile

@@ -74,7 +74,8 @@ RUN cget -p $PREFIX install facebook/zstd@v1.4.5 -X subdir -DCMAKE_DIR=build/cma
 RUN cget -p $PREFIX install ccache@v4.1 -DENABLE_TESTING=OFF

 # Install newer cmake for onnx runtime
-RUN cget -p /opt/cmake install kitware/cmake@v3.13.4
+ARG CMAKE_VERSION=3.24.2
+RUN cget -p /opt/cmake install -X binary https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-Linux-x86_64.tar.gz

 ARG ONNXRUNTIME_REPO=https://github.com/Microsoft/onnxruntime
 ARG ONNXRUNTIME_BRANCH=main
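Note: with the default CMAKE_VERSION=3.24.2, the -X binary install above resolves to https://github.com/Kitware/CMake/releases/download/v3.24.2/cmake-3.24.2-Linux-x86_64.tar.gz, so a prebuilt CMake archive now replaces the old source build of kitware/cmake@v3.13.4.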
src/api/include/migraphx/migraphx.hpp

@@ -32,6 +32,7 @@
 #include <memory>
 #include <numeric>
 #include <exception>
+#include <array>
 #include <vector>
 #include <cassert>
 #include <iostream>
src/common.cpp

@@ -27,6 +27,7 @@
 #include <migraphx/algorithm.hpp>
 #include <migraphx/stringutils.hpp>
 #include <migraphx/instruction.hpp>
+#include <migraphx/ranges.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -43,6 +44,7 @@ inline namespace MIGRAPHX_INLINE_NS {
 // In this case we need to broadcast the (:,:,1:,:) axis
 // of s0 plus the 1st dimension of s1 giving
 // output_lens = (3,2,7,5)
+//
 std::vector<std::size_t> compute_broadcasted_lens(std::vector<std::size_t> s0,
                                                   std::vector<std::size_t> s1)
 {
@@ -50,25 +52,63 @@ std::vector<std::size_t> compute_broadcasted_lens(std::vector<std::size_t> s0,
         return s0;
     if(s0.size() > s1.size())
         s0.swap(s1);
     std::vector<std::size_t> out_lens(s1);
     auto offset = s1.size() - s0.size();
     std::transform(
         s0.begin(), s0.end(), s1.begin() + offset, out_lens.begin() + offset, [&](auto a, auto b) {
             if(a != b and a != 1 and b != 1)
             {
-                MIGRAPHX_THROW("COMPUTE_BROADCASTLEN: shape {" + to_string_range(s0) +
-                               "} and {" + to_string_range(s1) + "} mismatch!");
+                MIGRAPHX_THROW("COMPUTE_BROADCASTLEN: shape {" + migraphx::to_string_range(s0) +
+                               "} and {" + migraphx::to_string_range(s1) + "} mismatch!");
             }
             return std::max(a, b);
         });
     return out_lens;
 }

+std::vector<shape::dynamic_dimension> compute_broadcasted_dyn_dims(shape s0, shape s1)
+{
+    // change both shapes to dynamic_dimension representation
+    s0 = s0.to_dynamic();
+    s1 = s1.to_dynamic();
+    if(s0.ndim() > s1.ndim())
+    {
+        std::swap(s0, s1);
+    }
+    auto offset = s1.ndim() - s0.ndim();
+    std::vector<shape::dynamic_dimension> out_dims(s1.dyn_dims());
+    shape::dynamic_dimension one_dyn_dim{1, 1, 0};
+    std::transform(s0.dyn_dims().cbegin(),
+                   s0.dyn_dims().cend(),
+                   s1.dyn_dims().cbegin() + offset,
+                   out_dims.begin() + offset,
+                   [&](auto a, auto b) {
+                       if(a == b)
+                       {
+                           return a;
+                       }
+                       else if(a == one_dyn_dim or b == one_dyn_dim)
+                       {
+                           // setting opt to 0, may need to be changed
+                           return shape::dynamic_dimension{
+                               std::max(a.min, b.min), std::max(a.max, b.max), 0};
+                       }
+                       else
+                       {
+                           MIGRAPHX_THROW("COMPUTE_BROADCASTED_DYN_DIMS: dynamic shapes {" +
+                                          migraphx::to_string_range(s0.dyn_dims()) + "} and {" +
+                                          migraphx::to_string_range(s1.dyn_dims()) +
+                                          "} mismatch!");
+                       }
+                   });
+    return out_dims;
+}
+
+// Compute the common (broadcasted) dimensions of a list of fixed shapes
 std::vector<std::size_t> compute_common_lens(const std::vector<shape>& shapes)
 {
     assert(not shapes.empty());
+    assert(std::none_of(shapes.cbegin(), shapes.cend(), [](auto shape) { return shape.dynamic(); }));
     return transform_accumulate(shapes.begin() + 1,
                                 shapes.end(),
                                 shapes.front().lens(),
@@ -114,6 +154,48 @@ instruction_ref insert_common_op(module& m,
                                  const operation& op,
                                  std::vector<instruction_ref> inputs)
 {
+    if(std::any_of(inputs.cbegin(), inputs.cend(), [](auto input) {
+           return input->get_shape().dynamic();
+       }))
+    {
+        // currently only handles the binary case
+        if(inputs.size() != 2)
+        {
+            MIGRAPHX_THROW("INSERT_COMMON_OP: not handled; " +
+                           migraphx::to_string(inputs.size()) +
+                           "inputs, only handle two inputs if any are dynamic shape");
+        }
+        auto c_type     = compute_common_types(to_shapes(inputs));
+        auto c_dyn_dims =
+            compute_broadcasted_dyn_dims(inputs[0]->get_shape(), inputs[1]->get_shape());
+        // following should work for a static or dynamic shape
+        if(inputs[0]->get_shape().dyn_dims() != c_dyn_dims)
+        {
+            inputs[0] = m.insert_instruction(
+                ins,
+                make_op("multibroadcast", {{"out_dyn_dims", to_value(c_dyn_dims)}}),
+                inputs[0],
+                inputs[1]);
+        }
+        if(inputs[1]->get_shape().dyn_dims() != c_dyn_dims)
+        {
+            inputs[1] = m.insert_instruction(
+                ins,
+                make_op("multibroadcast", {{"out_dyn_dims", to_value(c_dyn_dims)}}),
+                inputs[1],
+                inputs[0]);
+        }
+        std::transform(inputs.begin(), inputs.end(), inputs.begin(), [&](auto input) {
+            if(input->get_shape().type() != c_type)
+            {
+                input =
+                    m.insert_instruction(ins, make_op("convert", {{"target_type", c_type}}), input);
+            }
+            return input;
+        });
+    }
+    else
+    {
         auto common = common_shape(to_shapes(inputs));
         std::transform(inputs.begin(), inputs.end(), inputs.begin(), [&](auto input) {
             if(input->get_shape().lens() != common.lens())
@@ -128,6 +210,7 @@ instruction_ref insert_common_op(module& m,
             }
             return input;
         });
+    }
     return m.insert_instruction(ins, op, inputs);
 }
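Note: a standalone sketch of the broadcasting rule compute_broadcasted_lens applies (plain C++, no MIGraphX headers; the function name and main() driver are mine), run on the example from the comment above, (3,2,1,5) against (2,7,1):

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <stdexcept>
#include <vector>

std::vector<std::size_t> broadcast_lens(std::vector<std::size_t> s0, std::vector<std::size_t> s1)
{
    if(s0 == s1)
        return s0;
    if(s0.size() > s1.size())
        s0.swap(s1); // make s0 the shorter (or equal) rank shape
    std::vector<std::size_t> out(s1);
    auto offset = s1.size() - s0.size(); // right-align the shorter shape
    std::transform(s0.begin(), s0.end(), s1.begin() + offset, out.begin() + offset,
                   [](std::size_t a, std::size_t b) {
                       if(a != b and a != 1 and b != 1)
                           throw std::runtime_error("shapes mismatch");
                       return std::max(a, b); // a length-1 dim broadcasts against anything
                   });
    return out;
}

int main()
{
    for(auto d : broadcast_lens({3, 2, 1, 5}, {2, 7, 1}))
        std::cout << d << ' '; // prints: 3 2 7 5
}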
src/include/migraphx/common.hpp

@@ -36,6 +36,9 @@ struct operation;
 std::vector<std::size_t> compute_broadcasted_lens(std::vector<std::size_t> s0,
                                                   std::vector<std::size_t> s1);

+std::vector<shape::dynamic_dimension> compute_broadcasted_dyn_dims(shape s0, shape s1);
+
 shape common_shape(const std::vector<shape>& shapes);

 instruction_ref insert_common_op(module& m,
src/include/migraphx/op/binary.hpp

@@ -28,6 +28,7 @@
 #include <migraphx/check_shapes.hpp>
 #include <migraphx/argument.hpp>
 #include <migraphx/value.hpp>
+#include <migraphx/dyn_output.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -60,10 +61,19 @@ struct binary : op_name<Derived>
     value attributes() const { return base_attributes(); }
     shape compute_shape(std::vector<shape> inputs) const
     {
-        check_shapes{inputs, static_cast<const Derived&>(*this)}.has(2).same_type().same_dims();
+        check_shapes{inputs, static_cast<const Derived&>(*this), true}
+            .has(2)
+            .same_type()
+            .same_dims();
         auto s0 = inputs.at(0);
         auto s1 = inputs.at(1);
-        if(s0 == s1 and s0.packed())
+        if(s0.dynamic() or s1.dynamic())
+        {
+            if(s0 == s1)
+                return s0;
+            MIGRAPHX_THROW("BINARY: " + point_function() + ": fixed-dyn shape for inputs");
+        }
+        else if(s0 == s1 and s0.packed())
         {
             return s0;
         }
@@ -81,9 +91,9 @@ struct binary : op_name<Derived>
         }
     }

-    argument compute(const shape& output_shape, std::vector<argument> args) const
+    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
     {
-        argument result{output_shape};
+        argument result{dyn_out.computed_shape};
         visit_all(result, args[0], args[1])([&](auto output, auto input1, auto input2) {
             std::transform(input1.begin(),
                            input1.end(),
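Note: a minimal standalone sketch of the elementwise pattern binary::compute drives (plain C++; the data and main() are mine): allocate the result from the shape computed at evaluation time (dyn_out.computed_shape in the diff, sized from the actual argument shapes), then std::transform both inputs through the derived op's function, here plain addition:

#include <algorithm>
#include <functional>
#include <iostream>
#include <vector>

int main()
{
    std::vector<float> input1 = {1, 2, 3, 4};
    std::vector<float> input2 = {10, 20, 30, 40};
    // With dynamic shapes, the output buffer is sized from the shape computed
    // at eval time rather than from a shape fixed at compile time.
    std::vector<float> result(input1.size());
    std::transform(input1.begin(), input1.end(), input2.begin(), result.begin(), std::plus<>{});
    for(auto x : result)
        std::cout << x << ' '; // prints: 11 22 33 44
}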
src/include/migraphx/op/broadcast.hpp

@@ -27,23 +27,30 @@
 #include <migraphx/check_shapes.hpp>
 #include <migraphx/argument.hpp>
 #include <migraphx/config.hpp>
+#include <migraphx/dyn_output.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace op {

-/// The broadcast operator performs the numpy-style broadcasting of an axis of a given tensor. This
-/// is achieved primarily by setting the stride of the broadcasted axis to zero. Linear indicies are
-/// computed from multi-indicies by computing the inner product on the multi-index with the strides.
-/// For example, if we have a tensor A(2,3) it has lengths of (2,3) and strides of (3,1). If we want
-/// to compute the linear offset that corresponds to the element on the 2nd row (i = 1) and 3rd
-/// column (j = 2), we compute the following inner product (1,2) dot (3, 1) = 1*3 + 2*1 = 5. It is
-/// obvious from there that we can negate the effects of a given axis by setting the stride of that
-/// axis to zero.
+/**
+ * 1 input version:
+ * Broadcasts a tensor from the original shape to the broadcast_lens by setting the stride of
+ * broadcasted dimensions to zero. The `axis` attribute for a 1D input shape is the output
+ * dimension that stays the same. ex: broadcasting shape [1024] -> [4, 1024, 3] has axis = 1.
+ * For higher rank input shapes, axis is an offset parameter for the broadcasting, such that this
+ * operator works in the opposite direction of NumPy broadcasting. ex: broadcasting shape
+ * [2, 2] -> [2, 2, 3] with axis = 0.
+ *
+ * 2 input version:
+ * Broadcast the first input 1D shape into the second input shape based on the axis parameter.
+ * Handles broadcasting a 1D static shape into a higher rank dynamic shape.
+ * broadcast_lens is not used.
+ */
 struct broadcast
 {
     uint64_t axis = 0;
-    std::vector<std::size_t> broadcast_lens;
+    std::vector<std::size_t> broadcast_lens = {};

     template <class Self, class F>
     static auto reflect(Self& self, F f)
@@ -54,36 +61,86 @@ struct broadcast
     std::string name() const { return "broadcast"; }
     shape compute_shape(std::vector<shape> inputs) const
     {
-        auto input = inputs.at(0);
-        auto t     = input.type();
-        std::vector<size_t> bcast_strides(broadcast_lens.size(), 0);
-        // the broacast op is deprecated now, so not handling the negative
-        // value of axis anymore
-        if(axis >= broadcast_lens.size())
-        {
-            MIGRAPHX_THROW("BROADCAST : axis is out of range");
-        }
-        if(broadcast_lens.size() - axis < input.lens().size())
-        {
-            MIGRAPHX_THROW("BROADCAST: (broadcast ndims - axis) is less than input ndims");
-        }
-        if(not std::equal(input.lens().begin(), input.lens().end(), broadcast_lens.begin() + axis))
-        {
-            MIGRAPHX_THROW("BROADCAST: when broadcasting, succeeding sizes must match");
-        }
-        std::copy(input.strides().begin(), input.strides().end(), bcast_strides.begin() + axis);
-        shape output{t, broadcast_lens, std::move(bcast_strides)};
-        if(output.elements() < input.elements())
-            MIGRAPHX_THROW("BROADCAST: output size must be greater than or equal to input size");
-        return output;
+        check_shapes{inputs, *this, true}.has(1, 2);
+        auto s0 = inputs.at(0);
+        auto t  = s0.type();
+        if(inputs.size() == 1)
+        {
+            // the ONNX broadcast op is deprecated now, so not handling the negative
+            // value of axis anymore
+            if(axis >= broadcast_lens.size())
+            {
+                MIGRAPHX_THROW("BROADCAST : axis " + migraphx::to_string(axis) +
+                               " is out of range");
+            }
+            if(broadcast_lens.size() - axis < s0.lens().size())
+            {
+                MIGRAPHX_THROW("BROADCAST: (broadcast ndims - axis) is less than s0 ndims");
+            }
+            if(not std::equal(s0.lens().begin(), s0.lens().end(), broadcast_lens.begin() + axis))
+            {
+                MIGRAPHX_THROW("BROADCAST: when broadcasting, succeeding sizes must match");
+            }
+            std::vector<size_t> bcast_strides(broadcast_lens.size(), 0);
+            std::copy(s0.strides().begin(), s0.strides().end(), bcast_strides.begin() + axis);
+            shape output{t, broadcast_lens, std::move(bcast_strides)};
+            if(output.elements() < s0.elements())
+            {
+                // don't think this can occur?
+                MIGRAPHX_THROW("BROADCAST: output size must be greater than or equal to s0 size");
+            }
+            return output;
+        }
+        else
+        {
+            // two inputs
+            auto s1 = inputs.at(1);
+            if(s0.dynamic())
+            {
+                MIGRAPHX_THROW("BROADCAST_2in: s0 is a dynamic shape, does not handle broadcasting "
+                               "a dynamic shape");
+            }
+            if(s0.ndim() != 1)
+            {
+                MIGRAPHX_THROW("BROADCAST_2in: s0 has ndim " + migraphx::to_string(s0.ndim()) +
+                               ", only handle ndim = 1");
+            }
+            if(axis >= s1.ndim())
+            {
+                MIGRAPHX_THROW("BROADCAST_2in: axis " + migraphx::to_string(axis) +
+                               " is out of range");
+            }
+            if(s1.dynamic())
+            {
+                s0 = s0.to_dynamic();
+                if(s0.dyn_dims()[0] != s1.dyn_dims()[axis])
+                {
+                    MIGRAPHX_THROW("BROADCAST_2in: s0 length doesn't match with dynamic s1 axis "
+                                   "dimension length (" +
+                                   migraphx::to_string(s0.dyn_dims()[0]) +
                                   " != " + migraphx::to_string(s1.dyn_dims()[axis]) + ")");
+                }
+                return s1;
+            }
+            if(s0.lens()[0] != s1.lens()[axis])
+            {
+                MIGRAPHX_THROW("BROADCAST_2in: s0 length doesn't match with static s1 axis "
+                               "dimension length (" +
+                               migraphx::to_string(s0.lens()[0]) +
                               " != " + migraphx::to_string(s1.lens()[axis]) + ")");
+            }
+            std::vector<size_t> bcast_strides(s1.ndim(), 0);
+            std::copy(s0.strides().begin(), s0.strides().end(), bcast_strides.begin() + axis);
+            shape output{t, s1.lens(), std::move(bcast_strides)};
+            return output;
+        }
     }

-    argument compute(shape output_shape, std::vector<argument> args) const
+    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
     {
-        return args[0].reshape(output_shape);
+        return args[0].reshape(dyn_out.computed_shape);
     }
     std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 0; }
 };
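Note: a standalone sketch of the zero-stride trick the removed doc comment describes (plain C++; the function and main() are mine). The linear offset of an element is the inner product of its multi-index with the strides, so setting a stride to 0 makes every index along that axis read the same element:

#include <cstddef>
#include <iostream>
#include <vector>

std::size_t linear_offset(const std::vector<std::size_t>& idx,
                          const std::vector<std::size_t>& strides)
{
    std::size_t off = 0;
    for(std::size_t k = 0; k < idx.size(); ++k)
        off += idx[k] * strides[k]; // inner product of multi-index with strides
    return off;
}

int main()
{
    // A 2x3 tensor has strides (3,1): element (1,2) -> 1*3 + 2*1 = 5
    std::cout << linear_offset({1, 2}, {3, 1}) << '\n'; // 5
    // Broadcasting a [3] vector to [2,3] uses strides (0,1): both rows
    // alias the same three elements.
    std::cout << linear_offset({0, 2}, {0, 1}) << ' '
              << linear_offset({1, 2}, {0, 1}) << '\n'; // 2 2
}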
src/include/migraphx/op/multibroadcast.hpp

@@ -26,64 +26,105 @@
 #include <migraphx/check_shapes.hpp>
 #include <migraphx/argument.hpp>
+#include <migraphx/dyn_output.hpp>
+#include <migraphx/common.hpp>
 #include <migraphx/config.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace op {

+/**
+ * Broadcast multiple dimensions between two tensors.
+ * Two versions of this operator: one input and two inputs.
+ * One input version uses output_lens attribute and broadcasts to it.
+ * Two inputs version broadcasts both inputs to the common shape at evaluation time.
+ */
 struct multibroadcast
 {
-    std::vector<std::size_t> output_lens;
+    std::vector<std::size_t> output_lens = {};
+    // optional attribute
+    std::vector<shape::dynamic_dimension> output_dyn_dims = {};

     template <class Self, class F>
     static auto reflect(Self& self, F f)
     {
-        return pack(f(self.output_lens, "out_lens"));
+        return pack(f(self.output_lens, "out_lens"), f(self.output_dyn_dims, "out_dyn_dims"));
     }

     std::string name() const { return "multibroadcast"; }

     shape compute_shape(std::vector<shape> inputs) const
     {
-        check_shapes{inputs, *this}.has(1);
+        check_shapes{inputs, *this, true}.has(1, 2);

         auto t  = inputs.at(0).type();
-        auto input = inputs.at(0);
+        auto s0 = inputs.at(0);

-        if(input.lens().empty())
+        if(s0.max_lens().empty())
         {
-            MIGRAPHX_THROW("MULTIBROADCAST: inputs dimensions should be > 0");
+            MIGRAPHX_THROW("MULTIBROADCAST: input dimensions should be > 0");
         }

-        if(input.lens().size() > output_lens.size())
-        {
-            MIGRAPHX_THROW("MULTIBROADCAST: inputs dimensions should <= output size");
-        }
+        auto make_bcast_strides = [&](std::vector<std::size_t> bcast_lens, std::size_t offset) {
+            std::vector<size_t> bcast_strides(bcast_lens.size(), 0);
+            for(std::ptrdiff_t i = s0.lens().size() - 1; i >= 0; i--)
+            {
+                if(bcast_lens[i + offset] == s0.lens()[i])
+                {
+                    bcast_strides[i + offset] = s0.strides()[i];
+                }
+            }
+            return bcast_strides;
+        };

-        auto offset = output_lens.size() - input.lens().size();
-        for(std::ptrdiff_t i = input.lens().size() - 1; i >= 0; i--)
+        if(inputs.size() == 1)
         {
-            if(output_lens[i + offset] != input.lens()[i] and input.lens()[i] != 1)
+            if(s0.lens().size() > output_lens.size())
             {
-                MIGRAPHX_THROW("MULTIBROADCAST: input shape {" + to_string_range(input.lens()) +
-                               "} cannot be broadcasted to {" + to_string_range(output_lens) +
-                               "}!");
+                MIGRAPHX_THROW("MULTIBROADCAST: input dimensions should <= output size");
             }
-        }

-        std::vector<size_t> bcast_strides(output_lens.size(), 0);
-        for(std::ptrdiff_t i = input.lens().size() - 1; i >= 0; i--)
-        {
-            if(output_lens[i + offset] == input.lens()[i])
+            auto offset = output_lens.size() - s0.lens().size();
+            for(std::ptrdiff_t i = s0.lens().size() - 1; i >= 0; i--)
             {
-                bcast_strides[i + offset] = input.strides()[i];
+                if(output_lens[i + offset] != s0.lens()[i] and s0.lens()[i] != 1)
+                {
+                    MIGRAPHX_THROW("MULTIBROADCAST: input shape {" + to_string_range(s0.lens()) +
+                                   "} cannot be broadcasted to {" + to_string_range(output_lens) +
+                                   "}!");
+                }
             }
-        }

-        return {t, output_lens, bcast_strides};
+            auto bcast_strides = make_bcast_strides(output_lens, offset);
+            return {t, output_lens, std::move(bcast_strides)};
+        }
+        else
+        {
+            // two inputs
+            auto s1 = inputs.at(1);
+            if(s0.dynamic() or s1.dynamic())
+            {
+                if(not output_dyn_dims.empty())
+                {
+                    return {t, output_dyn_dims};
+                }
+                return {t, compute_broadcasted_dyn_dims(s0, s1)};
+            }
+            else
+            {
+                auto bcast_lens    = compute_broadcasted_lens(s0.lens(), s1.lens());
+                auto offset        = bcast_lens.size() - s0.lens().size();
+                auto bcast_strides = make_bcast_strides(bcast_lens, offset);
+                return {t, std::move(bcast_lens), std::move(bcast_strides)};
+            }
+        }
     }

-    argument compute(shape output_shape, std::vector<argument> args) const
+    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
     {
-        return args[0].reshape(output_shape);
+        return args[0].reshape(dyn_out.computed_shape);
     }
     std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 0; }
 };
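Note: a standalone sketch of the stride computation the make_bcast_strides lambda performs for the static two-input case (plain C++; the sample shapes are mine): after computing the common lens, an input stride survives only where the input length actually matches the output length, and every other position gets stride 0:

#include <cstddef>
#include <iostream>
#include <vector>

int main()
{
    std::vector<std::size_t> s0_lens    = {3, 1};
    std::vector<std::size_t> s0_strides = {1, 1};    // a packed (3,1) tensor
    std::vector<std::size_t> bcast_lens = {2, 3, 4}; // common lens against a (2,3,4) s1

    std::size_t offset = bcast_lens.size() - s0_lens.size(); // right-align s0
    std::vector<std::size_t> bcast_strides(bcast_lens.size(), 0);
    for(std::ptrdiff_t i = s0_lens.size() - 1; i >= 0; i--)
    {
        // keep the stride only where the lengths match; broadcasted dims stay 0
        if(bcast_lens[i + offset] == s0_lens[i])
            bcast_strides[i + offset] = s0_strides[i];
    }
    for(auto s : bcast_strides)
        std::cout << s << ' '; // prints: 0 1 0
}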
src/include/migraphx/shape.hpp

@@ -30,6 +30,7 @@
 #include <numeric>
 #include <memory>
 #include <migraphx/functional.hpp>
+#include <migraphx/errors.hpp>
 #include <migraphx/half.hpp>
 #include <migraphx/config.hpp>
@@ -89,7 +90,10 @@ struct shape
         std::size_t opt = 0;

         template <class Self, class F>
-        static auto reflect(Self& self, F f);
+        static auto reflect(Self& self, F f)
+        {
+            return pack(f(self.min, "min"), f(self.max, "max"), f(self.opt, "opt"));
+        }

         bool is_fixed() const;
         bool has_optimal() const;
@@ -115,6 +119,12 @@ struct shape
     shape(type_t t, std::vector<dynamic_dimension> dims);

+    // Construct a dynamic shape from three sets of lengths (of the same rank)
+    shape(type_t t,
+          std::vector<std::size_t> mins,
+          std::vector<std::size_t> maxes,
+          std::vector<std::size_t> opts);
+
     template <class Range>
     shape(type_t t, const Range& l) : shape(t, std::vector<std::size_t>(l.begin(), l.end()))
     {
@@ -136,6 +146,12 @@ struct shape
     const std::vector<std::size_t>& lens() const;
     const std::vector<std::size_t>& strides() const;

+    /*!
+     * The number of dimensions in the shape.
+     * Same as the number of indices required to get a data value.
+     */
+    std::size_t ndim() const;
+
     /*!
      * Return the number of elements in the tensor.
      */
@@ -221,6 +237,9 @@ struct shape
     shape with_type(type_t t) const;

+    // convert the shape to an equivalent dynamic shape
+    shape to_dynamic() const;
+
     friend bool operator==(const shape& x, const shape& y);
     friend bool operator!=(const shape& x, const shape& y);
     friend std::ostream& operator<<(std::ostream& os, const shape& x);
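Note: a standalone sketch of how the new three-vector constructor zips its arguments into dynamic_dimension{min, max, opt} entries, mirroring the shape_impl constructor added in src/shape.cpp (types simplified here; the struct and main() are mine):

#include <cassert>
#include <cstddef>
#include <iostream>
#include <vector>

struct dynamic_dimension
{
    std::size_t min, max, opt;
};

int main()
{
    std::vector<std::size_t> mins  = {1, 3};
    std::vector<std::size_t> maxes = {4, 3};
    std::vector<std::size_t> opts  = {2, 0};
    assert(mins.size() == maxes.size() and maxes.size() == opts.size());

    // zip the three parallel vectors into per-dimension {min, max, opt} records
    std::vector<dynamic_dimension> dyn_dims;
    for(std::size_t i = 0; i < mins.size(); ++i)
        dyn_dims.push_back({mins[i], maxes[i], opts[i]});

    for(const auto& d : dyn_dims)
        std::cout << '{' << d.min << ',' << d.max << ',' << d.opt << "} ";
    // prints: {1,4,2} {3,3,0}
}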
src/onnx/parse_batchnorm.cpp

@@ -44,7 +44,7 @@ struct parse_batchnorm : op_parser<parse_batchnorm>
         {
             epsilon = parser.parse_value(info.attributes.at("epsilon")).at<float>();
         }
-        auto x_lens = args[0]->get_shape().lens();
+        auto x_lens = args[0]->get_shape().max_lens();
         auto x_type = args[0]->get_shape().type();
         if(std::any_of(args.cbegin() + 1, args.cend(), [](auto a) {
src/onnx/parse_binary_op.cpp

@@ -57,6 +57,12 @@ struct parse_binary_op : op_parser<parse_binary_op>
                 parser.parse_value(info.attributes.at("broadcast")).at<uint64_t>();
             if(broadcasted != 0)
             {
+                if(std::any_of(args.cbegin(), args.cend(), [](auto a) {
+                       return a->get_shape().dynamic();
+                   }))
+                {
+                    MIGRAPHX_THROW(
+                        "Binary op broadcast attribute not supported for dynamic input shapes");
+                }
                 uint64_t axis = parser.parse_value(info.attributes.at("axis")).at<uint64_t>();
                 auto l        = info.add_instruction(make_op("broadcast",
src/onnx/parse_split.cpp

@@ -26,6 +26,9 @@
 #include <migraphx/ranges.hpp>
 #include <migraphx/make_op.hpp>
 #include <migraphx/tune_axis.hpp>
+#include <migraphx/onnx/checks.hpp>
+#include <migraphx/stringutils.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -55,12 +58,12 @@ struct parse_split : op_parser<parse_split>
         {
             literal s = parser.parse_value(info.attributes.at("split"));
             s.visit([&](auto v) { vec_splits.assign(v.begin(), v.end()); });
-            if(std::accumulate(vec_splits.begin(), vec_splits.end(), int64_t(0)) !=
-               static_cast<int64_t>(lens[tuned_axis]))
-            {
-                MIGRAPHX_THROW("PARSE_SPLIT: sum of split attribute unequal to dim size of axis!");
-            }
         }
+        else if(args.size() == 2)
+        {
+            auto s = args[1]->eval();
+            check_arg_empty(s, "Split: dynamic shape is not supported");
+            s.visit([&](auto v) { vec_splits.assign(v.begin(), v.end()); });
+        }
         // no split attribute, input is equally divided
         else
@@ -74,6 +77,15 @@ struct parse_split : op_parser<parse_split>
             vec_splits.resize(info.num_outputs, dl);
         }

+        if(std::accumulate(vec_splits.begin(), vec_splits.end(), int64_t(0)) !=
+           static_cast<int64_t>(lens[tuned_axis]))
+        {
+            MIGRAPHX_THROW(
+                "PARSE_SPLIT: sum of split attribute unequal to dim size of axis! tuned axis:" +
+                std::to_string(lens[tuned_axis]) + " Output " + to_string_range(vec_splits) +
+                " Rank " + std::to_string(n_rank) + " Len outs " + to_string_range(lens));
+        }
+
         std::vector<instruction_ref> ret_ins;
         int64_t start = 0;
         for(auto sl : vec_splits)
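Note: a standalone sketch of the relocated validation (plain C++; the sample values are mine). Whichever way vec_splits was obtained (attribute, second input, or equal division), the sum of the requested split sizes must equal the length of the tuned axis:

#include <cstdint>
#include <iostream>
#include <numeric>
#include <vector>

int main()
{
    std::vector<int64_t> vec_splits = {2, 3, 4};
    int64_t axis_len = 9; // length of the axis being split
    bool ok = std::accumulate(vec_splits.begin(), vec_splits.end(), int64_t(0)) == axis_len;
    std::cout << (ok ? "valid split" : "sum of splits != axis length") << '\n'; // valid split
}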
src/pass_manager.cpp

@@ -94,11 +94,19 @@ struct module_pm : module_pass_manager
     virtual void run_pass(const pass& p) override
     {
         assert(mod);
+        timer ts{};
+        using seconds = std::chrono::duration<double>;
         trace("Module: ", mod->name(), ", Pass: ", p.name());
+        const double t1 = ts.record<seconds>();
         assert(mod->validate() == mod->end());
         p.apply(*this);
         trace(*mod);
         validate_pass(*mod, p, *t);
+        const double t2 = ts.record<seconds>();
+        trace("Pass: ", p.name(), " completed in (s): ", (t2 - t1));
     }
 };
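Note: a standalone sketch of the per-pass timing pattern this adds (the timer/record&lt;seconds&gt;() utility is MIGraphX's; plain std::chrono is shown here, and the sleep stands in for p.apply(*this)):

#include <chrono>
#include <iostream>
#include <thread>

int main()
{
    using seconds = std::chrono::duration<double>;
    auto start = std::chrono::steady_clock::now();
    std::this_thread::sleep_for(std::chrono::milliseconds(10)); // stand-in for the pass body
    auto stop = std::chrono::steady_clock::now();
    // report the elapsed time as double seconds, like the new trace line
    std::cout << "Pass completed in (s): " << seconds(stop - start).count() << '\n';
}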
src/shape.cpp

@@ -71,6 +71,19 @@ struct shape_impl
     {
     }

+    shape_impl(shape::type_t t,
+               std::vector<std::size_t> mins,
+               std::vector<std::size_t> maxes,
+               std::vector<std::size_t> opts)
+        : m_type(t)
+    {
+        assert(mins.size() == maxes.size() and maxes.size() == opts.size());
+        for(size_t i = 0; i < mins.size(); ++i)
+        {
+            m_dyn_dims.push_back(shape::dynamic_dimension{mins[i], maxes[i], opts[i]});
+        }
+    }
+
     shape_impl(const std::vector<shape>& subs) : m_type(shape::tuple_type), m_shapes(subs) {}

     shape::type_t m_type;
@@ -224,6 +237,14 @@ shape::shape(type_t t, std::vector<shape::dynamic_dimension> dims)
 {
 }

+shape::shape(type_t t,
+             std::vector<std::size_t> mins,
+             std::vector<std::size_t> maxes,
+             std::vector<std::size_t> opts)
+    : impl(std::make_shared<shape_impl>(t, std::move(mins), std::move(maxes), std::move(opts)))
+{
+}
+
 shape::shape(const std::vector<shape>& subs) : impl(std::make_shared<shape_impl>(subs)) {}

 shape::shape(std::shared_ptr<shape_impl> pimpl) : impl(std::move(pimpl)) {}
@@ -244,6 +265,15 @@ const std::vector<std::size_t>& shape::lens() const { return impl->m_lens; }
 const std::vector<std::size_t>& shape::strides() const { return impl->m_strides; }

+std::size_t shape::ndim() const
+{
+    if(this->dynamic())
+    {
+        return dyn_dims().size();
+    }
+    return lens().size();
+}
+
 std::size_t shape::elements() const { return impl->elements(); }

 std::size_t shape::bytes() const
@@ -437,6 +467,16 @@ shape shape::with_type(type_t t) const
     return {c};
 }

+shape shape::to_dynamic() const
+{
+    if(this->dynamic())
+    {
+        return *this;
+    }
+    std::vector<std::size_t> zeroes(this->ndim(), 0);
+    return {type(), lens(), lens(), zeroes};
+}
+
 std::size_t shape::element_space() const { return impl->element_space(); }

 std::string shape::type_string() const { return name(this->type()); }
@@ -464,15 +504,11 @@ bool shape::dynamic_dimension::is_fixed() const { return this->min == this->max;
 bool shape::dynamic_dimension::has_optimal() const { return opt != 0; }

-template <class Self, class F>
-auto shape::dynamic_dimension::reflect(Self& self, F f)
-{
-    return pack(f(self.min, "min"), f(self.max, "max"), f(self.opt, "opt"));
-}
-
 bool operator==(const shape::dynamic_dimension& x, const shape::dynamic_dimension& y)
 {
-    return (x.min == y.min and x.max == y.max and x.opt == y.opt);
+    // don't check opt if both are fixed
+    return (x.min == y.min and x.max == y.max and
+            ((x.is_fixed() and y.is_fixed()) or (x.opt == y.opt)));
 }

 bool operator!=(const shape::dynamic_dimension& x, const shape::dynamic_dimension& y)
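Note: a standalone sketch of the two behaviors above (plain C++; the struct and main() are mine). to_dynamic() maps each fixed length L to {min=L, max=L, opt=0}, and dynamic_dimension equality now skips the opt field when both sides are fixed (min == max):

#include <cstddef>
#include <iostream>
#include <vector>

struct dynamic_dimension
{
    std::size_t min, max, opt;
    bool is_fixed() const { return min == max; }
};

bool operator==(const dynamic_dimension& x, const dynamic_dimension& y)
{
    // don't check opt if both are fixed
    return x.min == y.min and x.max == y.max and
           ((x.is_fixed() and y.is_fixed()) or x.opt == y.opt);
}

int main()
{
    // to_dynamic() of a static {2, 3} shape: each length L becomes {L, L, 0}
    std::vector<std::size_t> lens = {2, 3};
    std::vector<dynamic_dimension> dyn;
    for(auto l : lens)
        dyn.push_back({l, l, 0});
    for(const auto& d : dyn)
        std::cout << '{' << d.min << ',' << d.max << ',' << d.opt << "} "; // {2,2,0} {3,3,0}
    std::cout << '\n';

    // Fixed dims compare equal regardless of opt...
    std::cout << (dynamic_dimension{3, 3, 0} == dynamic_dimension{3, 3, 3}) << '\n'; // 1
    // ...but non-fixed dims still require matching opt.
    std::cout << (dynamic_dimension{1, 4, 0} == dynamic_dimension{1, 4, 2}) << '\n'; // 0
}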
src/targets/gpu/compile_miopen.cpp

@@ -64,7 +64,7 @@ std::size_t compile_miopen::compile(operation& op, instruction_ref ins, bool for
 {
     op.from_value({{"int8_x4_format", format}});
     auto v = op.compile(*ctx, ins->get_shape(), to_shapes(ins->inputs()));
-    return v.get("workspace", 0);
+    return v.get<std::size_t>("workspace", 0);
 }

 void compile_miopen::apply(module& m) const
src/targets/gpu/compile_ops.cpp

@@ -40,18 +40,25 @@ MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_COMPILE_PARALLEL);
 struct precompile_op
 {
     operation op = op::identity{};
+    std::size_t additional_args = 1;
+    bool ignore_modules         = false;

     template <class Self, class F>
     static auto reflect(Self& self, F f)
     {
-        return pack(f(self.op, "op"));
+        return pack(f(self.op, "op"),
+                    f(self.additional_args, "additional_args"),
+                    f(self.ignore_modules, "ignore_modules"));
     }

     std::string name() const { return "gpu::precompile_op"; }

     shape compute_shape(std::vector<shape> inputs, const std::vector<module_ref>& mods) const
     {
-        inputs.pop_back();
+        // Pop off additional args
+        inputs.resize(inputs.size() - additional_args);
+        if(ignore_modules)
+            return op.compute_shape(inputs);
         return op.compute_shape(inputs, mods);
     }
src/targets/gpu/fuse_ops.cpp

@@ -772,11 +772,9 @@ struct find_layernorm_pointwise
     {
         auto ins       = r.result;
         auto layernorm = r.instructions["layernorm"];
-        auto* pm       = ins->module_inputs().front();

         if(not layernorm->module_inputs().empty())
             return;

+        auto* pm    = ins->module_inputs().front();
         auto inputs = layernorm->inputs();
         inputs.pop_back();
         inputs.insert(inputs.end(), ins->inputs().begin() + 1, ins->inputs().end());
@@ -785,6 +783,37 @@ struct find_layernorm_pointwise
     }
 };

+struct find_concat_pointwise
+{
+    auto matcher() const
+    {
+        return precompile_name("pointwise")(
+            match::arg(0)(precompile_name("concat").bind("concat")));
+    }
+
+    void apply(module& m, const match::matcher_result& r) const
+    {
+        auto ins    = r.result;
+        auto concat = r.instructions["concat"];
+
+        if(not concat->module_inputs().empty())
+            return;
+
+        // TODO: Handle type conversions
+        if(ins->get_shape().type() != concat->get_shape().type())
+            return;
+
+        auto* pm    = ins->module_inputs().front();
+        auto inputs = concat->inputs();
+        inputs.pop_back();
+        inputs.insert(inputs.end(), ins->inputs().begin() + 1, ins->inputs().end());
+
+        auto op = concat->get_operator();
+        op.from_value({{"additional_args", ins->inputs().size() - 1}, {"ignore_modules", true}});
+
+        m.replace_instruction(ins, op, inputs, {pm});
+    }
+};
+
 void fuse_ops::apply(module& m) const
 {
     match::find_matches(m, find_contiguous_pointwise{});
@@ -793,6 +822,7 @@ void fuse_ops::apply(module& m) const
     run_passes(m, {dead_code_elimination{}});
     match::find_matches(m,
                         find_layernorm_pointwise{},
+                        find_concat_pointwise{},
                         find_gemm_pointwise{},
                         find_contiguous_tranpose_gemm{},
                         find_commutative_broadcast{});
src/targets/gpu/gemm_impl.cpp

@@ -21,7 +21,7 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
-#include <rocblas.h>
+#include <rocblas/rocblas.h>
 #include <migraphx/gpu/gemm_impl.hpp>
 #include <migraphx/reduce_dims.hpp>
 #include <migraphx/permutation.hpp>
src/targets/gpu/include/migraphx/gpu/rocblas.hpp

@@ -25,7 +25,7 @@
 #define MIGRAPHX_GUARD_MIGRAPHLIB_ROCBLAS_HPP
 #include <migraphx/manage_ptr.hpp>
 #include <migraphx/config.hpp>
-#include <rocblas.h>
+#include <rocblas/rocblas.h>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
src/targets/gpu/jit/concat.cpp

@@ -38,16 +38,19 @@ using namespace migraphx::gpu::gen; // NOLINT
 static const char* const concat_kernel = R"__migraphx__(
 #include <migraphx/kernels/concat.hpp>
 #include <migraphx/kernels/vectorize.hpp>
+#include <migraphx/kernels/ops.hpp>
 #include <args.hpp>

 namespace migraphx {

+${preamble}
+
 extern "C" {
 __global__ void ${kernel}(${params})
 {
-    transform_args(make_tensors(), rotate_last(), ${transformers})(${args})([](auto y, auto... xs) {
-        concat<${axis}>(y, xs...);
+    transform_args(make_tensors(), rotate_last(), ${transformers})(${args})([](auto y, ${concat_params}, auto... xs) {
+        concat<${axis}>(${concat_args})(${post}, y, xs...);
     });
 }
@@ -68,28 +71,42 @@ struct concat_compiler : compiler<concat_compiler>
     operation compile_op(context& ctx, const std::vector<shape>& inputs, const value& v) const
     {
         // TODO: Use reduce_dims
+        auto num_of_concat_inputs = v.get("concat_inputs", inputs.size() - 1);
         hip_compile_options options;
         options.inputs = inputs;
         options.output = inputs.back();
         options.params = "-Wno-float-equal";
-        options.kernel_name = v.get("kernel", "concat_kernel");
         auto axis = find_fast_axis(options.inputs);
         auto vec  = vectorize::elements(ctx, axis, options.inputs);
+        options.kernel_name = v.get("kernel", "concat_kernel");
         options.set_launch_params(
             v, compute_global_for(ctx, get_concat_elements(options.inputs) / vec.size, 256));
-        auto src = interpolate_string(concat_kernel,
-                                      {{"kernel", options.kernel_name},
-                                       {"params", enum_params(inputs.size(), "void * private_p")},
-                                       {"args", enum_params(inputs.size(), "private_p")},
-                                       {"transformers", make_transformer_args(vec)},
-                                       {"axis", v.at("axis").to<std::string>()}});
+        auto src = interpolate_string(
+            concat_kernel,
+            {{"kernel", options.kernel_name},
+             {"params", enum_params(inputs.size(), "void * private_p")},
+             {"args", enum_params(inputs.size(), "private_p")},
+             {"concat_params", enum_params(num_of_concat_inputs, "auto concat_x")},
+             {"concat_args", enum_params(num_of_concat_inputs, "concat_x")},
+             {"post", v.get("post", std::string{"op::id{}"})},
+             {"transformers", make_transformer_args(vec)},
+             {"preamble", v.get("preamble", std::string{})},
+             {"axis", v.at("axis").to<std::string>()}});
         return compile_hip_code_object(src, options);
     }

     compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const
     {
-        return replace(compile_op(ctx, to_shapes(ins->inputs()), op.to_value()));
+        auto v = op.to_value();
+        if(not ins->module_inputs().empty())
+        {
+            auto* pm           = ins->module_inputs().front();
+            v["concat_inputs"] = ins->inputs().size() - pm->get_parameter_names().size();
+            v["preamble"]      = generate_pointwise(*pm, "post_concat");
+            v["post"]          = "MIGRAPHX_LIFT(post_concat)";
+            v["kernel"]        = "concat_" + generate_name_from_ops(*pm) + "_kernel";
+        }
+        return replace(compile_op(ctx, to_shapes(ins->inputs()), v));
     }
 };
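Note: a standalone sketch of the parameter-list expansion the template substitution relies on (the enum_params helper is re-implemented here as a guess at its behavior, not MIGraphX's actual definition): a prefix is repeated with an index, so num_of_concat_inputs controls how many `auto concat_x...` parameters the fused kernel lambda receives:

#include <cstddef>
#include <iostream>
#include <sstream>
#include <string>

// Hypothetical re-implementation: repeat `prefix` with an index, comma-separated.
std::string enum_params(std::size_t count, const std::string& prefix)
{
    std::ostringstream ss;
    for(std::size_t i = 0; i < count; ++i)
        ss << (i == 0 ? "" : ",") << prefix << i;
    return ss.str();
}

int main()
{
    // For a concat of 3 tensors fused with a pointwise module:
    std::cout << enum_params(3, "auto concat_x") << '\n';
    // prints: auto concat_x0,auto concat_x1,auto concat_x2
    std::cout << enum_params(3, "concat_x") << '\n';
    // prints: concat_x0,concat_x1,concat_x2
}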
(Diff page 1 of 2; the remaining changed files of this commit are listed on the next page.)