gaoqiong / MIGraphX / Commits / 8d7a8a6c

Commit 8d7a8a6c, authored Dec 06, 2023 by Artur Wojcik

    Merge branch 'develop' into uif2-initial

Parents: 25b33431, a09dc502

Changes: 203 files in the merge; this page shows 20 changed files with 1900 additions and 462 deletions (+1900 −462).
Files shown on this page:

    src/onnx/parse_pooling.cpp                                          +11   −219
    src/onnx/parse_qlinearpooling.cpp                                   +115  −0
    src/onnx/parse_scatternd.cpp                                        +7    −5
    src/onnx/parse_unique.cpp                                           +92   −0
    src/onnx/pooling.cpp                                                +247  −0
    src/targets/cpu/dnnl.cpp                                            +1    −0
    src/targets/cpu/lowering.cpp                                        +12   −1
    src/targets/gpu/CMakeLists.txt                                      +10   −4
    src/targets/gpu/compile_gen.cpp                                     +10   −0
    src/targets/gpu/fuse_mlir.cpp                                       +280  −171
    src/targets/gpu/gemm_impl.cpp                                       +68   −17
    src/targets/gpu/include/migraphx/gpu/fuse_mlir.hpp                  +2    −1
    src/targets/gpu/include/migraphx/gpu/gemm_softmax_gemm.hpp          +4    −0
    src/targets/gpu/include/migraphx/gpu/rocblas.hpp                    +2    −0
    src/targets/gpu/jit/scatter.hpp                                     +78   −0
    src/targets/gpu/jit/scatternd.cpp                                   +8    −37
    src/targets/gpu/kernels/include/migraphx/kernels/bit_cast.hpp       +37   −0
    src/targets/gpu/kernels/include/migraphx/kernels/dpp.hpp            +21   −7
    src/targets/gpu/kernels/include/migraphx/kernels/float8.hpp         +564  −0
    src/targets/gpu/kernels/include/migraphx/kernels/float8_impl.hpp    +331  −0
src/onnx/parse_pooling.cpp (+11 −219)

@@ -22,14 +22,8 @@ The pooling attribute handling moves into a shared helper (see the new src/onnx/pooling.cpp below), so most includes are dropped:

    #include <migraphx/onnx/op_parser.hpp>
    #include <migraphx/onnx/pooling.hpp>
    #include <migraphx/instruction.hpp>

(removed: migraphx/onnx/checks.hpp, migraphx/onnx/padding.hpp, migraphx/op/pad.hpp, migraphx/op/pooling.hpp, migraphx/ranges.hpp, migraphx/stringutils.hpp, migraphx/make_op.hpp)

@@ -39,76 +33,14 @@ struct parse_pooling : op_parser<parse_pooling>

operators() keeps the same ONNX-name-to-mode mapping, now formatted one entry per line:

    std::vector<op_desc> operators() const
    {
        return {
            {"AveragePool", "average"},
            {"GlobalAveragePool", "average"},
            {"GlobalMaxPool", "max"},
            {"MaxPool", "max"},
            {"LpPool", "lpnorm"},
            {"GlobalLpPool", "lpnorm"},
        };
    }

The handle_values() helper (global-pool lengths, ceil_mode, strides, kernel_shape, dilations, lp_order, and the auto_pad/pads consistency check) is deleted here; it reappears as handle_pooling_values() in the new src/onnx/pooling.cpp.

@@ -116,148 +48,8 @@ struct parse_pooling : op_parser<parse_pooling>

parse() no longer builds the pooling op itself (mode map, padding defaults, count_include_pad, auto_pad handling, asymmetric-padding pad-and-slice); it delegates everything to the shared add_pooling_op():

    instruction_ref parse(const op_desc& opd,
                          const onnx_parser& /*parser*/,
                          onnx_parser::node_info info,
                          std::vector<instruction_ref> args) const
    {
        return add_pooling_op(opd, std::move(info), args[0]);
    }
src/onnx/parse_qlinearglavgpool.cpp → src/onnx/parse_qlinearpooling.cpp (+115 −0, renamed)

@@ -23,6 +23,7 @@ One include is added so the parser can reuse the shared pooling builder:

    #include <migraphx/onnx/op_parser.hpp>
   +#include <migraphx/onnx/pooling.hpp>
    #include <migraphx/ranges.hpp>
    #include <migraphx/op/pooling.hpp>
    #include <migraphx/make_op.hpp>

@@ -36,90 +37,56 @@ namespace onnx {

The long in-file specification of the QLinearGlobalAveragePool contrib operator (inputs X, x_scale, x_zero_point, y_scale, y_zero_point, all scalar quantization parameters; tensor(uint8)/tensor(int8) type constraint; channels_last attribute; output Y keeping N and C with all spatial dims 1) is removed in favor of the upstream reference, which now covers both ops:

    * Reference: see QLinearAveragePool and QLinearGlobalAveragePool in
    * github.com/microsoft/onnxruntime/blob/main/docs/ContribOperators.md

The struct is renamed from parse_qlinearglobalaveragepool to parse_qlinearpooling and registers both operators:

    struct parse_qlinearpooling : op_parser<parse_qlinearpooling>
    {
        std::vector<op_desc> operators() const
        {
            return {{"QLinearGlobalAveragePool", "average"}, {"QLinearAveragePool", "average"}};
        }

check_inputs() gains the op_desc so error messages carry the ONNX name, drops the hard args.size() < 5 check (the output zero point is now optional), and validates zero_pt_y only when it is present:

        // basic type checking for the QLinear pooling operators
        void check_inputs(const op_desc& opd, const std::vector<instruction_ref>& args) const
        {
            const auto& in_x     = args[0];
            const auto onnx_name = opd.onnx_name;
            if(in_x->get_shape().ndim() <= 2)
                MIGRAPHX_THROW(onnx_name + ": input dimensions too small");
            auto type_x = in_x->get_shape().type();
            if(type_x != migraphx::shape::int8_type and type_x != migraphx::shape::uint8_type)
                MIGRAPHX_THROW(onnx_name + ": unsupported input type");
            const auto& zero_pt_x = args[2];
            if(type_x != zero_pt_x->get_shape().type())
                MIGRAPHX_THROW(onnx_name + ": mismatched type: input zero point");
            if(args.size() == 5)
            {
                const auto& zero_pt_y = args[4];
                if(type_x != zero_pt_y->get_shape().type())
                    MIGRAPHX_THROW(onnx_name + ": mismatched type: output zero point");
            }
        }

parse() now uses the previously ignored opd argument, guards the channels_last lookup behind a contains() check, and builds the pooling through add_pooling_op instead of constructing op::pooling by hand:

        instruction_ref parse(const op_desc& opd,
                              const onnx_parser& parser,
                              const onnx_parser::node_info& info,
                              const std::vector<instruction_ref>& args) const
        {
            if(contains(info.attributes, "channel_last"))
            {
                int channels_last =
                    parser.parse_value(info.attributes.at("channels_last")).template at<int>();
                if(channels_last != 0)
                    MIGRAPHX_THROW(opd.onnx_name +
                                   ": channels_last (N x D1..Dn x C) is not supported");
            }
            check_inputs(opd, args);
            // Input: X
            ...

@@ -128,21 +95,18 @@ The pooling itself and the re-quantization of the result change as follows:

        const auto& zero_pt_x = args[2];
        auto dquant_x = bcast_qdq_instr("dequantizelinear", in_x, scale_x, zero_pt_x, info);

        // Output Y = pooling_op(X)   (was: Y = globalaveragepool(X) built by hand)
   -    auto op   = migraphx::op::pooling{migraphx::op::pooling_mode::average};
   -    auto lens = in_x->get_shape().lens();
   -    std::vector<size_t> lengths(lens.begin() + 2, lens.end());
   -    op.lengths = lengths;
   -    op.padding = std::vector<size_t>(lens.size());
   -    auto out_y = info.add_instruction(op, dquant_x);
   +    auto out_y = add_pooling_op(opd, info, dquant_x);

        const auto& in_scale_y = args[3];
        // zero_pt for Y is supplied as the last optional argument..
        if(args.size() == 5)
            return (bcast_qdq_instr("quantizelinear", out_y, in_scale_y, args[4], info));

        // if no zero_pt: just broadcast the scale..
        auto bcast_scale_y = bcast_scalar_instr(out_y->get_shape(), in_scale_y, info);
        return (info.add_instruction(migraphx::make_op("quantizelinear"), out_y, bcast_scale_y));
    };
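The graph this parser emits is the classic quantize/dequantize sandwich: dequantizelinear, then the pooling, then quantizelinear. As a sanity check of that arithmetic, here is a minimal host-side sketch of the same math on one channel (plain C++ with illustrative values, not the MIGraphX API):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    int main()
    {
        std::vector<uint8_t> x{12, 20, 36, 44};  // quantized input, one channel
        float x_scale = 0.5f; int x_zp = 10;     // input quantization params (scalars)
        float y_scale = 1.0f; int y_zp = 0;      // output quantization params (scalars)

        // dequantizelinear: real = (q - zero_point) * scale
        float sum = 0.0f;
        for(auto q : x)
            sum += (static_cast<float>(q) - x_zp) * x_scale;
        float pooled = sum / x.size();           // global average pool -> 9.0

        // quantizelinear: q = clamp(round(real / scale) + zero_point, 0, 255)
        int q = static_cast<int>(std::lround(pooled / y_scale)) + y_zp;
        q = std::clamp(q, 0, 255);
        std::cout << q << "\n";                  // prints 9 (avg of {1, 5, 13, 17})
        return 0;
    }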
src/onnx/parse_scatternd.cpp (+7 −5)

@@ -39,15 +39,17 @@ struct parse_scatternd : op_parser<parse_scatternd>

Instead of special-casing "add" and "mul", the parser now accepts any supported reduction attribute, rejects unknown ones, and derives the operator name directly from the attribute:

        const onnx_parser::node_info& info,
        std::vector<instruction_ref>& args) const
    {
        std::string reduction = "none";
        if(contains(info.attributes, "reduction"))
        {
            reduction = info.attributes.at("reduction").s();
            if(not contains({"none", "add", "mul", "min", "max"}, reduction))
            {
                MIGRAPHX_THROW("PARSE_SCATTERND: unsupported reduction mode " + reduction);
            }
        }
        return info.add_instruction(migraphx::make_op("scatternd_" + reduction), args);
    }
    };

Previously the body returned scatternd_add for "add", scatternd_mul for "mul", and fell through to scatternd_none for anything else, silently ignoring unknown modes.
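As a reminder of what those op names mean downstream, ScatterND with a reduction combines each update with the existing element instead of overwriting it. A small host-side sketch of the semantics on a 1-D tensor (illustrative only, not the MIGraphX kernel):

    #include <algorithm>
    #include <cstddef>
    #include <functional>
    #include <iostream>
    #include <vector>

    int main()
    {
        std::vector<float> data{1, 2, 3, 4, 5};
        std::vector<int> indices{1, 3};
        std::vector<float> updates{10, 20};

        // reduction="none" assigns; "add"/"mul"/"min"/"max" combine with the old value.
        auto scatternd = [&](std::function<float(float, float)> op) {
            auto out = data;
            for(std::size_t i = 0; i < indices.size(); ++i)
                out[indices[i]] = op(out[indices[i]], updates[i]);
            return out;
        };

        auto added = scatternd([](float a, float b) { return a + b; });        // {1,12,3,24,5}
        auto maxed = scatternd([](float a, float b) { return std::max(a, b); }); // {1,10,3,20,5}
        std::cout << added[1] << " " << maxed[3] << "\n"; // 12 20
        return 0;
    }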
src/targets/gpu/device/pad.cpp → src/onnx/parse_unique.cpp (+92 −0)

GitLab pairs these files as a rename, but the content is a full rewrite: the old GPU device implementation of pad (argument pad(hipStream_t, ...) with its hip_visit_all fill kernel and offset copy via gs_launch) is deleted, along with the rest of the non-JIT pad path (see the CMakeLists.txt change below), and a new ONNX parser for Unique takes its place. The copyright year in the license header moves from 2015-2022 to 2015-2023.

    #include <migraphx/onnx/op_parser.hpp>
    #include <migraphx/ranges.hpp>
    #include <migraphx/instruction.hpp>
    #include <migraphx/make_op.hpp>
    #include <migraphx/tune_axis.hpp>
    #include <optional>

    namespace migraphx {
    inline namespace MIGRAPHX_INLINE_NS {
    namespace onnx {

    // generate unique output stream y, given input stream x;
    //
    // case unsorted:
    //   input x: [2, 1, 1, 3, 4, 3], attr_sorted = 0;
    //   output(s):
    //   y: [2, 1, 3, 4]                    --- the unique output
    //   y_indices: [0, 1, 3, 4]            --- first incidence, in terms of indices of x
    //   x_rev_indices: [0, 1, 1, 2, 3, 2]  --- x seen in terms of indices of y
    //   y_count: [1, 2, 2, 1]              --- count at each y_index. sum = len(x)
    //
    // case sorted:
    //   input x: [2, 1, 1, 3, 4, 3], attr_sorted = 1;
    //   output(s):
    //   y: [1, 2, 3, 4]                    --- the unique output
    //   y_indices: [1, 0, 3, 4]            --- first incidence, in terms of indices of x
    //   x_rev_indices: [1, 0, 0, 2, 3, 2]  --- x seen in terms of indices of y
    //   y_count: [2, 1, 2, 1]              --- count at each y_index. sum = len(x)

    struct parse_unique : op_parser<parse_unique>
    {
        std::vector<op_desc> operators() const { return {{"Unique"}}; }

        std::vector<instruction_ref> parse(const op_desc& opd,
                                           const onnx_parser& parser,
                                           const onnx_parser::node_info& info,
                                           std::vector<instruction_ref> args) const
        {
            int64_t sorted = 1; // default = sorted.
            if(contains(info.attributes, "sorted"))
                sorted = parser.parse_value(info.attributes.at("sorted")).at<int>();

            std::optional<int64_t> axis;
            if(contains(info.attributes, "axis"))
            {
                auto n_dim = args[0]->get_shape().ndim();
                axis       = parser.parse_value(info.attributes.at("axis")).at<int>();
                axis       = tune_axis(n_dim, *axis, opd.op_name);
            }

            migraphx::argument data_arg = args.back()->eval();
            auto opr = axis ? migraphx::make_op("unique", {{"axis", *axis}, {"sorted", sorted}})
                            : migraphx::make_op("unique", {{"sorted", sorted}});

            auto u_opr = info.add_instruction(opr, args.at(0));

            auto i_y     = info.add_instruction(make_op("get_tuple_elem", {{"index", 0}}), u_opr);
            auto i_y_idx = info.add_instruction(make_op("get_tuple_elem", {{"index", 1}}), u_opr);
            auto i_x_idx = info.add_instruction(make_op("get_tuple_elem", {{"index", 2}}), u_opr);
            auto i_count = info.add_instruction(make_op("get_tuple_elem", {{"index", 3}}), u_opr);

            return {i_y, i_y_idx, i_x_idx, i_count};
        }
    };

    } // namespace onnx
    } // namespace MIGRAPHX_INLINE_NS
    } // namespace migraphx
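The four tuple outputs follow the ONNX Unique contract. A host-side sketch that reproduces the unsorted example from the comment above (plain C++, independent of the `unique` op implementation):

    #include <cstdint>
    #include <iostream>
    #include <vector>

    int main()
    {
        std::vector<int> x{2, 1, 1, 3, 4, 3}; // attr_sorted = 0
        std::vector<int> y;                   // unique values, in order of first occurrence
        std::vector<int64_t> y_indices;       // first occurrence, as indices into x
        std::vector<int64_t> x_rev_indices;   // x expressed as indices into y
        std::vector<int64_t> y_count;         // occurrences of each y element

        for(std::size_t i = 0; i < x.size(); ++i)
        {
            std::size_t j = 0;
            while(j < y.size() and y[j] != x[i])
                ++j;
            if(j == y.size()) // first time we see this value
            {
                y.push_back(x[i]);
                y_indices.push_back(i);
                y_count.push_back(0);
            }
            x_rev_indices.push_back(j);
            ++y_count[j];
        }
        // y = [2,1,3,4], y_indices = [0,1,3,4],
        // x_rev_indices = [0,1,1,2,3,2], y_count = [1,2,2,1]
        for(auto v : y)
            std::cout << v << ' ';
        std::cout << '\n';
        return 0;
    }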
src/onnx/pooling.cpp (new file, +247)

    /* The MIT License (MIT). Copyright (c) 2015-2023 Advanced Micro Devices, Inc.
       All rights reserved. (Full license text as in the other source files.) */
    #include <migraphx/onnx/pooling.hpp>
    #include <migraphx/onnx/checks.hpp>
    #include <migraphx/onnx/padding.hpp>
    #include <migraphx/stringutils.hpp>
    #include <migraphx/make_op.hpp>
    #include <migraphx/op/pooling.hpp>
    #include <migraphx/op/pad.hpp>
    #include <migraphx/ranges.hpp>

    namespace migraphx {
    inline namespace MIGRAPHX_INLINE_NS {
    namespace onnx {

    value handle_pooling_values(const op_desc& opd,
                                onnx_parser::node_info info,
                                const shape& in_shape,
                                value values)
    {
        auto kdims = in_shape.ndim() - 2;

        if(starts_with(opd.onnx_name, "Global") or starts_with(opd.onnx_name, "QLinearGlobal"))
        {
            // if spatial dimensions are dynamic use dyn_global flag
            if(in_shape.dynamic() and
               std::any_of(in_shape.dyn_dims().cbegin() + 2,
                           in_shape.dyn_dims().cend(),
                           [](auto dd) { return not dd.is_fixed(); }))
            {
                values["dyn_global"] = true;
                values["lengths"]    = std::vector<size_t>();
            }
            else
            {
                // works with static and fixed dynamic shape
                auto m_lens       = in_shape.max_lens();
                values["lengths"] = std::vector<size_t>(m_lens.begin() + 2, m_lens.end());
            }
        }

        if(contains(info.attributes, "ceil_mode"))
        {
            values["ceil_mode"] = static_cast<bool>(info.attributes.at("ceil_mode").i());
        }

        if(contains(info.attributes, "strides"))
        {
            values["stride"].clear();
            copy(info.attributes["strides"].ints(), std::back_inserter(values["stride"]));
            check_attr_sizes(kdims, values["stride"].size(), "PARSE_POOLING: inconsistent strides");
        }

        if(contains(info.attributes, "kernel_shape"))
        {
            values["lengths"].clear();
            copy(info.attributes["kernel_shape"].ints(), std::back_inserter(values["lengths"]));
            check_attr_sizes(kdims, values["lengths"].size(), "PARSE_POOLING: inconsistent lengths");
        }

        if(contains(info.attributes, "dilations"))
        {
            values["dilations"].clear();
            copy(info.attributes["dilations"].ints(), std::back_inserter(values["dilations"]));
            check_attr_sizes(
                kdims, values["dilations"].size(), "PARSE_POOLING: inconsistent dilations");
        }

        // lp_order attribute
        if(contains(info.attributes, "p"))
        {
            values["lp_order"] = info.attributes.at("p").i();
        }

        // ensure pads available only when auto_pad is "NOT_SET"
        check_padding_mode(info, "POOLING");

        return values;
    }

    instruction_ref
    add_pooling_op(const op_desc& opd, onnx_parser::node_info info, instruction_ref l0)
    {
        std::string mode = opd.op_name;
        const std::unordered_map<std::string, op::pooling_mode> mode_map = {
            {"max", op::pooling_mode::max},
            {"average", op::pooling_mode::average},
            {"lpnorm", op::pooling_mode::lpnorm}};
        if(not contains(mode_map, mode))
        {
            MIGRAPHX_THROW(
                "PARSE_POOLING: onnx pooling mode must be [\"max\", \"average\", \"lpnorm\"]");
        }
        operation op = make_op("pooling", {{"mode", mode_map.at(mode)}});
        value values = op.to_value();

        auto in_shape = l0->get_shape();
        assert(in_shape.ndim() > 2);
        auto kdims = in_shape.ndim() - 2;

        values = handle_pooling_values(opd, info, in_shape, values);

        // count include padding, if count include pad is 1, we always use
        // explicit pad
        int count_include_pad = 0;
        if(contains(info.attributes, "count_include_pad"))
        {
            if(in_shape.dynamic())
            {
                MIGRAPHX_THROW("PARSE_POOLING: count_include_pad attribute is not supported for "
                               "dynamic input shape");
            }
            count_include_pad = info.attributes.at("count_include_pad").i();
        }

        std::vector<int64_t> paddings;
        float pad_val = ((mode == "max") ? std::numeric_limits<float>::lowest() : 0.0f);

        if(contains(info.attributes, "pads"))
        {
            values["padding"].clear();
            copy(info.attributes["pads"].ints(), std::back_inserter(paddings));
            check_attr_sizes(
                kdims, paddings.size() / 2, "PARSE_POOLING: inconsistent explicit paddings");
        }
        if(paddings.size() != 2 * kdims)
        {
            paddings.resize(kdims * 2);
            std::fill_n(paddings.begin(), 2 * kdims, 0);
        }
        if(values["padding"].size() != kdims)
        {
            values["padding"].resize(kdims);
            std::fill_n(values["padding"].begin(), kdims, 0);
        }
        if(values["stride"].size() != kdims)
        {
            values["stride"].resize(kdims);
            std::fill_n(values["stride"].begin(), kdims, 1);
        }
        if(values["dilations"].size() != kdims)
        {
            values["dilations"].resize(kdims);
            std::fill_n(values["dilations"].begin(), kdims, 1);
        }

        // used to calculate the supposed output shape
        std::vector<int64_t> orig_padding = paddings;

        // TODO: add parsing for dilations
        if(contains(info.attributes, "auto_pad") and
           to_upper(info.attributes["auto_pad"].s()) != "NOTSET")
        {
            auto auto_pad = to_upper(info.attributes["auto_pad"].s());
            // don't use the given padding sizes, if any
            // values["padding"].clear();
            if(in_shape.dynamic())
            {
                // set padding_mode to trigger auto padding at runtime
                bool is_same_upper = (auto_pad.find("SAME_UPPER") != std::string::npos);
                values["padding_mode"] = is_same_upper ? to_value(op::padding_mode_t::same_upper)
                                                       : to_value(op::padding_mode_t::same_lower);
            }
            else
            {
                // Calculate auto padding
                // dilations (argument 4) not supported; default to all 1's
                cal_auto_padding_size(info,
                                      values,
                                      values["lengths"].to_vector<std::size_t>(),
                                      values["dilations"].to_vector<std::size_t>(),
                                      in_shape.lens(),
                                      paddings);
                values["padding"] = paddings;
                // default padding_mode indicates that padding sizes are not calculated dynamically
                values["padding_mode"] = migraphx::op::padding_mode_t::default_;
            }
        }

        std::vector<int64_t> slice_start;
        std::vector<int64_t> slice_end;
        tune_padding_size(values, paddings, count_include_pad, slice_start);

        if(not slice_start.empty())
        {
            if(in_shape.dynamic())
            {
                MIGRAPHX_THROW(
                    "PARSE_POOLING: asymmetric padding not supported for dynamic input shape");
            }
            // calculate expected output shape
            orig_padding.insert(orig_padding.begin() + kdims, 2, 0);
            orig_padding.insert(orig_padding.begin(), 2, 0);
            op::pad pad{orig_padding, 0.0f};
            shape padded_shape = pad.compute_shape({l0->get_shape()});
            // make an op just to get its output shape
            auto out_lens = make_op("pooling", values).compute_shape({padded_shape}).lens();
            // compute slice_end information
            slice_end.resize(slice_start.size());
            std::transform(out_lens.begin() + 2,
                           out_lens.end(),
                           slice_start.begin(),
                           slice_end.begin(),
                           [](auto i, auto j) { return i + j; });
        }
        values["padding"] = std::vector<size_t>(paddings.begin(), paddings.end());
        check_asym_padding(info, l0, paddings, values, count_include_pad, pad_val);

        op.from_value(values);
        auto l1 = info.add_instruction(op, l0);
        if(not slice_start.empty())
        {
            std::vector<int64_t> axes(kdims);
            std::iota(axes.begin(), axes.end(), 2);
            l1 = info.add_instruction(
                make_op("slice", {{"axes", axes}, {"starts", slice_start}, {"ends", slice_end}}),
                l1);
        }
        return l1;
    }

    } // namespace onnx
    } // namespace MIGRAPHX_INLINE_NS
    } // namespace migraphx
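The padding gymnastics above (tune_padding_size plus the trailing slice) compensate for asymmetric padding; they are easiest to check against the one-dimensional output-length formula out = floor((in + pad_begin + pad_end - ((k - 1) * dilation + 1)) / stride) + 1, with ceil instead of floor when ceil_mode is set. A worked check of the symmetric case (a sketch, values chosen for illustration):

    #include <cmath>
    #include <iostream>

    // Standard pooling output-length formula for one spatial dimension.
    int pool_out(int in, int k, int stride, int pad_begin, int pad_end, int dilation, bool ceil_mode)
    {
        double num = in + pad_begin + pad_end - ((k - 1) * dilation + 1);
        double q   = num / stride;
        return static_cast<int>(ceil_mode ? std::ceil(q) : std::floor(q)) + 1;
    }

    int main()
    {
        // MaxPool: in=7, kernel=3, stride=2, symmetric pad=1, dilation=1 -> out=4
        std::cout << pool_out(7, 3, 2, 1, 1, 1, false) << "\n";
        // SAME_UPPER auto padding targets out = ceil(in/stride): for in=7, stride=2 -> out=4,
        // so total pad = (out - 1) * stride + k - in = 2, split begin=1/end=1
        // (any odd leftover goes to the end, which is what forces the pad+slice path).
        return 0;
    }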
src/targets/cpu/dnnl.cpp (+1 −0)

@@ -68,6 +68,7 @@ dnnl::memory::data_type to_dnnl_memory_data_type(shape::type_t t)

An explicit case is added so fp8 fails with a specific error instead of the generic one:

        case st::int32_type: return dt::s32;
        case st::int8_type: return dt::s8;
        case st::uint8_type: return dt::u8;
   +    case st::fp8e4m3fnuz_type: MIGRAPHX_THROW("fp8e4m3fnuz unsupported in DNNL");
        default: MIGRAPHX_THROW("Unsupported data type");
        }
    }
src/targets/cpu/lowering.cpp (+12 −1)

@@ -340,7 +340,6 @@ struct cpu_apply

The context around the dnnl extension table ({"reduce_min", "reduction_min"}, {"reduce_sum", "reduction_sum"}, extend_op("concat"/"contiguous"/"convolution", ...)) is unchanged; the single removed line falls outside the visible context of this hunk.

@@ -376,6 +375,12 @@ and @@ -383,6 +388,12 @@ struct cpu_apply

Both lowering loops now skip instructions that have an fp8 input:

    // Apply these operators first so the inputs can be const folded
    for(auto it : iterator_for(*modl))
    {
   +    // skip lowering if input has fp8 as one of the inputs since oneDNN doesn't have fp8
   +    // supported yet.
   +    if(std::any_of(it->inputs().begin(), it->inputs().end(), [](const auto& i) {
   +           return i->get_shape().type() == migraphx::shape::fp8e4m3fnuz_type;
   +       }))
   +        continue;
        if(it->name() == "pow")
        {
            apply_pow(it);
            ...

The same guard is inserted in the second loop, ahead of the if(it->name() == "pooling") { apply_pooling(it); ... } dispatch.
src/targets/gpu/CMakeLists.txt (+10 −4)

@@ -126,7 @@ and @@ -140,7 @@ gather.cpp (between fuse_ops.cpp and gemm_impl.cpp) and pad.cpp (between prefuse_ops.cpp and perfdb.cpp) are dropped from the migraphx_gpu sources; both ops move off the hand-written device path.

@@ -168,12 +166,10 @@ The matching entries leave the op registration list:

    register_migraphx_gpu_ops(hip_
        argmax
        argmin
   -    gather
        logsoftmax
        loop
        multinomial
        nonzero
   -    pad
        prefix_scan_sum
        reverse
        scatter
        ...

@@ -263,6 +259,8 @@ A feature probe for the rocBLAS fp8 beta API is added next to the existing probes:

    check_library_exists(MIOpen "miopenFindSolutions" "${MIOPEN_LOCATION}" HAS_FIND_2_API)
    # Beta API for automated GEMM tuning
    check_library_exists(roc::rocblas "rocblas_gemm_ex_get_solutions" "${ROCBLAS_LOCATION}" HAS_ROCBLAS_TUNING_BETA_FEATURE_API)
   +# rocblas FP8 API
   +check_library_exists(roc::rocblas "rocblas_gemm_strided_batched_ex3" "${ROCBLAS_LOCATION}" HAS_ROCBLAS_FP8_BETA_API)
    set(MIGRAPHX_USE_FIND_2_API "${HAS_FIND_2_API}" CACHE BOOL "")

@@ -292,10 +290,18 @@ and the probe result toggles the fp8 compile definitions:

        message(STATUS "rocBLAS does not have User Tuning Beta API")
    endif()
   +if(HAS_ROCBLAS_FP8_BETA_API)
   +    target_compile_definitions(migraphx_gpu PUBLIC -DMIGRAPHX_USE_ROCBLAS_FP8_API -DROCBLAS_BETA_FEATURES_API -DROCBLAS_NO_DEPRECATED_WARNINGS)
   +    message(STATUS "MIGraphX is using Beta API of rocBLAS for FP8 computations")
   +else()
   +    message(STATUS "rocBLAS does not have Fp8 Beta API")
   +endif()
    target_link_libraries(migraphx_gpu PUBLIC migraphx MIOpen roc::rocblas)
    target_link_libraries(migraphx_gpu PRIVATE migraphx_device migraphx_kernels)
    if(MIGRAPHX_USE_COMPOSABLEKERNEL)
        target_link_libraries(migraphx_gpu PRIVATE composable_kernel::jit_library)
        target_compile_definitions(migraphx_gpu PRIVATE MIGRAPHX_USE_COMPOSABLEKERNEL=1)
    endif()
    add_subdirectory(driver)
src/targets/gpu/compile_gen.cpp (+10 −0)

@@ -54,6 +54,11 @@ vectorize vectorize::elements(std::size_t axis, const std::vector<shape>& inputs, const std::vector<std::size_t>& sizes)

Both vectorize::elements overloads now bail out early for fp8 inputs:

    {
   +    // disable vectorization for fp8 types
   +    if(std::any_of(inputs.begin(), inputs.end(), [&](auto ishape) {
   +           return ishape.type() == migraphx::shape::fp8e4m3fnuz_type;
   +       }))
   +        return {1, axis};
        if(std::all_of(
               inputs.begin(), inputs.end(), [&](const auto& s) { return s.lens()[axis] == 1; }))
            return {1, axis};
        ...

@@ -86,6 +91,11 @@ vectorize vectorize::elements(context& ctx, std::size_t axis, const std::vector<shape>& inputs)

The same guard is inserted at the top of the context-taking overload:

    {
   +    // disable vectorization for fp8 types
   +    if(std::any_of(inputs.begin(), inputs.end(), [&](auto ishape) {
   +           return ishape.type() == migraphx::shape::fp8e4m3fnuz_type;
   +       }))
   +        return {1, axis};
        if(inputs.empty())
            return {1, axis};
        std::size_t n = std::max_element(inputs.begin(),
        ...
src/targets/gpu/fuse_mlir.cpp (+280 −171): diff collapsed on this page.
src/targets/gpu/gemm_impl.cpp (+68 −17)

@@ -22,11 +22,14 @@ New includes pull in the rocBLAS type definitions and the fp8 availability probe:

   +#include <rocblas/internal/rocblas-types.h>
    #include <rocblas/rocblas.h>
   +#include <migraphx/gpu/rocblas.hpp>
    #include <migraphx/gpu/gemm_impl.hpp>
    #include <migraphx/reduce_dims.hpp>
    #include <migraphx/generate.hpp>
    #include <migraphx/time.hpp>
   +#include <type_traits>

    using microseconds = std::chrono::duration<double, std::micro>;

@@ -34,6 +37,20 @@ namespace gpu {

A small adapter type is introduced because the two rocBLAS entry points disagree on the compute-type parameter:

    /*
    The regular rocBLAS API takes compute_type as a `rocblas_datatype` enum value, whereas the "ex3"
    BETA API takes it as a `rocblas_computetype` enum value. `rb_compute_type` is a facilitator that
    implicitly casts the stored integer enum value to whichever type is required inside the
    `common_args` generator.
    */
    struct rb_compute_type
    {
        int type = 0;
        rb_compute_type(rocblas_datatype t) : type(static_cast<int>(t)) {}
        rb_compute_type(rocblas_computetype t) : type(static_cast<int>(t)) {}
        operator rocblas_datatype() const { return static_cast<rocblas_datatype>(type); }
        operator rocblas_computetype() const { return static_cast<rocblas_computetype>(type); }
    };

    // Convert rocBLAS datatypes to equivalent Migraphx data types
    rocblas_datatype get_type(shape::type_t type)

@@ -46,7 +63,7 @@ get_type() now maps fp8 instead of letting it fall through to the unsupported cases:

        case shape::uint8_type: return rocblas_datatype_u8_r;
        case shape::int32_type: return rocblas_datatype_i32_r;
        case shape::uint32_type: return rocblas_datatype_u32_r;
   +    case shape::fp8e4m3fnuz_type: return rocblas_datatype_f8_r;
        case shape::tuple_type:
        case shape::bool_type:
        case shape::uint16_type:
        ...

@@ -183,12 +200,17 @@ struct gemm_impl

The constructor wraps the compute type and selects f32 accumulation for fp8 arguments:

   -    compute_type = output_type;
   +    compute_type = rb_compute_type{output_type};
        if(compute_fp32)
        {
            if(arg_type == rocblas_datatype_f16_r)
                compute_type = rocblas_datatype_f32_r;
        }
   +    if(arg_type == rocblas_datatype_f8_r)
   +    {
   +        assert(get_type(input_shapes[1].type()) == rocblas_datatype_f8_r);
   +        compute_type = rocblas_compute_type_f32;
   +    }
        auto a_lens = input_shapes[0].lens();
        auto b_lens = input_shapes[1].lens();

@@ -217,23 +239,52 @@ struct gemm_impl

run() gains an fp8 branch that dispatches to the "ex3" entry points when the beta API is compiled in and available at runtime:

    void run(context& ctx, const std::vector<argument>& input_args, int32_t solution_idx = 0) const
    {
    #ifdef MIGRAPHX_USE_ROCBLAS_FP8_API
        if(rocblas_fp8_available() and
           std::any_of(input_args.begin(), input_args.end(), [](const auto i) {
               return i.get_shape().type() == migraphx::shape::fp8e4m3fnuz_type;
           }))
        {
            if(strided_batched)
            {
                auto common_args = create_strided_batched_args_common(ctx, input_args);
                rocblas_invoke(&rocblas_gemm_strided_batched_ex3,
                               common_args,
                               rocblas_gemm_algo_standard,
                               solution_idx,
                               gemm_flags);
            }
            else
            {
                auto common_args = create_gemm_ex_args_common(ctx, input_args);
                rocblas_invoke(
                    &rocblas_gemm_ex3, common_args, rocblas_gemm_algo_standard, solution_idx, gemm_flags);
            }
        }
        else
    #endif
        {
            if(strided_batched)
            {
                auto common_args = create_strided_batched_args_common(ctx, input_args);
                rocblas_invoke(&rocblas_gemm_strided_batched_ex,
                               common_args,
                               rocblas_gemm_algo_solution_index,
                               solution_idx,
                               gemm_flags);
            }
            else
            {
                auto common_args = create_gemm_ex_args_common(ctx, input_args);
                rocblas_invoke(
                    &rocblas_gemm_ex, common_args, rocblas_gemm_algo_solution_index, solution_idx, gemm_flags);
            }
        }
    }

@@ -331,7 +382,6 @@ and @@ -366,6 +416,7 @@ The create_strided_batched_args_common / create_gemm_ex_args_common helpers ("Helper method to create that subset of a long rocBLAS argument list that is common to multiple gemm_ex... calls") keep passing compute_type through unchanged; only whitespace around them moves.

@@ -481,8 +532,8 @@ The member declaration changes type accordingly:

        rocblas_int b_stride = 0;
        rocblas_int c_stride = 0;
        rocblas_int d_stride = 0;
   -    rocblas_datatype compute_type = rocblas_datatype_f32_r;
        rocblas_datatype arg_type     = rocblas_datatype_f32_r;
   +    rb_compute_type compute_type  = rocblas_datatype_f32_r;
        rocblas_datatype output_type  = rocblas_datatype_f32_r;
        bool strided_batched          = true;
        bool is_3inputs               = true;
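The trick `rb_compute_type` relies on is ordinary C++ converting constructors plus conversion operators, so one stored value can satisfy overloads taking either enum. A standalone sketch of the mechanism with stand-in enums (the real rocblas_datatype/rocblas_computetype come from the rocBLAS headers; values here are illustrative):

    #include <iostream>

    // Stand-ins for the two rocBLAS enums.
    enum datatype_t { dt_f32 = 0, dt_f8 = 1 };
    enum computetype_t { ct_f32 = 100 };

    struct rb_compute_type_demo
    {
        int type = 0;
        rb_compute_type_demo(datatype_t t) : type(static_cast<int>(t)) {}
        rb_compute_type_demo(computetype_t t) : type(static_cast<int>(t)) {}
        operator datatype_t() const { return static_cast<datatype_t>(type); }
        operator computetype_t() const { return static_cast<computetype_t>(type); }
    };

    // One function per API family, like rocblas_gemm_ex vs rocblas_gemm_ex3.
    void gemm_ex(datatype_t ct) { std::cout << "ex: " << ct << "\n"; }
    void gemm_ex3(computetype_t ct) { std::cout << "ex3: " << ct << "\n"; }

    int main()
    {
        rb_compute_type_demo ct = dt_f32; // initialized from a datatype, as in the member default
        gemm_ex(ct);                      // implicit conversion selects datatype_t
        ct = ct_f32;                      // the fp8 path stores a compute type instead
        gemm_ex3(ct);                     // implicit conversion selects computetype_t
        return 0;
    }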
src/targets/gpu/include/migraphx/gpu/fuse_mlir.hpp (+2 −1)

@@ -34,10 +34,11 @@ namespace gpu {

A query for the MLIR attention path is exported alongside the existing one:

    MIGRAPHX_GPU_EXPORT bool mlir_enabled();
   +MIGRAPHX_GPU_EXPORT bool mlir_attention_enabled();

    struct MIGRAPHX_GPU_EXPORT fuse_mlir
    {
        context* ctx      = nullptr;
        bool enable_extra = false;
        std::string name() const { return "gpu::fuse_mlir"; }
        void apply(module_pass_manager& mpm) const;
        ...
src/targets/gpu/include/migraphx/gpu/gemm_softmax_gemm.hpp (+4 −0)

@@ -66,6 +66,10 @@ struct gemm_softmax_gemm

A type predicate for the MLIR path joins the CK one:

    static bool is_ck_supported_type(shape::type_t t) { return contains({shape::half_type}, t); }
   +static bool is_mlir_supported_type(shape::type_t t)
   +{
   +    return contains({shape::type_t::float_type, shape::half_type}, t);
   +}
    };
src/targets/gpu/include/migraphx/gpu/rocblas.hpp (+2 −0)

@@ -40,6 +40,8 @@ A runtime probe for fp8-capable rocBLAS is exported (implementation used by gemm_impl.cpp above):

    MIGRAPHX_GPU_EXPORT bool get_compute_fp32_flag();
   +MIGRAPHX_GPU_EXPORT bool rocblas_fp8_available();
src/targets/gpu/include/migraphx/gpu/pad.hpp → src/targets/gpu/jit/scatter.hpp

Another rename-in-name-only: the old hip_pad header (op::pad op member with reflect(), name() "gpu::pad", compute_shape/compute declarations, and output_alias returning shapes.size() - 1) is deleted along with the rest of the non-JIT pad path, and a new CRTP base for the JIT scatter compilers takes its place. The copyright year moves from 2015-2022 to 2015-2023.

    #ifndef MIGRAPHX_GUARD_JIT_SCATTER_HPP
    #define MIGRAPHX_GUARD_JIT_SCATTER_HPP

    #include <migraphx/gpu/compiler.hpp>
    #include <migraphx/make_op.hpp>
    #include <migraphx/gpu/context.hpp>
    #include <migraphx/gpu/compile_hip_code_object.hpp>
    #include <migraphx/gpu/compile_hip.hpp>

    namespace migraphx {
    inline namespace MIGRAPHX_INLINE_NS {
    namespace gpu {

    template <typename Derived>
    struct scatter_compiler : compiler<Derived>
    {
        compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const
        {
            const auto inputs = to_shapes(
                std::vector<instruction_ref>{ins->inputs().begin() + 1, ins->inputs().end()});
            hip_compile_options options;
            options.set_launch_params(op.to_value(),
                                      compute_global_for(ctx, inputs.at(1).elements()));
            options.inputs         = inputs;
            options.output         = inputs.back();
            options.kernel_name    = derived().get_kernel_name(op);
            options.virtual_inputs = inputs;
            // The compiler protests the inequality comparison in assign_mul when pertaining to
            // floating point, despite it making sense in the context. Thus the warning removal.
            options.params += "-Wno-float-equal";
            const auto src = derived().make_interpolated_string(op);
            return prepend_copy_data_to_output(compile_hip_code_object(src, options));
        }

        compiler_replace prepend_copy_data_to_output(const operation& co) const
        {
            return {co, [](module& m, instruction_ref ins, const operation& op) {
                        auto args   = ins->inputs();
                        args.back() = m.insert_instruction(
                            ins, make_op("hip::copy"), args.front(), args.back());
                        args.erase(args.begin());
                        return m.replace_instruction(ins, op, args);
                    }};
        }

        std::string get_kernel_name(const operation& op) const { return op.name() + "_kernel"; }

        const Derived& derived() const { return static_cast<const Derived&>(*this); }
    };

    } // namespace gpu
    } // namespace MIGRAPHX_INLINE_NS
    } // namespace migraphx
    #endif
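The static-polymorphism pattern here is the classic CRTP: the base calls derived().get_kernel_name(op) and derived().make_interpolated_string(op), so each scatter variant overrides hooks without virtual dispatch. A stripped-down sketch of the mechanism (names illustrative, not the MIGraphX classes):

    #include <iostream>
    #include <string>

    template <typename Derived>
    struct compiler_base
    {
        // Resolves against the most-derived class at compile time; no vtable needed.
        std::string compile() const { return "kernel=" + derived().get_kernel_name(); }
        std::string get_kernel_name() const { return "generic_kernel"; } // default hook
        const Derived& derived() const { return static_cast<const Derived&>(*this); }
    };

    struct scatternd_like : compiler_base<scatternd_like>
    {
        // Shadows the default, like scatternd_compiler::get_kernel_name below.
        std::string get_kernel_name() const { return "scatternd_kernel"; }
    };

    int main()
    {
        scatternd_like c;
        std::cout << c.compile() << "\n"; // prints: kernel=scatternd_kernel
        return 0;
    }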
src/targets/gpu/jit/scatternd.cpp (+8 −37)

@@ -21,11 +21,7 @@ The compiler boilerplate is replaced by the shared base:

   -#include <migraphx/gpu/compiler.hpp>
   +#include "scatter.hpp"
   -#include <migraphx/make_op.hpp>
   -#include <migraphx/gpu/context.hpp>
   -#include <migraphx/gpu/compile_hip_code_object.hpp>
   -#include <migraphx/gpu/compile_hip.hpp>

@@ -55,46 +51,21 @@ Below the embedded )__migraphx__" kernel source, scatternd_compiler shrinks to the variant-specific hooks; min and max join the supported reductions:

    struct scatternd_compiler : scatter_compiler<scatternd_compiler>
    {
        std::vector<std::string> names() const
        {
            return {
                "scatternd_none", "scatternd_add", "scatternd_mul", "scatternd_min", "scatternd_max"};
        }

        std::string make_interpolated_string(const operation& op) const
        {
            const auto reduction =
                op.name().substr(std::char_traits<char>::length("scatternd_"));
            return interpolate_string(scatternd_kernel, {{"reduction", "assign_" + reduction}});
        }

        std::string get_kernel_name(const operation&) const { return "scatternd_kernel"; }
    };

Removed: the old compile_op() that built hip_compile_options by hand (launch params from inputs.at(1).elements(), reduction pulled from v.get("reduction", std::string{"none"})), the compile() that extracted the reduction via op.name().substr(10), and the insert() lambda that prepended hip::copy, which is now hoisted into scatter_compiler::prepend_copy_data_to_output.
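interpolate_string substitutes placeholders like ${reduction} in the embedded HIP source before it is compiled. MIGraphX's real helper lives in its stringutils header; this one-off stand-in just shows the idea:

    #include <iostream>
    #include <map>
    #include <string>

    // Replace each ${key} in src with its value; naive, but enough for kernel templating
    // (assumes values do not themselves contain a ${...} token).
    std::string interpolate(std::string src, const std::map<std::string, std::string>& vars)
    {
        for(const auto& [key, val] : vars)
        {
            const std::string tok = "${" + key + "}";
            for(auto pos = src.find(tok); pos != std::string::npos; pos = src.find(tok))
                src.replace(pos, tok.size(), val);
        }
        return src;
    }

    int main()
    {
        const std::string kernel = "scatternd<${reduction}>(indices, updates, output);";
        std::cout << interpolate(kernel, {{"reduction", "assign_max"}}) << "\n";
        // prints: scatternd<assign_max>(indices, updates, output);
        return 0;
    }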
src/targets/gpu/kernels/include/migraphx/kernels/bit_cast.hpp (new file, +37)

    /* Copyright (C) 2016-2023 Advanced Micro Devices, Inc. All rights reserved.
       (MIT-style license text as in the other source files.) */
    #ifndef MIGRAPHX_GUARD_KERNELS_BITCAST_HPP
    #define MIGRAPHX_GUARD_KERNELS_BITCAST_HPP

    #include <migraphx/kernels/type_traits.hpp>

    namespace migraphx {

    template <typename To,
              typename From,
              MIGRAPHX_REQUIRES(is_trivially_copyable<To>{} and is_trivially_copyable<From>{})>
    inline constexpr To bit_cast(From fr) noexcept
    {
        static_assert(sizeof(To) == sizeof(From));
        return __builtin_bit_cast(To, fr);
    }

    } // namespace migraphx
    #endif // MIGRAPHX_GUARD_KERNELS_BITCAST_HPP
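Usage mirrors C++20 std::bit_cast: reinterpret the object representation of a value without pointer punning, which would violate strict aliasing. A quick host-side check using the same builtin (available in recent GCC and Clang):

    #include <cstdint>
    #include <cstdio>

    int main()
    {
        // Inspect the IEEE-754 bits of 1.0f; source and destination must be the same size.
        float f   = 1.0f;
        auto bits = __builtin_bit_cast(std::uint32_t, f);
        std::printf("0x%08x\n", static_cast<unsigned>(bits)); // 0x3f800000
        // This is the operation the fp8 kernels need when packing/unpacking
        // float payloads through integer storage.
        return 0;
    }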
src/targets/gpu/kernels/include/migraphx/kernels/dpp.hpp (+21 −7)

@@ -49,12 -8 @@ Below dpp_row_bcast(), the old dpp_mov body is generalized into dpp_op, which applies an arbitrary per-register functor instead of hard-wiring __hip_move_dpp:

   -template <unsigned int DppCtrl,
   -          unsigned int RowMask  = 0xf,
   -          unsigned int BankMask = 0xf,
   -          bool BoundCtrl        = false,
   -          class T>
   -__device__ T dpp_mov(T& x)
   +template <class T, class F>
   +__device__ T dpp_op(T& x, F f)
    {
        static const index_int n = sizeof(T) < 4 ? 1 : sizeof(T) / 4;
        union type
        ...

@@ -68,10 +64,28 @@

        input.data = x;
        for(index_int i = 0; i < n; i++)
        {
   -        output.reg[i] = __hip_move_dpp(input.reg[i], DppCtrl, RowMask, BankMask, BoundCtrl);
   +        output.reg[i] = f(input.reg[i]);
        }
        return output.data;
    }

dpp_mov becomes a thin wrapper over dpp_op, and a swizzle variant is added on the same scaffolding:

   +template <unsigned int DppCtrl,
   +          unsigned int RowMask  = 0xf,
   +          unsigned int BankMask = 0xf,
   +          bool BoundCtrl        = false,
   +          class T>
   +__device__ T dpp_mov(T& x)
   +{
   +    return dpp_op(
   +        x, [](auto i) { return __hip_move_dpp(i, DppCtrl, RowMask, BankMask, BoundCtrl); });
   +}
   +
   +template <unsigned int Mask, class T>
   +__device__ T dpp_swizzle(T& x)
   +{
   +    return dpp_op(x, [](auto i) { return __hip_ds_swizzle(i, Mask); });
   +}

    #endif // MIGRAPHX_HAS_DPP
    } // namespace migraphx
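For context, DPP controls let a lane read another lane's register within the same wavefront, which is how the cross-lane reductions in these kernels avoid shared memory. A hedged sketch of how a caller might use dpp_mov (device-code fragment assuming migraphx/kernels/dpp.hpp is included and MIGRAPHX_HAS_DPP is defined; the control encoding 0x110 | n for row_shr:n is the common GFX9+ convention, but exact encodings vary by ISA, so treat this as illustrative):

    // Each lane reads the register of lane (id - 1) within its 16-lane row.
    __device__ float shift_from_left_neighbor(float x)
    {
        // 0x111 = row_shr:1; RowMask/BankMask default to 0xf (all rows/banks active).
        return migraphx::dpp_mov<0x111>(x);
    }

    // Toy partial reduction: sum a lane's value with its neighbors at distance 1 and 2.
    // Production reductions chain several such DPP steps (see the users of dpp.hpp).
    __device__ float partial_row_sum(float x)
    {
        float sum = x;
        sum += migraphx::dpp_mov<0x111>(x); // row_shr:1
        sum += migraphx::dpp_mov<0x112>(x); // row_shr:2
        return sum;
    }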
src/targets/gpu/kernels/include/migraphx/kernels/float8.hpp (new file, +564): diff collapsed on this page.

src/targets/gpu/kernels/include/migraphx/kernels/float8_impl.hpp (new file, +331): diff collapsed on this page.