gaoqiong / MIGraphX · Commits

Unverified commit 001180f5
Merge branch 'develop' into jit-contiguous
Authored May 25, 2022 by Chris Austen; committed by GitHub on May 25, 2022
Parents: dc296a73, 4e18f991
Showing 20 changed files with 279 additions and 153 deletions (+279 −153)
examples/nlp/python_bert_squad/requirements_bertsquad.txt  +1 −1
src/onnx/parse_mean.cpp  +27 −11
src/reduce_dims.cpp  +18 −4
src/targets/gpu/fuse_ops.cpp  +52 −6
src/targets/gpu/gemm_impl.cpp  +38 −6
src/targets/gpu/include/migraphx/gpu/gemm.hpp  +7 −25
src/targets/gpu/jit/gathernd.cpp  +1 −1
src/targets/gpu/jit/roialign.cpp  +0 −1
src/targets/gpu/jit/scatternd.cpp  +0 −1
src/targets/gpu/kernels/include/migraphx/kernels/array.hpp  +2 −2
src/targets/gpu/kernels/include/migraphx/kernels/basic_ops.hpp  +0 −84
src/targets/gpu/kernels/include/migraphx/kernels/functional.hpp  +1 −1
src/targets/gpu/kernels/include/migraphx/kernels/iota_iterator.hpp  +1 −1
src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp  +9 −7
src/targets/gpu/kernels/include/migraphx/kernels/tensor_view.hpp  +1 −1
src/targets/gpu/kernels/include/migraphx/kernels/type_traits.hpp  +15 −0
src/targets/gpu/kernels/include/migraphx/kernels/vec.hpp  +1 −1
test/onnx/gen_onnx.py  +14 −0
test/onnx/mean_integral_test.onnx  +67 −0
test/onnx/onnx_test.cpp  +24 −0
examples/nlp/python_bert_squad/requirements_bertsquad.txt

-tensorflow==2.5.3
+tensorflow==2.6.4
 onnxruntime
 tokenizers
\ No newline at end of file
src/onnx/parse_mean.cpp

@@ -2,6 +2,7 @@
 #include <migraphx/onnx/checks.hpp>
 #include <migraphx/instruction.hpp>
 #include <migraphx/make_op.hpp>
+#include <migraphx/ranges.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -9,6 +10,9 @@ namespace onnx {
 struct parse_mean : op_parser<parse_mean>
 {
+    const std::set<shape::type_t> float_types = {
+        shape::float_type, shape::half_type, shape::double_type};
+
     std::vector<op_desc> operators() const { return {{"Mean"}}; }

     /// Calculates the element-wise mean of n>=1 input tensors
@@ -24,17 +28,29 @@ struct parse_mean : op_parser<parse_mean>
         auto divisor = info.add_literal(
             migraphx::literal{migraphx::shape{args[0]->get_shape().type()}, {num_data}});
-        return std::accumulate(args.begin() + 1,
-                               args.end(),
-                               info.add_broadcastable_binary_op("div", args[0], divisor),
-                               [&](auto mean, auto data_i) {
-                                   // Pre-divide each tensor element-wise by n to reduce risk of
-                                   // overflow during summation
-                                   auto div =
-                                       info.add_broadcastable_binary_op("div", data_i, divisor);
-                                   return info.add_broadcastable_binary_op("add", mean, div);
-                               });
+        // TODO: Only divide when using floating-point
+        if(contains(float_types, args[0]->get_shape().type()))
+        {
+            return std::accumulate(args.begin() + 1,
+                                   args.end(),
+                                   info.add_broadcastable_binary_op("div", args[0], divisor),
+                                   [&](auto mean, auto data_i) {
+                                       // Pre-divide each tensor element-wise by n to reduce
+                                       // risk of overflow during summation
+                                       auto div =
+                                           info.add_broadcastable_binary_op("div", data_i, divisor);
+                                       return info.add_broadcastable_binary_op("add", mean, div);
+                                   });
+        }
+        else
+        {
+            // Compute sum before division for integral types
+            auto sum = std::accumulate(
+                args.begin() + 1, args.end(), args[0], [&](auto accum, auto data_i) {
+                    return info.add_broadcastable_binary_op("add", accum, data_i);
+                });
+            return info.add_broadcastable_binary_op("div", sum, divisor);
+        }
     }
 };
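The two code paths exist because pre-dividing integral tensors truncates toward zero before anything is summed. A minimal standalone sketch of the effect (illustrative only, not part of this commit):

#include <cstdio>
#include <vector>

int main()
{
    std::vector<int> xs = {3, 4, 4, 5}; // four int inputs, one element each
    int n = xs.size();

    // Pre-divide then sum (the floating-point strategy applied to ints):
    // 3/4 truncates to 0 and 5/4 to 1, so fractional parts are lost early.
    int predivided = 0;
    for(int x : xs)
        predivided += x / n; // 0 + 1 + 1 + 1 = 3

    // Sum then divide (what the parser now emits for integral types):
    int summed = 0;
    for(int x : xs)
        summed += x;
    summed /= n; // 16 / 4 = 4

    std::printf("pre-divide: %d, sum-then-divide: %d\n", predivided, summed);
    return 0;
}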
src/reduce_dims.cpp

@@ -16,10 +16,8 @@ bool reduce_dim(std::vector<shape>& shapes, std::size_t n)
         auto bstride = s.strides()[n + 1];
         auto blen    = s.lens()[n + 1];
-        if(astride == bstride * blen)
+        if(astride == bstride * blen or alen == 1)
         {
             new_lens.push_back(alen * blen);
         }
     }
     if(new_lens.size() != shapes.size())
         return false;
@@ -37,10 +35,25 @@ bool reduce_dim(std::vector<shape>& shapes, std::size_t n)
     return true;
 }

+void reduce_dim1(std::vector<shape>& shapes)
+{
+    if(std::any_of(shapes.begin(), shapes.end(), [&](const auto& s) {
+           return s.lens().size() < 2 or s.lens().back() != 1;
+       }))
+        return;
+    for(auto& s : shapes)
+    {
+        auto lens    = s.lens();
+        auto strides = s.strides();
+        lens.pop_back();
+        strides.pop_back();
+        s = shape{s.type(), lens, strides};
+    }
+}
+
 std::size_t reduce_dim_all(std::vector<shape>& shapes, std::size_t n)
 {
     while(reduce_dim(shapes, n) and n < shapes.size())
     {}
     return n + 1;
 }

 void reduce_dim_all(std::vector<shape>& shapes)
@@ -48,6 +61,7 @@ void reduce_dim_all(std::vector<shape>& shapes)
     std::size_t n = 0;
     while(n < shapes.front().lens().size() - 1)
         n = reduce_dim_all(shapes, n);
+    reduce_dim1(shapes);
 }

 std::vector<std::size_t> base_lens(const std::vector<shape>& shapes)
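reduce_dims exists so downstream kernels see as few axes as possible: adjacent dimensions merge when their strides are contiguous, and reduce_dim1 now also strips a trailing length-1 axis shared by all shapes. A hand-rolled sketch of the merge predicate (for intuition only, not the library API):

#include <cstddef>
#include <iostream>

// Axes a (outer) and b (inner) fuse when a's stride steps exactly over b's
// extent, or when a is a degenerate length-1 axis (the newly added clause).
bool can_merge(std::size_t astride, std::size_t alen, std::size_t bstride, std::size_t blen)
{
    return astride == bstride * blen or alen == 1;
}

int main()
{
    // Packed {2, 4} with strides {4, 1}: 4 == 1 * 4, so it folds to {8}/{1}.
    std::cout << can_merge(4, 2, 1, 4) << "\n"; // 1
    // A length-1 broadcast axis with stride 0 used to block folding;
    // "alen == 1" now lets {1, 4}/{0, 1} collapse to {4}/{1}.
    std::cout << can_merge(0, 1, 1, 4) << "\n"; // 1
    return 0;
}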
src/targets/gpu/fuse_ops.cpp

@@ -909,11 +909,6 @@ struct find_gemm_add
         if(not float_equal(gemm.beta, 0))
             return;

-        if(std::any_of(ins->inputs().begin(), ins->inputs().end(), [](auto i) {
-               return not i->get_shape().standard();
-           }))
-            return;
-
         auto inputs = gemm_ins->inputs();
         inputs.pop_back();
@@ -932,6 +927,53 @@ struct find_gemm_add
     }
 };

+auto pointwise_name(const std::string& s)
+{
+    return precompile_name("pointwise")(match::make_basic_pred_matcher([=](auto ins) {
+        module_ref pm = ins->module_inputs().front();
+        auto n = std::count_if(pm->begin(), pm->end(), [&](auto& i) { return i.name() == s; });
+        if(n != 1)
+            return false;
+        return std::all_of(pm->begin(), pm->end(), [&](auto& i) {
+            return starts_with(i.name(), "@") or i.name() == s;
+        });
+    }));
+}
+
+struct find_gemm_pointwise
+{
+    auto matcher() const
+    {
+        return pointwise_name("add")(
+            match::nargs(3),
+            match::all_of[match::inputs()](match::standard_shape()),
+            match::either_arg(0, 1)(match::used_once().bind("c"),
+                                    match::name("gpu::gemm")(match::nargs(3)).bind("gemm")));
+    }
+
+    void apply(module& m, const match::matcher_result& r) const
+    {
+        auto ins      = r.result;
+        auto gemm_ins = r.instructions["gemm"];
+        auto c_ins    = r.instructions["c"];
+        auto gemm     = any_cast<rocblas_gemm<op::dot>>(gemm_ins->get_operator());
+        // Already fused gemm
+        if(not float_equal(gemm.beta, 0))
+            return;
+        auto inputs = gemm_ins->inputs();
+        inputs.pop_back();
+        inputs.push_back(c_ins);
+        inputs.push_back(gemm_ins->inputs().back());
+        gemm.beta = 1;
+        m.replace_instruction(ins, gemm, inputs);
+    }
+};
+
 struct find_commutative_broadcast
 {
     auto matcher() const
@@ -1003,7 +1045,11 @@ void fuse_ops::apply(module& m) const
         find_add_unary{"gpu::tanh", hip_add_tanh{}, hip_triadd_tanh{}},
         find_add_clip{});
     run_passes(m, {dead_code_elimination{}});
-    match::find_matches(m, find_triadd_layernorm{}, find_gemm_add{}, find_commutative_broadcast{});
+    match::find_matches(m,
+                        find_triadd_layernorm{},
+                        find_gemm_add{},
+                        find_gemm_pointwise{},
+                        find_commutative_broadcast{});
     match::find_matches(m, find_contiguous{});
 }
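find_gemm_pointwise targets the precompiled pointwise modules produced by the JIT path: pointwise_name("add") accepts a "pointwise" instruction only when its sub-module performs exactly one "add" (everything else being @-prefixed bookkeeping instructions). A schematic before/after of the rewrite (hypothetical listing, not a real MIGraphX IR dump):

// before: g = gpu::gemm(a, b, alloc)         // beta == 0
//         r = pointwise[add](g, c, alloc2)   // sub-module computes one "add"
// after:  r = gpu::gemm(a, b, c, alloc)      // beta == 1
//
// The addend c becomes the GEMM's C matrix, so rocBLAS applies it as beta * C
// inside the same kernel launch instead of a separate elementwise pass.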
src/targets/gpu/gemm_impl.cpp

 #include <rocblas.h>
 #include <migraphx/gpu/gemm_impl.hpp>
+#include <migraphx/reduce_dims.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -27,6 +28,22 @@ rocblas_datatype get_type(shape::type_t type)
     MIGRAPHX_THROW("ROCBLAS_GEMM: data type not supported!");
 }

+void blas_shape(const shape& s)
+{
+    if(s.lens().size() < 2)
+        return;
+    if(std::none_of(s.strides().end() - 2, s.strides().end(), [&](auto i) { return i == 1; }))
+        MIGRAPHX_THROW("GPU_GEMM: needs to have one matrix stride as 1");
+    if(s.lens().size() < 3)
+        return;
+    shape batch_shape{s.type(),
+                      {s.lens().begin(), s.lens().end() - 2},
+                      {s.strides().begin(), s.strides().end() - 2}};
+    auto batch_shapes = reduce_dims({batch_shape});
+    if(batch_shapes.front().lens().size() != 1)
+        MIGRAPHX_THROW("GPU_GEMM: Batch dimension is not collapsible");
+}
+
 template <class R, class... Ts, class... Us>
 R rocblas_invoke(R (*f)(Ts...), Us... xs)
 {
@@ -36,6 +53,18 @@ R rocblas_invoke(R (*f)(Ts...), Us... xs)
     return f(xs..., nullptr, nullptr);
 }

+static bool is_transposed(const shape& s)
+{
+    if(not s.transposed())
+        return false;
+    return s.strides().back() != 1;
+}
+
+static rocblas_int get_batch_stride(const argument& a)
+{
+    return a.get_shape().strides()[a.get_shape().strides().size() - 3];
+}
+
 template <class T>
 void gemm_impl(context& ctx,
                const shape& output_shape,
@@ -45,8 +74,8 @@ void gemm_impl(context& ctx,
                bool int8_x4_format,
                bool compute_fp32)
 {
-    bool transa = args[0].get_shape().transposed();
-    bool transb = args[1].get_shape().transposed();
+    bool transa = is_transposed(args[0].get_shape());
+    bool transb = is_transposed(args[1].get_shape());
     auto n_dim  = output_shape.lens().size();
     auto dim_1  = n_dim - 1;
     auto dim_0  = n_dim - 2;
@@ -142,6 +171,9 @@ void gemm_impl(context& ctx,
         }
         else
         {
+            auto a_stride = get_batch_stride(args[0]);
+            auto b_stride = get_batch_stride(args[1]);
+            auto c_stride = get_batch_stride(args[2]);
             rocblas_invoke(&rocblas_gemm_strided_batched_ex,
                            ctx.get_stream().get_rocblas(),
                            transb ? rocblas_operation_transpose : rocblas_operation_none,
@@ -153,20 +185,20 @@ void gemm_impl(context& ctx,
                            to_pointer(args.at(1)),
                            arg_type,
                            ldb,
-                           k * n,
+                           b_stride,
                            to_pointer(args.at(0)),
                            arg_type,
                            lda,
-                           m * k,
+                           a_stride,
                            beta_v,
                            to_pointer(args[2]),
                            output_type,
                            ldc,
-                           m * n,
+                           c_stride,
                            is_3inputs ? to_pointer(args[3]) : to_pointer(args[2]),
                            output_type,
                            ldc,
-                           m * n,
+                           c_stride,
                            num_matrices,
                            compute_type,
                            rocblas_gemm_algo_standard,
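The hard-coded batch strides (k * n, m * k, m * n) assumed densely packed inputs; get_batch_stride reads the stride of the third-from-last axis instead, which stays correct for batches that are views into larger tensors. A small standalone illustration (assumed row-major layouts, not the MIGraphX API):

#include <cstddef>
#include <iostream>
#include <vector>

// The batch stride is simply the stride of the third-from-last axis.
std::size_t batch_stride(const std::vector<std::size_t>& strides)
{
    return strides[strides.size() - 3];
}

int main()
{
    // Packed {b=2, m=3, k=4}: strides {12, 4, 1}; batch stride 12 == m * k.
    std::cout << batch_stride({12, 4, 1}) << "\n"; // 12
    // A batch slice of a larger {4, 3, 4} tensor keeps the parent's batch
    // stride 24; the old m * k guess (12) would walk into the wrong matrix.
    std::cout << batch_stride({24, 4, 1}) << "\n"; // 24
    return 0;
}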
src/targets/gpu/include/migraphx/gpu/gemm.hpp

@@ -18,6 +18,8 @@ namespace gpu {
 struct context;

+void blas_shape(const shape& s);
+
 template <class Op>
 struct rocblas_gemm
 {
@@ -50,13 +52,14 @@ struct rocblas_gemm
         std::vector<shape> in_shapes(inputs);
         in_shapes.pop_back();
         check_shapes{in_shapes, *this}.not_broadcasted();
-        batch_not_transposed(inputs[0].strides());
-        batch_not_transposed(inputs[1].strides());
+        blas_shape(inputs[0]);
+        blas_shape(inputs[1]);

         // if gemm and add are fused
-        if(not float_equal(beta, 0))
+        if(in_shapes.size() > 2)
         {
             auto cmat_shape = in_shapes.back();
             in_shapes.pop_back();
+            blas_shape(cmat_shape);
             auto op_out_shape = op.compute_shape(in_shapes);
             if(cmat_shape.lens() != op_out_shape.lens())
             {
@@ -71,6 +74,7 @@ struct rocblas_gemm
                     to_string(cmat_shape.type()) +
                     ", it must be: " + to_string(op_out_shape.type()));
             }
+            return op_out_shape;
         }

         return op.compute_shape(in_shapes);
@@ -96,28 +100,6 @@ struct rocblas_gemm
         return args.back();
     }

-    void batch_not_transposed(const std::vector<std::size_t>& strides) const
-    {
-        if(strides.size() <= 2)
-            return;
-        auto dim_0       = strides.size() - 2;
-        auto matrix_size = std::max(strides[dim_0], strides[dim_0 + 1]);
-        std::vector<std::size_t> batch(strides.begin(), strides.begin() + dim_0);
-        if(std::all_of(batch.begin(), batch.end(), [&](auto i) { return (i < matrix_size); }))
-        {
-            MIGRAPHX_THROW("GPU_GEMM: matrix size and batch size {" + to_string_range(strides) +
-                           "} are transposed!");
-        }
-
-        if(std::adjacent_find(batch.begin(), batch.end(), [&](auto i, auto j) {
-               return (i < j or i < matrix_size or j < matrix_size);
-           }) != batch.end())
-        {
-            MIGRAPHX_THROW("GPU_GEMM: batch size {" + to_string_range(strides) + "} is transposed!");
-        }
-    }
-
     std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
     {
         return shapes.size() - 1;
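The stride-ordering heuristic in batch_not_transposed is replaced by the blas_shape contract defined in gemm_impl.cpp above: each matrix needs a unit stride in one of its last two axes, and all batch axes must collapse to a single strided dimension. Some illustrative lens/strides pairs under that reconstruction (hand-checked examples, not from the commit):

// {2, 3, 4, 5} / {60, 20, 5, 1}: batch {2, 3}/{60, 20} folds to {6}/{20} -> accepted
// {2, 3, 4, 5} / {20, 40, 5, 1}: batch axes cannot fold -> "Batch dimension is not collapsible"
// {4, 5}       / {1, 4}        : column-major matrix, unit stride present -> accepted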
src/targets/gpu/jit/gathernd.cpp

@@ -19,7 +19,7 @@ namespace gpu {
 // NOLINTNEXTLINE
 static const char* const gathernd_kernel = R"__migraphx__(
 #include <migraphx/kernels/gathernd.hpp>
-#include <migraphx/kernels/basic_ops.hpp>
+#include <migraphx/kernels/ops.hpp>
 #include <migraphx/kernels/integral_constant.hpp>
 #include <migraphx/kernels/generic_constant.hpp>
 #include <args.hpp>
src/targets/gpu/jit/roialign.cpp

@@ -19,7 +19,6 @@ namespace gpu {
 // NOLINTNEXTLINE
 static const char* const roialign_kernel = R"__migraphx__(
 #include <migraphx/kernels/roialign.hpp>
-#include <migraphx/kernels/basic_ops.hpp>
 #include <migraphx/kernels/integral_constant.hpp>
 #include <migraphx/kernels/generic_constant.hpp>
 #include <args.hpp>
src/targets/gpu/jit/scatternd.cpp

@@ -19,7 +19,6 @@ namespace gpu {
 // NOLINTNEXTLINE
 static const char* const scatternd_kernel = R"__migraphx__(
 #include <migraphx/kernels/scatternd.hpp>
-#include <migraphx/kernels/basic_ops.hpp>
 #include <migraphx/kernels/integral_constant.hpp>
 #include <migraphx/kernels/generic_constant.hpp>
 #include <args.hpp>
src/targets/gpu/kernels/include/migraphx/kernels/array.hpp

@@ -146,8 +146,8 @@ struct array
     constexpr array carry(array result) const
     {
-        uint32_t overflow = 0;
-        for(std::ptrdiff_t i = result.size() - 1; i > 0; i--)
+        index_int overflow = 0;
+        for(diff_int i = result.size() - 1; i > 0; i--)
         {
             auto z = result[i] + overflow;
             // Reset overflow
src/targets/gpu/kernels/include/migraphx/kernels/basic_ops.hpp deleted (100755 → 0)

-#ifndef MIGRAPHX_GUARD_AMDMIGRAPHX_KERNELS_BASIC_OPS_HPP
-#define MIGRAPHX_GUARD_AMDMIGRAPHX_KERNELS_BASIC_OPS_HPP
-
-#include <migraphx/kernels/types.hpp>
-
-namespace migraphx {
-
-struct sum
-{
-    template <class T, class U>
-    constexpr auto operator()(T x, U y) const
-    {
-        return x + y;
-    }
-};
-
-struct product
-{
-    template <class T, class U>
-    constexpr auto operator()(T x, U y) const
-    {
-        return x * y;
-    }
-};
-
-struct id
-{
-    template <class T>
-    constexpr auto operator()(T x) const
-    {
-        return x;
-    }
-};
-
-struct mean
-{
-    size_t item_num = 1;
-
-    template <class T>
-    constexpr auto operator()(T x) const
-    {
-        return x / static_cast<T>(item_num);
-    }
-};
-
-struct max_f
-{
-    template <class T, class U>
-    constexpr auto operator()(T x, U y) const
-    {
-        return (x > y) ? x : y;
-    }
-};
-inline constexpr auto max = max_f{};
-
-struct min_f
-{
-    template <class T, class U>
-    constexpr auto operator()(T x, U y) const
-    {
-        return (x < y) ? x : y;
-    }
-};
-inline constexpr auto min = min_f{};
-
-struct lowest
-{
-    template <class T>
-    constexpr operator T() const
-    {
-        return std::numeric_limits<T>::lowest();
-    }
-};
-
-struct highest
-{
-    template <class T>
-    constexpr operator T() const
-    {
-        return std::numeric_limits<T>::max();
-    }
-};
-
-} // namespace migraphx
-#endif // MIGRAPHX_GUARD_AMDMIGRAPHX_KERNELS_BASIC_OPS_HPP
src/targets/gpu/kernels/include/migraphx/kernels/functional.hpp

@@ -137,7 +137,7 @@ constexpr auto by(F f)
 template <class F, class... Ts>
 constexpr void each_args(F f, Ts&&... xs)
 {
-    swallow{(f(std::forward<Ts>(xs)), 0)...};
+    swallow{(f(static_cast<Ts&&>(xs)), 0)...};
 }

 template <class F>
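Kernel headers avoid <utility>, so the change spells out what std::forward does. A minimal sketch of the equivalence (illustrative; uses the remove_reference_t alias from the local type_traits.hpp):

template <class T>
constexpr T&& forward(remove_reference_t<T>& x) noexcept
{
    // Reference collapsing: T = U& yields U& (an lvalue stays an lvalue),
    // T = U yields U&& — exactly what static_cast<Ts&&>(xs) does above.
    return static_cast<T&&>(x);
}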
src/targets/gpu/kernels/include/migraphx/kernels/iota_iterator.hpp

@@ -13,7 +13,7 @@ struct basic_iota_iterator
     F f;

     using difference_type = diff_int;
-    using reference       = decltype(f(std::declval<Iterator>()));
+    using reference       = decltype(f(declval<Iterator>()));
     using value_type      = remove_reference_t<reference>;
     using pointer         = add_pointer_t<value_type>;
src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp

@@ -3,14 +3,15 @@
 #include <migraphx/kernels/index.hpp>
 #include <migraphx/kernels/dfor.hpp>
-#include <migraphx/kernels/basic_ops.hpp>
+#include <migraphx/kernels/ops.hpp>
+#include <migraphx/kernels/math.hpp>
 #include <migraphx/kernels/array.hpp>

 namespace migraphx {

 struct max_pool
 {
-    MIGRAPHX_DEVICE_CONSTEXPR auto init() { return lowest(); }
+    MIGRAPHX_DEVICE_CONSTEXPR auto init() { return lowest{}; }

     template <class T>
     MIGRAPHX_DEVICE_CONSTEXPR T operator()(T x, T y)
@@ -55,7 +56,7 @@ MIGRAPHX_DEVICE_CONSTEXPR typename Iterator::value_type bilinear_interpolate(
             return 0;
         }
-        xy[ii]   = max(xy[ii], 0.0f);
+        xy[ii]   = migraphx::max(xy[ii], 0.0f);
         low[ii]  = xy[ii];
         high[ii] = low[ii] + 1;
         if(low[ii] >= dims[ii] - 1)
@@ -164,11 +165,12 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, W& y_t,
     for(index_int ii = 0; ii < roi_size.size(); ++ii)
     {
         roi_size[ii] = roi_ends[ii] - roi_starts[ii];
-        roi_size[ii] = max(roi_size[ii], 1.0f);
+        roi_size[ii] = migraphx::max(roi_size[ii], 1.0f);

         bin_size[ii]      = roi_size[ii] / out_dims[ii];
         bin_grid_size[ii] = (s.sampling_ratio > 0)
                                 ? s.sampling_ratio
-                                : std::ceil(roi_size[ii] / out_dims[ii]);
+                                : migraphx::ceil(roi_size[ii] / out_dims[ii]);
     }

     const auto offset_x = x + ((batch_ind * channel_num + c) * in_dims[0] * in_dims[1]);
src/targets/gpu/kernels/include/migraphx/kernels/tensor_view.hpp

@@ -11,7 +11,7 @@ template <class T>
 struct tensor_view_iterator_read
 {
     T* view;
-    constexpr auto& operator()(std::size_t n) const
+    constexpr auto& operator()(index_int n) const
     {
         MIGRAPHX_ASSERT(view != nullptr);
         return (*view)[n];
src/targets/gpu/kernels/include/migraphx/kernels/type_traits.hpp

@@ -35,6 +35,21 @@ struct enable_if<true, T>
 template <bool B, class T = void>
 using enable_if_t = typename enable_if<B, T>::type;

+template <bool B, class T, class F>
+struct conditional
+{
+    using type = T;
+};
+
+template <class T, class F>
+struct conditional<false, T, F>
+{
+    using type = F;
+};
+
+template <bool B, class T, class F>
+using conditional_t = typename conditional<B, T, F>::type;
+
 // NOLINTNEXTLINE
 #define MIGRAPHX_BUILTIN_TYPE_TRAIT1(name) \
     template <class T> \
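These mirror std::conditional / std::conditional_t for device code that cannot include <type_traits>. A usage sketch (the vec.hpp change below is the in-tree consumer):

// conditional_t selects the first type argument when the predicate holds:
static_assert(is_same<conditional_t<true, int, float>, int>{}, "");
static_assert(is_same<conditional_t<false, int, float>, float>{}, "");
// vec.hpp uses it to map bool elements to uint8_t before vectorizing:
// vec<conditional_t<is_same<T, bool>{}, uint8_t, T>, N>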
src/targets/gpu/kernels/include/migraphx/kernels/vec.hpp

@@ -79,7 +79,7 @@ __device__ __host__ auto as_vec(T* x)
 }

 template <class T, index_int N>
-using safe_vec = vec<std::conditional_t<std::is_same<T, bool>{}, uint8_t, T>, N>;
+using safe_vec = vec<conditional_t<is_same<T, bool>{}, uint8_t, T>, N>;

 template <class... Ts>
 constexpr auto vec_transform(Ts... xs)
test/onnx/gen_onnx.py

@@ -3178,6 +3178,20 @@ def mean_test():
     return ([node], data, [mean])


+@onnx_test
+def mean_integral_test():
+    data = [
+        helper.make_tensor_value_info(str(i), TensorProto.INT32, [2, 2, 2])
+        for i in range(10)
+    ]
+    data_names = [str(i) for i in range(10)]
+    mean = helper.make_tensor_value_info('mean', TensorProto.INT32, [2, 2, 2])
+    node = onnx.helper.make_node("Mean", inputs=data_names, outputs=["mean"])
+
+    return ([node], data, [mean])
+
+
 @onnx_test
 def min_test():
     a = helper.make_tensor_value_info('0', TensorProto.FLOAT, [3])
test/onnx/mean_integral_test.onnx 0 → 100644 (+67)

(New binary file: an ONNX protobuf with graph "mean_integral_test" applying the Mean operator to ten int32 inputs named "0" through "9", producing output "mean"; raw bytes not rendered.)
test/onnx/onnx_test.cpp

@@ -2890,6 +2890,30 @@ TEST_CASE(mean_test)
     EXPECT(p == prog);
 }

+TEST_CASE(mean_integral_test)
+{
+    const std::size_t num_data = 10;
+    migraphx::program p;
+    auto* mm = p.get_main_module();
+    migraphx::shape s{migraphx::shape::int32_type, {2, 2, 2}};
+    auto mean = mm->add_parameter("0", s);
+    for(std::size_t i = 1; i < num_data; ++i)
+    {
+        auto data = mm->add_parameter(std::to_string(i), s);
+        mean      = mm->add_instruction(migraphx::make_op("add"), mean, data);
+    }
+    auto div_lit =
+        mm->add_literal(migraphx::literal{migraphx::shape{s.type()}, {num_data}});
+    auto divisor = mm->add_instruction(
+        migraphx::make_op("multibroadcast", {{"out_lens", s.lens()}}), div_lit);
+    mean = mm->add_instruction(migraphx::make_op("div"), mean, divisor);
+
+    auto prog = optimize_onnx("mean_integral_test.onnx");
+
+    EXPECT(p == prog);
+}
+
 TEST_CASE(min_test)
 {
     migraphx::program p;