Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
870a396b
Commit
870a396b
authored
Jan 23, 2023
by
Khalique Ahmed
Browse files
manual merge
parents
228b665c
d309e02f
Changes
473
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
332 additions
and
841 deletions
+332
-841
src/targets/gpu/kernels/include/migraphx/kernels/pointwise.hpp
...argets/gpu/kernels/include/migraphx/kernels/pointwise.hpp
+0
-32
src/targets/gpu/kernels/include/migraphx/kernels/ranges.hpp
src/targets/gpu/kernels/include/migraphx/kernels/ranges.hpp
+16
-15
src/targets/gpu/kernels/include/migraphx/kernels/reduce.hpp
src/targets/gpu/kernels/include/migraphx/kernels/reduce.hpp
+19
-21
src/targets/gpu/kernels/include/migraphx/kernels/shape.hpp
src/targets/gpu/kernels/include/migraphx/kernels/shape.hpp
+1
-0
src/targets/gpu/kernels/include/migraphx/kernels/softmax.hpp
src/targets/gpu/kernels/include/migraphx/kernels/softmax.hpp
+9
-5
src/targets/gpu/kernels/include/migraphx/kernels/vec.hpp
src/targets/gpu/kernels/include/migraphx/kernels/vec.hpp
+32
-0
src/targets/gpu/leaky_relu.cpp
src/targets/gpu/leaky_relu.cpp
+0
-65
src/targets/gpu/lowering.cpp
src/targets/gpu/lowering.cpp
+49
-171
src/targets/gpu/mlir.cpp
src/targets/gpu/mlir.cpp
+95
-69
src/targets/gpu/perfdb.cpp
src/targets/gpu/perfdb.cpp
+10
-5
src/targets/gpu/prefuse_ops.cpp
src/targets/gpu/prefuse_ops.cpp
+18
-5
src/targets/gpu/quant_convolution.cpp
src/targets/gpu/quant_convolution.cpp
+0
-195
src/targets/gpu/rocblas.cpp
src/targets/gpu/rocblas.cpp
+33
-0
src/targets/gpu/softmax.cpp
src/targets/gpu/softmax.cpp
+0
-49
src/targets/gpu/target.cpp
src/targets/gpu/target.cpp
+5
-5
src/targets/ref/lowering.cpp
src/targets/ref/lowering.cpp
+17
-188
src/tf/parse_batchnorm.cpp
src/tf/parse_batchnorm.cpp
+27
-5
src/tf/parse_conv.cpp
src/tf/parse_conv.cpp
+0
-5
src/tf/parse_depthwiseconv.cpp
src/tf/parse_depthwiseconv.cpp
+0
-5
src/tf/tf_parser.cpp
src/tf/tf_parser.cpp
+1
-1
No files found.
src/targets/gpu/kernels/include/migraphx/kernels/pointwise.hpp
View file @
870a396b
...
...
@@ -33,38 +33,6 @@
namespace
migraphx
{
template
<
class
T
>
struct
implicit_conversion_op
{
T
x
;
template
<
index_int
N
,
class
U
>
constexpr
operator
vec
<
U
,
N
>
()
const
{
if
constexpr
(
vec_size
<
T
>
()
==
0
)
{
return
x
;
}
else
{
static_assert
(
vec_size
<
T
>
()
==
N
,
"Vector mismatch size"
);
return
__builtin_convertvector
(
x
,
vec
<
U
,
N
>
);
}
}
template
<
class
U
>
constexpr
operator
U
()
const
{
return
x
;
}
};
template
<
class
T
>
constexpr
implicit_conversion_op
<
T
>
implicit_conversion
(
T
x
)
{
return
{
x
};
}
template
<
class
F
,
class
T
,
class
...
Ts
>
__device__
void
pointwise_tensor
(
index
idx
,
F
f
,
T
out
,
Ts
...
xs
)
{
...
...
src/include/migraphx/
int_divide
.hpp
→
src/
targets/gpu/kernels/
include/migraphx/
kernels/ranges
.hpp
View file @
870a396b
...
...
@@ -21,28 +21,29 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_
RTGLIB_INT_DIVIDE
_HPP
#define MIGRAPHX_GUARD_
RTGLIB_INT_DIVIDE
_HPP
#ifndef MIGRAPHX_GUARD_
KERNELS_RANGES
_HPP
#define MIGRAPHX_GUARD_
KERNELS_RANGES
_HPP
#include <migraphx/config.hpp>
#include <cmath>
#include <migraphx/kernels/iota_iterator.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
template
<
class
R
,
class
T
,
class
U
>
R
floor_divide
(
T
x
,
U
y
)
template
<
class
Iterator
>
struct
iterator_range
{
return
R
(
std
::
floor
(
double
(
x
)
/
double
(
y
)));
}
Iterator
start
;
Iterator
last
;
constexpr
Iterator
begin
()
const
{
return
start
;
}
template
<
class
R
,
class
T
,
class
U
>
R
ceil_divide
(
T
x
,
U
y
)
constexpr
Iterator
end
()
const
{
return
last
;
}
};
constexpr
iterator_range
<
iota_iterator
>
range
(
diff_int
start
,
diff_int
last
)
{
return
R
(
std
::
ceil
(
double
(
x
)
/
double
(
y
)))
;
return
{{
start
,
{}},
{
last
,
{}}}
;
}
constexpr
iterator_range
<
iota_iterator
>
range
(
diff_int
last
)
{
return
range
(
0
,
last
);
}
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
#endif // MIGRAPHX_GUARD_KERNELS_RANGES_HPP
src/targets/gpu/kernels/include/migraphx/kernels/reduce.hpp
View file @
870a396b
...
...
@@ -94,16 +94,17 @@ MIGRAPHX_DPP_REDUCE(op::max, v_max)
MIGRAPHX_DPP_REDUCE
(
op
::
min
,
v_min
)
MIGRAPHX_DPP_REDUCE
(
op
::
product
,
v_mul
)
template
<
class
Op
,
class
T
,
class
F
>
__device__
auto
block_reduce
(
index
idx
,
Op
op
,
T
init
,
i
ndex
_int
n
,
F
f
)
template
<
class
Op
,
class
T
,
class
Index
,
class
F
>
__device__
auto
block_reduce
(
index
idx
,
Op
op
,
T
init
,
I
ndex
n
,
F
f
)
{
MIGRAPHX_ASSERT
(
idx
.
max_nlocal
()
==
idx
.
nlocal
());
#if __AMDGCN_WAVEFRONT_SIZE == 32
constexpr
index_int
lanes_per_thread
=
16
;
#else
constexpr
index_int
lanes_per_thread
=
64
;
#endif
using
type
=
decltype
(
f
(
0
));
__shared__
type
buffer
[
idx
.
nlocal
()
/
lanes_per_thread
];
__shared__
type
buffer
[
idx
.
max_
nlocal
()
/
lanes_per_thread
];
type
x
=
init
;
idx
.
local_stride
(
n
,
[
&
](
auto
i
)
{
x
=
op
(
x
,
f
(
i
));
});
dpp_reduce
(
x
,
op
);
...
...
@@ -123,12 +124,12 @@ __device__ auto block_reduce(index idx, Op op, T init, index_int n, F f)
return
y
;
}
#else
template
<
class
Op
,
class
T
,
class
F
>
__device__
auto
block_reduce
(
index
idx
,
Op
op
,
T
init
,
i
ndex
_int
n
,
F
f
)
template
<
class
Op
,
class
T
,
class
Index
,
class
F
>
__device__
auto
block_reduce
(
index
idx
,
Op
op
,
T
init
,
I
ndex
n
,
F
f
)
{
MIGRAPHX_ASSERT
(
idx
.
max_nlocal
()
==
idx
.
nlocal
());
using
type
=
decltype
(
f
(
0
));
__shared__
type
buffer
[
idx
.
nlocal
()];
__shared__
type
buffer
[
idx
.
max_
nlocal
()];
type
x
=
init
;
idx
.
local_stride
(
n
,
[
&
](
auto
i
)
{
x
=
op
(
x
,
f
(
i
));
});
buffer
[
idx
.
local
]
=
x
;
...
...
@@ -196,17 +197,14 @@ struct block
struct
reducer
{
index
idx
;
Slicer
slice
r
;
Slicer
slice
;
template
<
class
Op
,
class
T
,
class
Read
>
__device__
auto
reduce
(
Op
op
,
T
init
,
Read
read
)
const
{
return
sliced
(
slicer
,
[
=
](
auto
x
,
auto
...
xs
)
{
return
vec_reduce
(
block_reduce
(
idx
,
op
,
init
,
x
.
get_shape
().
elements
(),
[
&
](
auto
j
)
{
return
read
(
x
[
j
],
xs
[
j
]...);
}),
op
);
return
sliced
(
slice
,
[
=
](
auto
x
,
auto
...
xs
)
{
return
block_reduce
(
idx
,
op
,
init
,
x
.
get_shape
().
elements
(),
[
&
](
auto
j
)
{
return
vec_reduce
(
read
(
x
[
j
],
xs
[
j
]...),
op
);
});
});
}
...
...
@@ -220,7 +218,7 @@ struct block
template
<
class
F
>
__device__
auto
inner
(
F
f
)
const
{
return
sliced
(
slice
r
,
[
=
](
auto
x
,
auto
...
xs
)
{
return
sliced
(
slice
,
[
=
](
auto
x
,
auto
...
xs
)
{
idx
.
local_stride
(
x
.
get_shape
().
elements
(),
[
&
](
auto
j
)
{
f
(
x
[
j
],
xs
[
j
]...);
});
});
}
...
...
@@ -228,7 +226,7 @@ struct block
template
<
class
Input
>
constexpr
auto
elements
()
const
{
using
reduce_type
=
decltype
(
slice
r
(
Input
{}));
using
reduce_type
=
decltype
(
slice
(
Input
{}));
using
value_type
=
typename
Input
::
type
;
constexpr
auto
relements
=
get_shape_c
<
reduce_type
>
{}.
elements
();
if
constexpr
(
vec_size
<
value_type
>
()
>
1
)
...
...
@@ -262,11 +260,11 @@ struct lane
struct
reducer
{
index
idx
;
Slicer
slice
r
;
Slicer
slice
;
template
<
class
Op
,
class
T
,
class
Read
>
__device__
auto
reduce
(
Op
op
,
T
init
,
Read
read
)
const
{
return
sliced
(
slice
r
,
[
=
](
auto
x
,
auto
...
xs
)
{
return
sliced
(
slice
,
[
=
](
auto
x
,
auto
...
xs
)
{
using
type
=
typename
decltype
(
x
)
::
type
;
type
r
=
init
;
for
(
index_int
j
=
0
;
j
<
x
.
get_shape
().
elements
();
j
++
)
...
...
@@ -286,7 +284,7 @@ struct lane
template
<
class
F
>
__device__
auto
inner
(
F
f
)
const
{
return
sliced
(
slice
r
,
[
=
](
auto
x
,
auto
...
xs
)
{
return
sliced
(
slice
,
[
=
](
auto
x
,
auto
...
xs
)
{
for
(
index_int
j
=
0
;
j
<
x
.
get_shape
().
elements
();
j
++
)
{
f
(
x
[
j
],
xs
[
j
]...);
...
...
@@ -297,7 +295,7 @@ struct lane
template
<
class
Input
>
constexpr
auto
elements
()
const
{
using
reduce_type
=
decltype
(
slice
r
(
Input
{}));
using
reduce_type
=
decltype
(
slice
(
Input
{}));
return
get_shape_c
<
reduce_type
>
{}.
elements
();
}
};
...
...
src/targets/gpu/kernels/include/migraphx/kernels/shape.hpp
View file @
870a396b
...
...
@@ -128,6 +128,7 @@ struct shape
result
[
0
]
=
tidx
;
return
result
;
}
/// Convert multi-index into a single index
constexpr
index_int
single
(
index_array
idx
)
const
{
...
...
src/targets/gpu/kernels/include/migraphx/kernels/softmax.hpp
View file @
870a396b
...
...
@@ -33,11 +33,15 @@ template <index_int Axis, class Input, class Output>
__device__
void
softmax
(
Input
input
,
Output
output
)
{
reduce
::
block
::
run
<
reduce
::
with_axis
<
Input
,
Axis
>>
([
&
](
auto
,
auto
r
)
{
auto
batch_max
=
r
.
reduce
(
op
::
max
{},
lowest
{},
op
::
id
{})(
input
);
auto
batch_sum
=
r
.
reduce
(
op
::
sum
{},
0
,
[
&
](
auto
x
)
{
return
migraphx
::
exp
(
x
-
batch_max
);
})(
input
);
r
.
inner
([
&
](
auto
&
y
,
auto
x
)
{
y
=
migraphx
::
exp
(
x
-
batch_max
)
/
batch_sum
;
})(
output
,
input
);
#ifdef MIGRAPHX_USE_FAST_SOFTMAX
const
auto
c
=
vec_at
(
r
.
slice
(
input
)[
0
],
0
);
#else
const
auto
c
=
r
.
reduce
(
op
::
max
{},
lowest
{},
op
::
id
{})(
input
);
#endif
auto
batch_sum
=
r
.
reduce
(
op
::
sum
{},
0
,
[
&
](
auto
x
)
{
return
migraphx
::
convert
<
float
>
(
migraphx
::
exp
(
x
-
c
));
})(
input
);
r
.
inner
([
&
](
auto
&
y
,
auto
x
)
{
y
=
migraphx
::
exp
(
x
-
c
)
/
batch_sum
;
})(
output
,
input
);
});
}
...
...
src/targets/gpu/kernels/include/migraphx/kernels/vec.hpp
View file @
870a396b
...
...
@@ -185,5 +185,37 @@ constexpr auto vec_reduce(T x, Op op)
}
}
template
<
class
T
>
struct
implicit_conversion_op
{
T
x
;
template
<
index_int
N
,
class
U
>
constexpr
operator
vec
<
U
,
N
>
()
const
{
if
constexpr
(
vec_size
<
T
>
()
==
0
)
{
return
x
;
}
else
{
static_assert
(
vec_size
<
T
>
()
==
N
,
"Vector mismatch size"
);
return
__builtin_convertvector
(
x
,
vec
<
U
,
N
>
);
}
}
template
<
class
U
>
constexpr
operator
U
()
const
{
return
x
;
}
};
template
<
class
T
>
constexpr
implicit_conversion_op
<
T
>
implicit_conversion
(
T
x
)
{
return
{
x
};
}
}
// namespace migraphx
#endif // MIGRAPHX_GUARD_KERNELS_VEC_HPP
src/targets/gpu/leaky_relu.cpp
deleted
100644 → 0
View file @
228b665c
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/leaky_relu.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/miopen.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
shape
miopen_leaky_relu
::
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
check_shapes
{
inputs
,
*
this
}.
has
(
2
).
not_broadcasted
();
return
inputs
.
at
(
1
);
}
argument
miopen_leaky_relu
::
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
{
float
alpha
=
1
;
float
beta
=
0
;
auto
x_desc
=
make_tensor
(
args
[
0
].
get_shape
());
auto
y_desc
=
make_tensor
(
output_shape
);
miopenActivationForward
(
ctx
.
get_stream
().
get_miopen
(),
ad
.
get
(),
&
alpha
,
x_desc
.
get
(),
args
[
0
].
implicit
(),
&
beta
,
y_desc
.
get
(),
args
[
1
].
implicit
());
return
args
[
1
];
}
void
miopen_leaky_relu
::
finalize
(
context
&
,
const
shape
&
,
const
std
::
vector
<
shape
>&
)
{
ad
=
make_leaky_relu
(
op
.
alpha
);
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/lowering.cpp
View file @
870a396b
...
...
@@ -26,24 +26,18 @@
#include <migraphx/manage_ptr.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/instruction_ref.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/deconvolution.hpp>
#include <migraphx/op/dot.hpp>
#include <migraphx/op/if_op.hpp>
#include <migraphx/op/reshape.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/op/quant_dot.hpp>
#include <migraphx/gpu/batch_norm_inference.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/convolution.hpp>
#include <migraphx/gpu/deconvolution.hpp>
#include <migraphx/gpu/device_name.hpp>
#include <migraphx/gpu/gemm.hpp>
#include <migraphx/gpu/int8_conv_pack.hpp>
#include <migraphx/gpu/miopen.hpp>
#include <migraphx/gpu/quant_convolution.hpp>
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/compiler.hpp>
#include <migraphx/iterator_for.hpp>
...
...
@@ -81,85 +75,25 @@ struct miopen_apply
(
void
)
i
;
}
const
std
::
unordered_set
<
std
::
string
>&
get_rocblas_fp32_archs
()
{
static
std
::
unordered_set
<
std
::
string
>
supported_archs
{
"gfx908"
,
"gfx90a"
};
return
supported_archs
;
}
void
init
()
{
assert
(
mod
!=
nullptr
);
assert
(
pass
!=
nullptr
);
#if ROCBLAS_VERSION_MAJOR >= 2 && ROCBLAS_VERSION_MINOR >= 38
auto
&
ctx
=
get_context
();
const
auto
device_name
=
trim
(
split_string
(
get_device_name
(),
':'
).
front
());
if
(
contains
(
get_rocblas_fp32_archs
(),
device_name
))
compute_fp32
=
true
;
rocblas_gemm_flags
flag
;
rocblas_query_int8_layout_flag
(
ctx
.
get_stream
().
get_rocblas
(),
&
flag
);
int8_x4_format
=
(
flag
==
rocblas_gemm_flags_pack_int8x4
);
#endif
auto
&
ctx
=
get_context
();
int8_x4_format
=
get_int8_x4_format
(
ctx
);
compute_fp32
=
get_compute_fp32_flag
();
offload_copy
=
(
mod
->
name
()
==
"main"
)
?
pass
->
offload_copy
:
false
;
add_generic_op
(
"acos"
);
add_generic_op
(
"acosh"
);
add_generic_op
(
"add"
);
add_generic_op
(
"asin"
);
add_generic_op
(
"asinh"
);
add_generic_op
(
"atan"
);
add_generic_op
(
"atanh"
);
add_generic_op
(
"ceil"
);
add_generic_op
(
"contiguous"
);
add_generic_op
(
"cos"
);
add_generic_op
(
"cosh"
);
add_generic_op
(
"div"
);
add_generic_op
(
"equal"
);
add_generic_op
(
"erf"
);
add_generic_op
(
"exp"
);
add_generic_op
(
"floor"
);
add_generic_op
(
"greater"
);
add_generic_op
(
"less"
);
add_generic_op
(
"log"
);
add_generic_op
(
"logical_and"
);
add_generic_op
(
"logical_or"
);
add_generic_op
(
"logical_xor"
);
add_generic_op
(
"max"
);
add_generic_op
(
"min"
);
add_generic_op
(
"mul"
);
add_generic_op
(
"not"
);
add_generic_op
(
"pow"
);
add_generic_op
(
"prelu"
);
add_generic_op
(
"recip"
);
add_generic_op
(
"relu"
);
add_generic_op
(
"round"
);
add_generic_op
(
"rsqrt"
);
add_generic_op
(
"sigmoid"
);
add_generic_op
(
"sign"
);
add_generic_op
(
"sin"
);
add_generic_op
(
"sinh"
);
add_generic_op
(
"sqdiff"
);
add_generic_op
(
"sqrt"
);
add_generic_op
(
"sub"
);
add_generic_op
(
"tan"
);
add_generic_op
(
"tanh"
);
add_generic_op
(
"where"
);
add_extend_op
(
"abs"
);
add_extend_op
(
"argmax"
);
add_extend_op
(
"argmin"
);
add_extend_op
(
"clip"
);
add_extend_op
(
"convert"
);
add_extend_op
(
"elu"
);
add_extend_op
(
"gather"
);
add_extend_op
(
"leaky_relu"
);
add_extend_op
(
"logsoftmax"
);
add_extend_op
(
"lrn"
);
add_extend_op
(
"multinomial"
);
add_extend_op
(
"nonzero"
);
add_extend_op
(
"pad"
);
add_extend_op
(
"pooling"
);
add_extend_op
(
"prefix_scan_sum"
);
add_extend_op
(
"reverse"
);
...
...
@@ -169,16 +103,15 @@ struct miopen_apply
add_extend_op
(
"scatter_none"
);
add_extend_op
(
"topk"
);
add_
batch_norm_inference_op
(
);
add_convolution_op
();
add_
de
convolution_op
();
add_
convolution_op
(
"convolution"
);
add_convolution_op
(
"deconvolution"
);
add_convolution_op
(
"quant_convolution"
);
add_gemm_op
<
op
::
dot
>
(
"dot"
);
add_gemm_op
<
op
::
quant_dot
>
(
"quant_dot"
);
add_if_op
();
add_loop_op
();
add_neg_op
();
add_nms_op
();
add_quant_convolution_op
();
}
void
copy_params
()
const
...
...
@@ -227,7 +160,8 @@ struct miopen_apply
init
();
for
(
auto
it
=
mod
->
begin
();
it
!=
mod
->
end
();
it
++
)
{
auto
s
=
it
->
get_shape
();
auto
s
=
it
->
get_shape
();
auto
attrs
=
it
->
get_operator
().
attributes
();
if
(
apply_map
.
count
(
it
->
name
())
>
0
)
{
check_shape
(
s
,
apply_map
.
at
(
it
->
name
())(
it
));
...
...
@@ -236,11 +170,37 @@ struct miopen_apply
{
check_shape
(
s
,
insert_precompile_op
(
it
));
}
else
if
(
attrs
.
contains
(
"target"
))
{
check_shape
(
s
,
insert_custom_op
(
it
,
attrs
));
}
}
copy_params
();
}
instruction_ref
insert_custom_op
(
instruction_ref
ins
,
const
value
&
attrs
)
const
{
const
auto
&
custom_op
=
ins
->
get_operator
();
if
(
attrs
.
at
(
"target"
)
==
"cpu"
)
{
auto
s
=
ins
->
get_shape
();
std
::
vector
<
instruction_ref
>
cpu_inputs
;
auto
inputs
=
ins
->
inputs
();
auto
output
=
inputs
.
back
();
std
::
transform
(
inputs
.
begin
(),
inputs
.
end
(),
std
::
back_inserter
(
cpu_inputs
),
[
&
](
auto
in
)
{
return
mod
->
insert_instruction
(
ins
,
make_op
(
"hip::copy_from_gpu"
),
in
);
});
cpu_inputs
.
front
()
=
mod
->
insert_instruction
(
ins
,
make_op
(
"hip::sync_stream"
),
cpu_inputs
);
auto
cpu_out
=
mod
->
insert_instruction
(
ins
,
custom_op
,
cpu_inputs
);
auto
gpu_out
=
mod
->
insert_instruction
(
ins
,
make_op
(
"hip::copy_to_gpu"
),
cpu_out
,
output
);
return
mod
->
replace_instruction
(
ins
,
gpu_out
);
}
return
ins
;
}
instruction_ref
insert_precompile_op
(
instruction_ref
ins
)
const
{
auto
output
=
insert_allocation
(
ins
,
ins
->
get_shape
());
...
...
@@ -259,38 +219,6 @@ struct miopen_apply
return
mod
->
insert_instruction
(
ins
,
make_op
(
"allocate"
,
{{
"shape"
,
to_value
(
s
)}}));
}
void
add_convolution_op
()
{
apply_map
.
emplace
(
"convolution"
,
[
=
](
instruction_ref
ins
)
{
auto
&&
op
=
any_cast
<
op
::
convolution
>
(
ins
->
get_operator
());
auto
conv
=
miopen_convolution
{
op
,
make_conv
(
op
)};
auto
ws
=
conv
.
find
(
get_context
(),
ins
->
get_shape
(),
to_shapes
(
ins
->
inputs
()));
auto
workspace
=
insert_allocation
(
ins
,
ws
);
auto
output
=
insert_allocation
(
ins
,
ins
->
get_shape
());
return
mod
->
replace_instruction
(
ins
,
conv
,
ins
->
inputs
().
at
(
0
),
ins
->
inputs
().
at
(
1
),
workspace
,
output
);
});
}
void
add_deconvolution_op
()
{
apply_map
.
emplace
(
"deconvolution"
,
[
=
](
instruction_ref
ins
)
{
auto
&&
op
=
any_cast
<
op
::
deconvolution
>
(
ins
->
get_operator
());
auto
conv
=
miopen_deconvolution
{
op
,
make_deconv
(
op
)};
auto
ws
=
conv
.
find
(
get_context
(),
ins
->
get_shape
(),
to_shapes
(
ins
->
inputs
()));
auto
workspace
=
insert_allocation
(
ins
,
ws
);
auto
output
=
insert_allocation
(
ins
,
ins
->
get_shape
());
return
mod
->
replace_instruction
(
ins
,
conv
,
ins
->
inputs
().
at
(
0
),
ins
->
inputs
().
at
(
1
),
workspace
,
output
);
});
}
template
<
typename
Op
>
void
add_gemm_op
(
const
std
::
string
&
name
)
{
...
...
@@ -304,32 +232,19 @@ struct miopen_apply
});
}
void
add_
quant_
convolution_op
()
void
add_convolution_op
(
const
std
::
string
&
name
)
{
apply_map
.
emplace
(
"quant_convolution"
,
[
=
](
instruction_ref
ins
)
{
auto
&&
op
=
any_cast
<
op
::
quant_convolution
>
(
ins
->
get_operator
());
shape
ws
;
miopen_quant_convolution
conv
;
auto
compile_quant_conv_with_format
=
[
&
](
bool
format
)
{
conv
=
miopen_quant_convolution
{
op
,
format
,
make_conv
(
op
)};
ws
=
conv
.
find
(
get_context
(),
ins
->
get_shape
(),
to_shapes
(
ins
->
inputs
()));
};
try
{
compile_quant_conv_with_format
(
int8_x4_format
);
}
catch
(
migraphx
::
exception
&
)
{
// In case no solver supports the default format, retry using the other format.
compile_quant_conv_with_format
(
not
int8_x4_format
);
}
auto
args
=
ins
->
inputs
();
auto
workspace
=
insert_allocation
(
ins
,
ws
);
auto
output
=
insert_allocation
(
ins
,
ins
->
get_shape
());
apply_map
.
emplace
(
name
,
[
=
](
instruction_ref
ins
)
{
operation
conv
=
make_op
(
"gpu::"
+
name
,
{{
"op"
,
ins
->
get_operator
().
to_value
()},
{
"int8_x4_format"
,
int8_x4_format
}});
auto
output
=
insert_allocation
(
ins
,
ins
->
get_shape
());
return
mod
->
replace_instruction
(
ins
,
conv
,
args
[
0
],
args
[
1
],
workspace
,
output
);
return
mod
->
replace_instruction
(
ins
,
make_op
(
"gpu::miopen_op"
,
{{
"op"
,
to_value
(
conv
)}}),
ins
->
inputs
().
at
(
0
),
ins
->
inputs
().
at
(
1
),
output
);
});
}
...
...
@@ -363,43 +278,6 @@ struct miopen_apply
});
}
void
add_batch_norm_inference_op
()
{
apply_map
.
emplace
(
"batch_norm_inference"
,
[
=
](
instruction_ref
ins
)
{
auto
&&
op
=
any_cast
<
op
::
batch_norm_inference
>
(
ins
->
get_operator
());
auto
output
=
insert_allocation
(
ins
,
ins
->
get_shape
());
shape
old_shape
=
ins
->
inputs
().
at
(
1
)
->
get_shape
();
auto
input
=
ins
->
inputs
()[
0
];
auto
input_lens
=
input
->
get_shape
().
lens
();
std
::
vector
<
int64_t
>
rsp_lens
(
input_lens
.
size
(),
1
);
// for per_activation case, also need to reshape input
if
(
op
.
bn_mode
==
op
::
batch_norm_inference
::
per_activation
)
{
std
::
copy
(
input_lens
.
begin
()
+
1
,
input_lens
.
end
(),
rsp_lens
.
begin
()
+
1
);
}
else
{
rsp_lens
[
1
]
=
static_cast
<
int64_t
>
(
old_shape
.
elements
());
}
auto
reshape_op
=
op
::
reshape
{
rsp_lens
};
std
::
vector
<
instruction_ref
>
reshapes
;
std
::
transform
(
ins
->
inputs
().
begin
()
+
1
,
ins
->
inputs
().
end
(),
std
::
back_inserter
(
reshapes
),
[
&
](
auto
i
)
{
return
mod
->
insert_instruction
(
ins
,
reshape_op
,
i
);
});
return
mod
->
replace_instruction
(
ins
,
miopen_batch_norm_inference
{
op
},
input
,
reshapes
[
0
],
reshapes
[
1
],
reshapes
[
2
],
reshapes
[
3
],
output
);
});
}
// use 0 - input to represent neg
void
add_neg_op
()
{
...
...
src/targets/gpu/mlir.cpp
View file @
870a396b
...
...
@@ -21,6 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "migraphx/make_op.hpp"
#include <migraphx/gpu/mlir.hpp>
#ifdef MIGRAPHX_MLIR
...
...
@@ -31,7 +32,13 @@
#include <mlir-c/Dialect/MIGraphX.h>
#include <mlir-c/IntegerSet.h>
#include <mlir-c/Pass.h>
#include <mlir-c/Registration.h>
#include <mutex>
#if !defined(MLIR_MIGRAPHX_DIALECT_API_VERSION) || MLIR_MIGRAPHX_DIALECT_API_VERSION != 3
#warning "Incompatible version of rocMLIR library used, disabling"
#undef MIGRAPHX_MLIR
#else
#include <mlir-c/RegisterRocMLIR.h>
#endif
#endif
#include <migraphx/env.hpp>
...
...
@@ -43,15 +50,12 @@
#include <migraphx/gpu/code_object_op.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device_name.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/gpu/perfdb.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/permutation.hpp>
#include <deque>
#include <variant>
#if defined(MLIR_MIGRAPHX_DIALECT_API_VERSION) && MLIR_MIGRAPHX_DIALECT_API_VERSION >= 2
#define MIGRAPHX_MLIR_BARE_POINTER
#endif
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
...
...
@@ -99,7 +103,10 @@ struct mlir_handle
mlir_handle
(
T
p
)
:
handle
(
ptr
{
p
})
{}
T
get
()
const
{
return
handle
.
get
().
get
();
}
T
get
()
const
{
return
handle
.
get
().
get
();
// NOLINT(readability-redundant-smartptr-get)
}
T
release
()
{
return
handle
.
release
().
get
();
}
...
...
@@ -163,9 +170,11 @@ struct mlir_program
location
(
mlirLocationUnknownGet
(
ctx
.
get
())),
mmodule
(
mlirModuleCreateEmpty
(
location
))
{
MlirDialectHandle
mixr_handle
=
mlirGetDialectHandle__migraphx__
();
mlirDialectHandleRegisterDialect
(
mixr_handle
,
ctx
.
get
());
mlirRegisterAllDialects
(
ctx
.
get
());
MlirDialectRegistry
registry
=
mlirDialectRegistryCreate
();
mlirRegisterRocMLIRDialects
(
registry
);
mlirContextAppendDialectRegistry
(
ctx
.
get
(),
registry
);
mlirContextLoadAllAvailableDialects
(
ctx
.
get
());
mlirDialectRegistryDestroy
(
registry
);
mlirContextSetAllowUnregisteredDialects
(
ctx
.
get
(),
true
/*allow*/
);
}
...
...
@@ -370,7 +379,11 @@ struct mlir_program
mlir_operation_state
&
add_results
(
const
std
::
vector
<
shape
>&
outputs
)
{
auto
x
=
prog
->
make_tensors
(
outputs
);
std
::
vector
<
shape
>
reshaped
(
outputs
.
size
());
std
::
transform
(
outputs
.
begin
(),
outputs
.
end
(),
reshaped
.
begin
(),
[](
const
shape
&
r
)
{
return
shape
{
r
.
type
(),
r
.
lens
()};
});
auto
x
=
prog
->
make_tensors
(
reshaped
);
mlirOperationStateAddResults
(
&
op_state
,
x
.
size
(),
x
.
data
());
return
*
this
;
}
...
...
@@ -443,7 +456,8 @@ struct mlir_program
auto
ops
=
create_operation_state
(
"func.func"
);
ops
.
add_attributes
({{
"function_type"
,
make_function_type
(
inputs
,
outputs
)},
{
"sym_name"
,
std
::
string
(
"main"
)},
{
"kernel"
,
std
::
string
(
"mixr"
)}});
{
"kernel"
,
std
::
string
(
"mixr"
)},
{
"arch"
,
target_arch
}});
ops
.
add_region
(
std
::
move
(
region
));
insert
(
body
,
std
::
move
(
ops
));
...
...
@@ -502,11 +516,13 @@ struct mlir_program
{
pp
=
problem_params
{
ins
->
get_operator
(),
to_shapes
(
ins
->
inputs
()),
ins
->
get_shape
()};
std
::
string
tuned
=
get_tune_params
();
// check if HW supports xdlops
auto
target_chip
=
trim
(
split_string
(
target_arch
,
':'
).
front
());
bool
xdlops
=
contains
(
get_xdlops_archs
(),
target_chip
);
std
::
string
tuned
=
get_tune_params
(
xdlops
);
if
(
not
tuned
.
empty
())
ops
.
add_attributes
({{
"perf_config"
,
tuned
}});
// check if HW supports xdlops
if
(
contains
(
get_xdlops_archs
(),
target_name
))
if
(
xdlops
)
ops
.
add_attributes
({{
"xdlopsV2"
,
true
}});
}
...
...
@@ -530,7 +546,7 @@ struct mlir_program
// 1st pipeline to call
mlirMIGraphXAddHighLevelPipeline
(
pm
.
get
());
// 2nd pipeline to call
mlirMIGraphXAddBackendPipeline
(
pm
.
get
(),
target_
name
.
c_str
()
,
"amdgcn-amd-amdhsa"
,
""
);
mlirMIGraphXAddBackendPipeline
(
pm
.
get
(),
target_
arch
.
c_str
());
mlirPassManagerRun
(
pm
.
get
(),
mmodule
.
get
());
code_object_op
op
{};
...
...
@@ -540,16 +556,7 @@ struct mlir_program
return
op
;
}
void
find_target
()
{
std
::
string
tname
=
get_device_name
();
// HACK: Since MLIR can't handle the full target name
target_name
=
trim
(
split_string
(
tname
,
':'
).
front
());
if
(
tname
.
size
()
!=
target_name
.
size
())
std
::
cout
<<
"*************** WARNING: MLIR may not compile the correct target features for: "
<<
tname
<<
std
::
endl
;
}
void
find_target
()
{
target_arch
=
get_device_name
();
}
std
::
pair
<
std
::
size_t
,
std
::
size_t
>
get_launch_params
()
const
{
...
...
@@ -571,14 +578,14 @@ struct mlir_program
MIGRAPHX_THROW
(
"Failed to compile mlir program"
);
}
std
::
string
get_tune_params
()
{
return
get_mlir_perf_for_conv
(
pp
);
}
std
::
string
get_tune_params
(
bool
xdlops
)
{
return
get_mlir_perf_for_conv
(
pp
,
xdlops
);
}
mlir_context
ctx
;
MlirLocation
location
;
mlir_module
mmodule
;
problem_params
pp
;
std
::
deque
<
std
::
string
>
strings
{};
std
::
string
target_
name
;
std
::
string
target_
arch
;
};
std
::
string
dump_mlir
(
const
module
&
m
)
...
...
@@ -589,11 +596,61 @@ std::string dump_mlir(const module& m)
return
mlir_print
(
&
mlirOperationPrint
,
mod_op
);
}
code_object_op
compile_mlir
(
const
context
&
,
const
module
&
m
)
void
adjust_param_shapes
(
module
&
m
,
const
std
::
vector
<
instruction_ref
>&
inputs
)
{
auto
names
=
m
.
get_parameter_names
();
std
::
sort
(
names
.
begin
(),
names
.
end
());
for
(
auto
i
:
range
(
names
.
size
()))
{
const
auto
&
name
=
names
[
i
];
const
auto
&
input
=
inputs
[
i
]
->
get_shape
();
auto
param
=
m
.
get_parameter
(
name
);
if
(
input
.
standard
())
continue
;
auto
lens
=
input
.
lens
();
auto
strides
=
input
.
strides
();
std
::
vector
<
operation
>
ops
;
if
(
input
.
transposed
())
{
auto
perm
=
find_permutation
(
input
);
auto
iperm
=
invert_permutation
(
perm
);
lens
=
reorder_dims
(
lens
,
iperm
);
strides
=
reorder_dims
(
strides
,
iperm
);
ops
.
push_back
(
make_op
(
"transpose"
,
{{
"permutation"
,
perm
}}));
}
if
(
input
.
broadcasted
())
{
std
::
transform
(
lens
.
begin
(),
lens
.
end
(),
strides
.
begin
(),
lens
.
begin
(),
[](
auto
len
,
auto
stride
)
->
std
::
size_t
{
if
(
stride
==
0
)
return
1
;
return
len
;
});
ops
.
push_back
(
make_op
(
"multibroadcast"
,
{{
"out_lens"
,
input
.
lens
()}}));
}
auto
new_param
=
std
::
accumulate
(
ops
.
begin
(),
ops
.
end
(),
m
.
add_parameter
(
name
+
".0"
,
shape
{
input
.
type
(),
lens
}),
[
&
](
auto
x
,
auto
op
)
{
return
m
.
insert_instruction
(
param
,
op
,
x
);
});
m
.
replace_instruction
(
param
,
new_param
);
m
.
remove_instruction
(
param
);
}
}
code_object_op
compile_mlir
(
const
context
&
,
module
m
,
const
std
::
vector
<
instruction_ref
>&
inputs
)
{
adjust_param_shapes
(
m
,
inputs
);
const
bool
trace
=
enabled
(
MIGRAPHX_TRACE_MLIR
{});
if
(
trace
)
std
::
cout
<<
m
<<
std
::
endl
;
// set mutex while llvm thread support is disabled.
static
std
::
mutex
g_mlirc_mutex
;
// NOLINT
const
std
::
lock_guard
<
std
::
mutex
>
lock
(
g_mlirc_mutex
);
mlir_program
mp
;
mp
.
find_target
();
mp
.
parse
(
m
);
...
...
@@ -613,46 +670,9 @@ instruction_ref insert_mlir(module& m,
std
::
vector
<
instruction_ref
>
refs
;
std
::
size_t
last
=
0
;
#ifdef MIGRAPHX_MLIR_BARE_POINTER
refs
.
reserve
(
inputs
.
size
());
std
::
copy
(
inputs
.
begin
(),
inputs
.
end
(),
std
::
back_inserter
(
refs
));
last
=
refs
.
size
()
-
1
;
#else
refs
.
reserve
(
inputs
.
size
()
*
15
);
std
::
unordered_map
<
uint64_t
,
instruction_ref
>
literal_map
{};
auto
get_literal
=
[
&
](
uint64_t
value
)
{
auto
fi
=
literal_map
.
find
(
value
);
if
(
fi
!=
literal_map
.
end
())
return
fi
->
second
;
auto
lit
=
m
.
add_literal
(
value
);
literal_map
.
emplace
(
value
,
lit
);
return
lit
;
};
for
(
auto
input
:
inputs
)
{
const
size_t
offset
=
0
;
auto
s
=
input
->
get_shape
();
last
=
refs
.
size
();
refs
.
push_back
(
input
);
refs
.
push_back
(
input
);
refs
.
push_back
(
get_literal
(
offset
));
// offset
// dim sizes
std
::
transform
(
s
.
lens
().
begin
(),
s
.
lens
().
end
(),
std
::
back_inserter
(
refs
),
[
&
](
const
auto
&
lval
)
{
return
get_literal
(
lval
);
});
// refs.push_back(get_literal(1)); // G
// dim strides
std
::
transform
(
s
.
strides
().
begin
(),
s
.
strides
().
end
(),
std
::
back_inserter
(
refs
),
[
&
](
const
auto
&
lval
)
{
return
get_literal
(
lval
);
});
// refs.push_back(get_literal(1)); // G
}
#endif
last
=
refs
.
size
()
-
1
;
co
.
expected_inputs
=
to_shapes
(
refs
);
co
.
output_arg
=
last
;
return
m
.
insert_instruction
(
ins
,
co
,
refs
);
...
...
@@ -662,13 +682,19 @@ instruction_ref insert_mlir(module& m,
std
::
string
dump_mlir
(
const
module
&
)
{
return
{};
}
code_object_op
compile_mlir
(
const
context
&
,
const
module
&
)
{
return
{};
}
template
<
class
T
>
void
use
(
T
&
)
{
}
// Disabling clang-tidy warning on non-real useage.
// NOLINTBEGIN(performance-unnecessary-value-param)
code_object_op
compile_mlir
(
const
context
&
,
module
,
const
std
::
vector
<
instruction_ref
>&
)
{
return
{};
}
// NOLINTEND(performance-unnecessary-value-param)
instruction_ref
// cppcheck-suppress funcArgNamesDifferent
insert_mlir
(
module
&
m
,
instruction_ref
,
code_object_op
co
,
const
std
::
vector
<
instruction_ref
>&
)
...
...
src/targets/gpu/perfdb.cpp
View file @
870a396b
...
...
@@ -27,6 +27,7 @@
#include <migraphx/stringutils.hpp>
#include <migraphx/permutation.hpp>
#include <fstream>
#include <mutex>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
...
...
@@ -88,6 +89,9 @@ std::string generate_miopen_config(const problem_params& pp)
auto
query_miopen_db
(
const
std
::
string
&
query
)
{
static
std
::
mutex
g_db_mutex
;
// NOLINT
const
std
::
lock_guard
<
std
::
mutex
>
lock
(
g_db_mutex
);
// TODO: Store db as a static variable
const
auto
dbpath
=
fs
::
path
{
"/opt"
}
/
"rocm"
/
"share"
/
"miopen"
/
"db"
/
"miopen.db"
;
// Check if db file exists.
...
...
@@ -108,16 +112,17 @@ auto query_miopen_db(const std::string& query)
}
// namespace
std
::
string
get_mlir_perf_for_conv
(
const
problem_params
&
pp
)
std
::
string
get_mlir_perf_for_conv
(
const
problem_params
&
pp
,
bool
xdlops
)
{
std
::
string
query
=
"select P.* \
std
::
string
solver
=
xdlops
?
"ConvMlirIgemmFwdXdlops"
:
"ConvMlirIgemmFwd"
;
std
::
string
query
=
"select P.* \
from perf_db P, config C \
where P.config = C.id AND \
P.solver = '
ConvMlirIgemmFwdXdlops
' AND \
P.solver = '
${solver}
' AND \
${config}"
;
auto
results
=
query_miopen_db
(
interpolate_string
(
query
,
{{
"config"
,
generate_miopen_config
(
pp
)}}));
auto
results
=
query_miopen_db
(
interpolate_string
(
query
,
{{
"config"
,
generate_miopen_config
(
pp
)}
,
{
"solver"
,
solver
}
}));
if
(
results
.
empty
())
return
""
;
return
results
.
front
().
at
(
"params"
);
...
...
src/targets/gpu/prefuse_ops.cpp
View file @
870a396b
...
...
@@ -35,6 +35,12 @@ namespace {
template
<
class
Derived
,
std
::
size_t
N
>
struct
layernorm_base
{
float
epsilon
=
1e-12
f
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
pack
(
f
(
self
.
epsilon
,
"epsilon"
));
}
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
,
std
::
vector
<
module_ref
>
mods
)
const
{
std
::
size_t
nargs
=
1
;
...
...
@@ -45,23 +51,27 @@ struct layernorm_base
}
check_shapes
{
inputs
,
static_cast
<
const
Derived
&>
(
*
this
)}.
has
(
nargs
+
N
);
auto
s
=
inputs
.
at
(
0
);
auto
t
=
s
.
type
();
if
(
not
mods
.
empty
())
t
=
mods
.
front
()
->
get_output_shapes
().
front
().
type
();
if
(
s
.
scalar
())
{
return
s
;
}
else
if
(
s
.
broadcasted
())
{
return
{
s
.
type
()
,
s
.
lens
()};
return
{
t
,
s
.
lens
()};
}
else
{
return
s
.
with_lens
(
s
.
lens
());
return
s
.
with_lens
(
t
,
s
.
lens
());
}
}
};
struct
layernorm
:
layernorm_base
<
layernorm
,
0
>
{
std
::
string
name
()
const
{
return
"gpu::prelayernorm"
;
}
};
MIGRAPHX_REGISTER_OP
(
layernorm
);
...
...
@@ -80,8 +90,9 @@ struct find_layernorm
{
auto
ins
=
r
.
result
;
auto
x_ins
=
r
.
instructions
[
"x"
];
auto
eps
=
r
.
instructions
[
"eps"
]
->
eval
().
at
<
float
>
();
m
.
replace_instruction
(
ins
,
layernorm
{},
x_ins
);
m
.
replace_instruction
(
ins
,
layernorm
{
eps
},
x_ins
);
}
};
...
...
@@ -89,15 +100,17 @@ struct find_add_layernorm
{
auto
matcher
()
const
{
return
match
::
layernorm
()(
match
::
var
(
"x"
)(
match
::
name
(
"add"
).
bind
(
"add"
)));
return
match
::
layernorm
()(
match
::
var
(
"x"
)(
match
::
name
(
"add"
)(
match
::
used_once
()).
bind
(
"add"
)));
}
void
apply
(
module
&
m
,
const
match
::
matcher_result
&
r
)
const
{
auto
ins
=
r
.
result
;
auto
add_ins
=
r
.
instructions
[
"add"
];
auto
eps
=
r
.
instructions
[
"eps"
]
->
eval
().
at
<
float
>
();
m
.
replace_instruction
(
ins
,
add_layernorm
{},
add_ins
->
inputs
());
m
.
replace_instruction
(
ins
,
add_layernorm
{
eps
},
add_ins
->
inputs
());
}
};
}
// namespace
...
...
src/targets/gpu/quant_convolution.cpp
deleted
100644 → 0
View file @
228b665c
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/quant_convolution.hpp>
#include <migraphx/gpu/device/convert.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/generate.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
shape
miopen_quant_convolution
::
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
check_shapes
{
inputs
,
*
this
}.
has
(
4
).
standard
();
return
op
.
normalize_compute_shape
({
inputs
.
at
(
0
),
inputs
.
at
(
1
)});
}
argument
miopen_quant_convolution
::
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
{
auto
x_desc
=
make_tensor
(
args
[
0
].
get_shape
(),
int8_x4_format
);
auto
w_desc
=
make_tensor
(
args
[
1
].
get_shape
(),
int8_x4_format
);
auto
y_desc
=
make_tensor
(
output_shape
);
float
alpha
=
1
;
float
beta
=
0
;
auto
status
=
miopenConvolutionForward
(
ctx
.
get_stream
().
get_miopen
(),
&
alpha
,
x_desc
.
get
(),
args
[
0
].
implicit
(),
w_desc
.
get
(),
args
[
1
].
implicit
(),
cd
.
get
(),
algo
,
&
beta
,
y_desc
.
get
(),
args
[
3
].
implicit
(),
args
[
2
].
implicit
(),
args
[
2
].
get_shape
().
bytes
());
if
(
status
!=
miopenStatusSuccess
)
{
MIGRAPHX_THROW
(
"QUANT_CONVOLUTION: run convolution forward failed"
);
}
return
args
[
3
];
}
shape
miopen_quant_convolution
::
find
(
context
&
ctx
,
const
shape
&
output_shape
,
std
::
vector
<
shape
>
inputs
)
{
shape
workspace_shape
{};
auto
x_desc
=
make_tensor
(
inputs
[
0
],
int8_x4_format
);
auto
w_desc
=
make_tensor
(
inputs
[
1
],
int8_x4_format
);
auto
y_desc
=
make_tensor
(
output_shape
);
std
::
size_t
workspace_size
=
0
;
miopenConvolutionForwardGetWorkSpaceSize
(
ctx
.
get_stream
().
get_miopen
(),
w_desc
.
get
(),
x_desc
.
get
(),
cd
.
get
(),
y_desc
.
get
(),
&
workspace_size
);
workspace_shape
=
shape
{
shape
::
int8_type
,
{
workspace_size
}};
auto
x_shape
=
inputs
[
0
];
auto
w_shape
=
inputs
[
1
];
if
(
int8_x4_format
)
{
x_shape
=
pack_int8_shape
(
x_shape
);
w_shape
=
pack_int8_shape
(
w_shape
);
}
auto
x
=
to_gpu
(
generate_argument
(
x_shape
));
auto
w
=
to_gpu
(
generate_argument
(
w_shape
));
auto
y
=
allocate_gpu
(
output_shape
);
auto
workspace
=
allocate_gpu
(
workspace_shape
);
int
algo_count
=
1
;
miopenConvAlgoPerf_t
perf
;
auto
status
=
miopenFindConvolutionForwardAlgorithm
(
ctx
.
get_stream
().
get_miopen
(),
x_desc
.
get
(),
x
.
implicit
(),
w_desc
.
get
(),
w
.
implicit
(),
cd
.
get
(),
y_desc
.
get
(),
y
.
implicit
(),
1
,
&
algo_count
,
&
perf
,
workspace
.
implicit
(),
workspace_size
,
false
);
if
(
status
!=
miopenStatusSuccess
)
MIGRAPHX_THROW
(
"MIOpen Quant Convolution: find convolution failed"
);
algo
=
perf
.
fwd_algo
;
size_t
solution_count
;
status
=
miopenConvolutionForwardGetSolutionCount
(
ctx
.
get_stream
().
get_miopen
(),
w_desc
.
get
(),
x_desc
.
get
(),
cd
.
get
(),
y_desc
.
get
(),
&
solution_count
);
if
(
status
!=
miopenStatusSuccess
)
MIGRAPHX_THROW
(
"MIOpen Quant Convolution: get solution count failed"
);
std
::
vector
<
miopenConvSolution_t
>
solutions
(
solution_count
);
status
=
miopenConvolutionForwardGetSolution
(
ctx
.
get_stream
().
get_miopen
(),
w_desc
.
get
(),
x_desc
.
get
(),
cd
.
get
(),
y_desc
.
get
(),
solution_count
,
&
solution_count
,
solutions
.
data
());
if
(
status
!=
miopenStatusSuccess
)
MIGRAPHX_THROW
(
"MIOpen Quant Convolution: get solution failed"
);
solution_id
=
solutions
.
front
().
solution_id
;
return
shape
{
shape
::
int8_type
,
{
perf
.
memory
}};
}
void
miopen_quant_convolution
::
finalize
(
context
&
ctx
,
const
shape
&
output_shape
,
std
::
vector
<
shape
>
inputs
)
{
if
(
cd
==
nullptr
)
cd
=
make_conv
(
op
);
if
(
solution_id
==
0
)
{
// Check that workspace hasn't changed
auto
size
=
inputs
.
at
(
2
).
bytes
();
auto
ws
=
find
(
ctx
,
output_shape
,
inputs
);
if
(
ws
.
bytes
()
>
size
)
MIGRAPHX_THROW
(
"MIOpen Quant Convolution: workspace has changed during finalization."
);
}
auto
x_desc
=
make_tensor
(
inputs
[
0
],
int8_x4_format
);
auto
w_desc
=
make_tensor
(
inputs
[
1
],
int8_x4_format
);
auto
y_desc
=
make_tensor
(
output_shape
);
auto
status
=
miopenConvolutionForwardCompileSolution
(
ctx
.
get_stream
().
get_miopen
(),
w_desc
.
get
(),
x_desc
.
get
(),
cd
.
get
(),
y_desc
.
get
(),
solution_id
);
if
(
status
!=
miopenStatusSuccess
)
MIGRAPHX_THROW
(
"MIOpen Quant Convolution: compile solution failed"
);
}
shape
miopen_quant_convolution
::
pack_int8_shape
(
const
shape
&
s
)
const
{
if
(
s
.
type
()
!=
shape
::
int8_type
)
{
MIGRAPHX_THROW
(
"PACK_INT8_SHAPE: only process int8_type"
);
}
auto
lens
=
s
.
lens
();
auto
strides
=
s
.
strides
();
lens
[
1
]
=
(
lens
[
1
]
+
3
)
/
4
*
4
;
strides
[
0
]
=
strides
[
1
]
*
lens
[
1
];
return
{
s
.
type
(),
lens
,
strides
};
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/rocblas.cpp
View file @
870a396b
...
...
@@ -21,7 +21,13 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <unordered_set>
#include <migraphx/ranges.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/gpu/device_name.hpp>
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/context.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
...
...
@@ -41,6 +47,33 @@ rocblas_handle_ptr create_rocblas_handle_ptr(hipStream_t s)
return
rb
;
}
const
std
::
unordered_set
<
std
::
string
>&
get_rocblas_fp32_archs
()
{
static
std
::
unordered_set
<
std
::
string
>
supported_archs
{
"gfx908"
,
"gfx90a"
};
return
supported_archs
;
}
bool
get_compute_fp32_flag
()
{
bool
compute_fp32
=
false
;
#if ROCBLAS_VERSION_MAJOR >= 2 && ROCBLAS_VERSION_MINOR >= 38
const
auto
device_name
=
trim
(
split_string
(
get_device_name
(),
':'
).
front
());
if
(
contains
(
get_rocblas_fp32_archs
(),
device_name
))
compute_fp32
=
true
;
#endif
return
compute_fp32
;
}
bool
get_int8_x4_format
(
context
&
ctx
)
{
bool
int8_x4_format
=
true
;
#if ROCBLAS_VERSION_MAJOR >= 2 && ROCBLAS_VERSION_MINOR >= 38
rocblas_gemm_flags
flag
;
rocblas_query_int8_layout_flag
(
ctx
.
get_stream
().
get_rocblas
(),
&
flag
);
int8_x4_format
=
(
flag
==
rocblas_gemm_flags_pack_int8x4
);
#endif
return
int8_x4_format
;
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/softmax.cpp
deleted
100644 → 0
View file @
228b665c
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/softmax.hpp>
#include <migraphx/gpu/device/softmax.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/tune_axis.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
shape
hip_softmax
::
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
check_shapes
{
inputs
,
*
this
}.
has
(
2
).
standard
();
return
op
.
normalize_compute_shape
({
inputs
.
at
(
0
)});
}
argument
hip_softmax
::
compute
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
{
auto
n_dim
=
args
.
front
().
get_shape
().
lens
().
size
();
auto
tuned_axis
=
tune_axis
(
n_dim
,
op
.
axis
,
op
.
name
());
device
::
softmax
(
ctx
.
get_stream
().
get
(),
args
.
back
(),
args
.
front
(),
tuned_axis
);
return
args
.
back
();
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/target.cpp
View file @
870a396b
...
...
@@ -42,7 +42,6 @@
#include <migraphx/propagate_constant.hpp>
#include <migraphx/register_target.hpp>
#include <migraphx/replace_allocate.hpp>
#include <migraphx/rewrite_batchnorm.hpp>
#include <migraphx/rewrite_gelu.hpp>
#include <migraphx/rewrite_pooling.hpp>
#include <migraphx/rewrite_quantization.hpp>
...
...
@@ -52,6 +51,7 @@
#include <migraphx/simplify_qdq.hpp>
#include <migraphx/simplify_reshapes.hpp>
#include <migraphx/gpu/allocation_model.hpp>
#include <migraphx/gpu/compile_miopen.hpp>
#include <migraphx/gpu/compile_ops.hpp>
#include <migraphx/gpu/concat_gpu_opt.hpp>
#include <migraphx/gpu/context.hpp>
...
...
@@ -112,8 +112,6 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
dead_code_elimination
{},
insert_pad
{},
dead_code_elimination
{},
rewrite_batchnorm
{},
dead_code_elimination
{},
rewrite_rnn
{},
dead_code_elimination
{},
inline_module
{},
...
...
@@ -145,14 +143,16 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
dead_code_elimination
{},
eliminate_concat
{
concat_gpu_optimization
{}},
dead_code_elimination
{},
pack_int8_args
{
},
compile_miopen
{
&
gctx
},
dead_code_elimination
{},
adjust_allocation
{
gpu_allocation_model
{}
},
pack_int8_args
{
},
dead_code_elimination
{},
fuse_ops
{
&
ctx
,
options
.
fast_math
},
dead_code_elimination
{},
replace_allocate
{
gpu_allocation_model
{},
options
.
offload_copy
},
dead_code_elimination
{},
adjust_allocation
{
gpu_allocation_model
{}},
dead_code_elimination
{},
compile_ops
{
&
ctx
},
dead_code_elimination
{},
write_literals
{
&
ctx
},
...
...
src/targets/ref/lowering.cpp
View file @
870a396b
...
...
@@ -26,15 +26,12 @@
#include <migraphx/instruction.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/op/identity.hpp>
#include <migraphx/op/batch_norm_inference.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/deconvolution.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/op/dot.hpp>
#include <migraphx/op/quant_dot.hpp>
#include <migraphx/op/elu.hpp>
#include <migraphx/op/im2col.hpp>
#include <migraphx/op/leaky_relu.hpp>
#include <migraphx/op/logsoftmax.hpp>
#include <migraphx/op/loop.hpp>
#include <migraphx/op/lrn.hpp>
...
...
@@ -75,84 +72,6 @@ typename std::conditional_t<std::is_integral<T>{}, std::make_signed<T>, std::ena
return
x
;
}
//
// ref implemenataion of batch norm for inference
//
// inputs are:
// args[0] -> input data buffer
// args[1] -> mini batch mean
// args[2] -> mini batch variance
// args[3] -> gamma
// args[4] -> bias
//
// The equation to compute batch norm for inference is:
//
// output[i] = bias + gamma * (input[i] + mean) / sqrt(variance + epsilon)
//
// the input data format should be nchw
//
struct
ref_batch_norm_inference
{
op
::
batch_norm_inference
op
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
migraphx
::
reflect
(
self
.
op
,
f
);
}
std
::
string
name
()
const
{
return
"ref::batch_norm_inference"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
return
op
.
compute_shape
(
inputs
);
}
argument
compute
(
context
&
,
const
shape
&
output_shape
,
std
::
vector
<
argument
>
args
)
const
{
argument
output
{
output_shape
};
double
epsilon
=
op
.
epsilon
;
auto
input
=
args
[
0
];
auto
arg_gamma
=
args
[
1
];
auto
arg_bias
=
args
[
2
];
auto
mini_batch_mean
=
args
[
3
];
auto
mini_batch_variance
=
args
[
4
];
if
(
op
.
bn_mode
==
op
::
batch_norm_inference
::
spatial
)
{
visit_all
(
output
,
input
,
mini_batch_mean
,
mini_batch_variance
,
arg_gamma
,
arg_bias
)(
[
&
](
auto
result
,
auto
buffer
,
auto
mean
,
auto
variance
,
auto
gamma
,
auto
bias
)
{
par_for
(
output_shape
.
elements
(),
[
&
](
auto
i
)
{
auto
idx
=
output_shape
.
multi
(
i
);
auto
c
=
idx
[
1
];
assert
((
variance
[
c
]
+
epsilon
)
>
0
);
result
[
i
]
=
gamma
[
c
]
*
(
buffer
[
i
]
-
mean
[
c
])
/
std
::
sqrt
(
variance
[
c
]
+
epsilon
)
+
bias
[
c
];
});
});
}
if
(
op
.
bn_mode
==
op
::
batch_norm_inference
::
per_activation
)
{
visit_all
(
output
,
input
,
mini_batch_mean
,
mini_batch_variance
,
arg_gamma
,
arg_bias
)(
[
&
](
auto
result
,
auto
buffer
,
auto
mean
,
auto
variance
,
auto
gamma
,
auto
bias
)
{
par_for
(
output_shape
.
elements
(),
[
&
](
auto
i
)
{
auto
idx
=
output_shape
.
multi
(
i
);
idx
[
0
]
=
0
;
auto
index
=
output_shape
.
index
(
idx
);
assert
((
variance
[
index
]
+
epsilon
)
>
0
);
result
[
i
]
=
gamma
[
index
]
*
(
buffer
[
i
]
-
mean
[
index
])
/
std
::
sqrt
(
variance
[
index
]
+
epsilon
)
+
bias
[
index
];
});
});
}
return
output
;
}
};
MIGRAPHX_REGISTER_OP
(
ref_batch_norm_inference
)
struct
ref_lrn
{
op
::
lrn
op
;
...
...
@@ -237,15 +156,16 @@ struct ref_convolution : auto_register_op<ref_convolution<Op>>
argument
compute
(
context
&
,
shape
output_shape
,
std
::
vector
<
argument
>
args
)
const
{
std
::
vector
<
std
::
size_t
>
padding
;
if
(
op
.
use_dynamic_same_auto_pad
)
if
(
op
.
padding_mode
!=
op
::
padding_mode_t
::
default_
)
{
auto
input_lens
=
args
[
0
].
get_shape
().
lens
();
std
::
vector
<
std
::
size_t
>
img_lens
{
input_lens
.
begin
()
+
2
,
input_lens
.
end
()};
auto
input_lens
=
args
[
0
].
get_shape
().
lens
();
auto
weights_lens
=
args
[
1
].
get_shape
().
lens
();
std
::
vector
<
std
::
size_t
>
k_lens
{
weights_lens
.
begin
()
+
2
,
weights_lens
.
end
()};
padding
=
calc_dyn_auto_pad
(
img_lens
,
k_lens
,
op
.
stride
,
op
.
dilation
);
output_shape
=
compute_padded_shape
({
args
.
at
(
0
).
get_shape
(),
args
.
at
(
1
).
get_shape
()},
padding
);
padding
=
op
.
padding_mode
==
op
::
same_upper
?
calc_dyn_auto_pad
(
input_lens
,
weights_lens
,
op
.
stride
,
op
.
dilation
,
true
)
:
calc_dyn_auto_pad
(
input_lens
,
weights_lens
,
op
.
stride
,
op
.
dilation
,
false
);
output_shape
=
compute_padded_shape
(
args
[
0
].
get_shape
(),
args
[
1
].
get_shape
(),
padding
,
op
.
stride
,
op
.
dilation
);
}
else
{
...
...
@@ -313,34 +233,6 @@ struct ref_convolution : auto_register_op<ref_convolution<Op>>
});
return
result
;
}
private:
/*!
* Used for dynamic auto padding since padding needs to be computed at evaulation time.
* \param inputs two fixed shape inputs [input_tensor, weights]
* \param padding from auto_pad calculation
*/
shape
compute_padded_shape
(
const
std
::
vector
<
shape
>&
inputs
,
const
std
::
vector
<
std
::
size_t
>&
padding
)
const
{
const
shape
&
input
=
inputs
.
at
(
0
);
const
shape
&
weights
=
inputs
.
at
(
1
);
const
size_t
num_spatial_dims
=
input
.
lens
().
size
()
-
2
;
std
::
vector
<
size_t
>
output_lens
{
input
.
lens
()[
0
],
weights
.
lens
()[
0
]};
// calculate the output shape of the convolution: ((W - K + 2P) / S) + 1
for
(
size_t
i
=
0
;
i
<
num_spatial_dims
;
i
++
)
{
auto
padding_factor
=
padding
[
i
]
+
padding
[
i
+
num_spatial_dims
];
output_lens
.
push_back
(
std
::
size_t
(
std
::
max
<
std
::
ptrdiff_t
>
(
1
,
(
input
.
lens
()[
i
+
2
]
-
(
1
+
op
.
dilation
[
i
]
*
(
weights
.
lens
()[
i
+
2
]
-
1
))
+
padding_factor
)
/
op
.
stride
[
i
]
+
1
)));
}
return
inputs
[
0
].
with_lens
(
output_lens
);
}
};
struct
ref_im2col
...
...
@@ -454,10 +346,10 @@ struct ref_pad
std
::
string
name
()
const
{
return
"ref::pad"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
return
op
.
compute_shape
(
inputs
);
}
argument
compute
(
context
&
,
const
shape
&
output_shape
,
std
::
vector
<
argument
>
args
)
const
argument
compute
(
context
&
,
const
dyn_output
&
dyn_out
,
std
::
vector
<
argument
>
args
)
const
{
assert
(
out
put_shape
.
standard
());
argument
result
{
out
put_shape
};
assert
(
dyn_out
.
com
put
ed
_shape
.
standard
());
argument
result
{
dyn_out
.
com
put
ed
_shape
};
result
.
visit
([
&
](
auto
output
)
{
using
type
=
typename
decltype
(
output
)
::
value_type
;
std
::
fill
(
output
.
begin
(),
output
.
end
(),
pad_clamp
<
type
>
(
op
.
value
));
...
...
@@ -491,9 +383,9 @@ struct ref_gemm
std
::
string
name
()
const
{
return
"ref::dot"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
return
op
.
compute_shape
(
inputs
);
}
argument
compute
(
context
&
,
const
shape
&
output_shape
,
std
::
vector
<
argument
>
args
)
const
argument
compute
(
context
&
,
const
dyn_output
&
dyn_out
,
std
::
vector
<
argument
>
args
)
const
{
argument
result
{
out
put_shape
};
argument
result
{
dyn_out
.
com
put
ed
_shape
};
migemm
(
result
,
args
[
0
],
args
[
1
],
1.0
f
,
0.0
f
);
return
result
;
...
...
@@ -537,65 +429,6 @@ struct ref_quant_gemm
};
MIGRAPHX_REGISTER_OP
(
ref_gemm
)
struct
leaky_relu_op
{
op
::
leaky_relu
op
;
std
::
string
name
()
const
{
return
"ref::leaky_relu"
;
}
auto
fcn
()
const
{
auto
a
=
op
.
alpha
;
return
[
a
](
auto
x
)
{
return
x
>
0
?
x
:
x
*
a
;
};
}
};
struct
elu_op
{
op
::
elu
op
;
std
::
string
name
()
const
{
return
"ref::elu"
;
}
auto
fcn
()
const
{
auto
a
=
op
.
alpha
;
return
[
a
](
auto
x
)
{
return
x
>
0
?
x
:
a
*
std
::
expm1
(
x
);
};
}
};
template
<
typename
Op
>
struct
ref_unary
:
auto_register_op
<
ref_unary
<
Op
>>
{
ref_unary
()
=
default
;
template
<
class
T
>
ref_unary
(
T
pop
)
:
op
(
Op
{
std
::
move
(
pop
)})
{
}
Op
op
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
migraphx
::
reflect
(
self
.
op
.
op
,
f
);
}
std
::
string
name
()
const
{
return
op
.
name
();
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
check_shapes
{
inputs
,
*
this
}.
has
(
1
);
const
auto
&
s
=
inputs
.
at
(
0
);
return
{
s
.
type
(),
s
.
lens
()};
}
argument
compute
(
context
&
,
const
shape
&
output_shape
,
std
::
vector
<
argument
>
args
)
const
{
argument
result
{
output_shape
};
visit_all
(
result
,
args
[
0
])([
&
](
auto
output
,
auto
input
)
{
assert
(
input
.
get_shape
().
standard
());
std
::
transform
(
input
.
begin
(),
input
.
end
(),
output
.
begin
(),
op
.
fcn
());
});
return
result
;
}
};
template
<
class
Op
>
struct
ref_softmax
:
auto_register_op
<
ref_softmax
<
Op
>>
{
...
...
@@ -616,10 +449,10 @@ struct ref_softmax : auto_register_op<ref_softmax<Op>>
{
return
op
.
normalize_compute_shape
(
inputs
);
}
argument
compute
(
context
&
,
const
shape
&
output_shape
,
std
::
vector
<
argument
>
args
)
const
argument
compute
(
context
&
,
const
dyn_output
&
dyn_out
,
std
::
vector
<
argument
>
args
)
const
{
argument
result
{
out
put_shape
};
auto
batch_lens
=
out
put_shape
.
lens
();
argument
result
{
dyn_out
.
com
put
ed
_shape
};
auto
batch_lens
=
dyn_out
.
com
put
ed
_shape
.
lens
();
int64_t
tuned_axis
=
tune_axis
(
args
[
0
].
get_shape
().
lens
().
size
(),
op
.
axis
,
op
.
name
());
std
::
size_t
n_dims
=
batch_lens
[
tuned_axis
];
batch_lens
[
tuned_axis
]
=
1
;
...
...
@@ -642,7 +475,7 @@ struct ref_softmax : auto_register_op<ref_softmax<Op>>
for
(
std
::
size_t
j
=
0
;
j
<
n_dims
;
++
j
)
{
idx
[
tuned_axis
]
=
j
;
std
::
size_t
index
=
out
put_shape
.
index
(
idx
);
std
::
size_t
index
=
dyn_out
.
com
put
ed
_shape
.
index
(
idx
);
output
[
index
]
=
std
::
exp
(
input
[
index
]
-
batch_max
[
i
]);
}
...
...
@@ -731,16 +564,12 @@ struct ref_apply
void
init
()
{
apply_map
[
"batch_norm_inference"
]
=
extend_op
<
ref_batch_norm_inference
,
op
::
batch_norm_inference
>
();
apply_map
[
"convolution"
]
=
extend_op
<
ref_convolution
<
op
::
convolution
>
,
op
::
convolution
>
();
apply_map
[
"dot"
]
=
extend_op
<
ref_gemm
,
op
::
dot
>
();
apply_map
[
"quant_dot"
]
=
extend_op
<
ref_quant_gemm
,
op
::
quant_dot
>
();
apply_map
[
"quant_convolution"
]
=
extend_op
<
ref_convolution
<
op
::
quant_convolution
>
,
op
::
quant_convolution
>
();
apply_map
[
"elu"
]
=
extend_op
<
ref_unary
<
elu_op
>
,
op
::
elu
>
();
apply_map
[
"im2col"
]
=
extend_op
<
ref_im2col
,
op
::
im2col
>
();
apply_map
[
"leaky_relu"
]
=
extend_op
<
ref_unary
<
leaky_relu_op
>
,
op
::
leaky_relu
>
();
apply_map
[
"logsoftmax"
]
=
extend_op
<
ref_softmax
<
op
::
logsoftmax
>
,
op
::
logsoftmax
>
();
apply_map
[
"lrn"
]
=
extend_op
<
ref_lrn
,
op
::
lrn
>
();
apply_map
[
"pad"
]
=
extend_op
<
ref_pad
,
op
::
pad
>
();
...
...
src/tf/parse_batchnorm.cpp
View file @
870a396b
...
...
@@ -23,6 +23,7 @@
*/
#include <migraphx/tf/op_parser.hpp>
#include <migraphx/tf/tf_parser.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/make_op.hpp>
...
...
@@ -38,16 +39,37 @@ struct parse_batchnorm : op_parser<parse_batchnorm>
instruction_ref
parse
(
const
op_desc
&
/*opd*/
,
const
tf_parser
&
/*parser*/
,
tf_parser
::
node_info
info
,
const
std
::
vector
<
instruction_ref
>
&
args
)
const
std
::
vector
<
instruction_ref
>
args
)
const
{
float
epsilon
=
1e-5
f
;
float
momentum
=
0.9
f
;
// different default epsilon than from ONNX
float
epsilon
=
1e-4
f
;
if
(
contains
(
info
.
attributes
,
"epsilon"
))
{
epsilon
=
info
.
attributes
.
at
(
"epsilon"
).
f
();
}
auto
op
=
make_op
(
"batch_norm_inference"
,
{{
"epsilon"
,
epsilon
},
{
"momentum"
,
momentum
}});
return
info
.
add_instruction
(
op
,
args
);
auto
x_lens
=
args
[
0
]
->
get_shape
().
lens
();
auto
x_type
=
args
[
0
]
->
get_shape
().
type
();
// unsqueeze tensors of shape (C) to broadcast correctly
auto
rt
=
info
.
add_literal
(
migraphx
::
literal
{
migraphx
::
shape
{
x_type
},
{
0.5
}});
auto
eps
=
info
.
add_literal
(
migraphx
::
literal
{
migraphx
::
shape
{
x_type
},
{
epsilon
}});
auto
scale_unsqueeze
=
info
.
add_instruction
(
migraphx
::
make_op
(
"unsqueeze"
,
{{
"axes"
,
{
1
,
2
}}}),
args
[
1
]);
auto
bias_unsqueeze
=
info
.
add_instruction
(
migraphx
::
make_op
(
"unsqueeze"
,
{{
"axes"
,
{
1
,
2
}}}),
args
[
2
]);
auto
mean_unsqueeze
=
info
.
add_instruction
(
migraphx
::
make_op
(
"unsqueeze"
,
{{
"axes"
,
{
1
,
2
}}}),
args
[
3
]);
auto
var_unsqueeze
=
info
.
add_instruction
(
migraphx
::
make_op
(
"unsqueeze"
,
{{
"axes"
,
{
1
,
2
}}}),
args
[
4
]);
auto
numer
=
info
.
add_broadcastable_binary_op
(
"sub"
,
args
[
0
],
mean_unsqueeze
);
auto
var_eps
=
info
.
add_broadcastable_binary_op
(
"add"
,
var_unsqueeze
,
eps
);
auto
denom
=
info
.
add_broadcastable_binary_op
(
"pow"
,
var_eps
,
rt
);
auto
div0
=
info
.
add_broadcastable_binary_op
(
"div"
,
numer
,
denom
);
auto
r0
=
info
.
add_broadcastable_binary_op
(
"mul"
,
div0
,
scale_unsqueeze
);
return
info
.
add_broadcastable_binary_op
(
"add"
,
r0
,
bias_unsqueeze
);
}
};
...
...
src/tf/parse_conv.cpp
View file @
870a396b
...
...
@@ -75,7 +75,6 @@ struct parse_conv : op_parser<parse_conv>
const
std
::
string
&
pad_mode
=
info
.
attributes
.
at
(
"padding"
).
s
();
if
(
pad_mode
.
find
(
"SAME"
)
!=
std
::
string
::
npos
)
{
op
.
padding_mode
=
op
::
padding_mode_t
::
same
;
std
::
vector
<
size_t
>
weight_dims
=
weights
->
get_shape
().
lens
();
size_t
weight_h
=
weight_dims
[
2
];
size_t
weight_w
=
weight_dims
[
3
];
...
...
@@ -87,10 +86,6 @@ struct parse_conv : op_parser<parse_conv>
op
.
padding
=
std
::
vector
<
size_t
>
(
pads
.
begin
(),
pads
.
end
());
}
else
if
(
pad_mode
.
find
(
"VALID"
)
!=
std
::
string
::
npos
)
{
op
.
padding_mode
=
op
::
padding_mode_t
::
valid
;
}
else
if
(
pad_mode
.
find
(
"EXPLICIT"
)
!=
std
::
string
::
npos
)
{
std
::
vector
<
size_t
>
padding
;
...
...
src/tf/parse_depthwiseconv.cpp
View file @
870a396b
...
...
@@ -80,7 +80,6 @@ struct parse_depthwiseconv : op_parser<parse_depthwiseconv>
if
(
pad_mode
.
find
(
"SAME"
)
!=
std
::
string
::
npos
)
{
op
.
padding_mode
=
op
::
padding_mode_t
::
same
;
std
::
vector
<
size_t
>
weight_dims
=
weights
->
get_shape
().
lens
();
size_t
weight_h
=
weight_dims
[
2
];
size_t
weight_w
=
weight_dims
[
3
];
...
...
@@ -101,10 +100,6 @@ struct parse_depthwiseconv : op_parser<parse_depthwiseconv>
op
.
padding
[
1
]
=
pads
[
1
];
}
}
else
if
(
pad_mode
.
find
(
"VALID"
)
!=
std
::
string
::
npos
)
{
op
.
padding_mode
=
op
::
padding_mode_t
::
valid
;
}
}
std
::
vector
<
int64_t
>
new_weights_shape
;
...
...
src/tf/tf_parser.cpp
View file @
870a396b
...
...
@@ -347,7 +347,7 @@ void tf_parser::parse_node(const std::string& name)
// input was from a node with multiple outputs
if
(
contains
(
input_name
,
':'
))
{
input_name
=
input_name
.
substr
(
0
,
input
.
find
(
':'
));
input_name
.
resize
(
input
.
find
(
':'
));
}
else
{
...
...
Prev
1
…
13
14
15
16
17
18
19
20
21
…
24
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment