gaoqiong / MIGraphX · Commits

Commit b76a9043, authored Sep 26, 2022 by charlie

Merge branch 'develop' of github.com:ROCmSoftwarePlatform/AMDMIGraphX into refactor_dynamic_compute

Parents: 68c17b1b, 66bbff1e
Showing 13 changed files with 249 additions and 142 deletions
src/targets/gpu/kernels/include/migraphx/kernels/layernorm.hpp  +12 −10
src/targets/gpu/lowering.cpp                                    +1 −45
src/targets/gpu/prefuse_ops.cpp                                 +11 −2
src/targets/gpu/quant_convolution.cpp                           +0 −1
src/targets/gpu/softmax.cpp                                     +0 −49
src/tf/tf_parser.cpp                                            +1 −1
test/gpu/adjust_allocation.cpp                                  +5 −1
test/gpu/make_precompile_op.hpp                                 +66 −0
test/gpu/pack_int8_args.cpp                                     +34 −28
test/memory_coloring_test.cpp                                   +1 −1
test/ref_ops_test.cpp                                           +1 −1
test/simplify_algebra_test.cpp                                  +99 −0
test/verify/test_layernorm.cpp                                  +18 −3
src/targets/gpu/kernels/include/migraphx/kernels/layernorm.hpp  View file @ b76a9043

@@ -43,7 +43,7 @@ template <index_int Axis,
           class Input2,
           class... Inputs>
 __device__ void generic_binary_layernorm(
-    F compute, BinOp op, Output output, Input1 input1, Input2 input2, Inputs... inputs)
+    F compute, BinOp op, float eps, Output output, Input1 input1, Input2 input2, Inputs... inputs)
 {
     using reduce_output = reduce::with_axis<Input1, Axis>;
     reduce::block::run<reduce_output>([&](auto, auto r) {
@@ -55,32 +55,34 @@ __device__ void generic_binary_layernorm(
             return make_array(x, x * x) * vec_type<value_type>{1.0 / relements};
         })(input1, input2);

-        auto mean_x   = means[0];
-        auto mean_x2  = means[1];
-        auto variance = mean_x2 - (mean_x * mean_x);
+        auto mean_x        = means[0];
+        auto mean_x2       = means[1];
+        auto variance      = mean_x2 - (mean_x * mean_x);
+        value_type eps_val = eps; // implicit conversion for eps

         r.inner([&](auto& y, auto x1, auto x2, auto... xs) {
             auto x = op(x1, x2);
             auto m = x - mean_x;
-            // m * rsqrt(mean(m ^ 2) + 1e-12)
-            y = compute(m * rsqrt(variance + value_type{1e-12}), xs...);
+            // m * rsqrt(mean(m ^ 2) + epsilon)
+            y = compute(m * rsqrt(variance + eps_val), xs...);
         })(output, input1, input2, inputs...);
     });
 }

 template <index_int Axis, class F, class Output, class Input, class... Inputs>
-__device__ void layernorm(F compute, Output output, Input input, Inputs... inputs)
+__device__ void layernorm(F compute, float eps, Output output, Input input, Inputs... inputs)
 {
     generic_binary_layernorm<Axis>(
-        compute, [](auto x, auto) { return x; }, output, input, input, inputs...);
+        compute, [](auto x, auto) { return x; }, eps, output, input, input, inputs...);
 }

 template <index_int Axis, class F, class Output, class Input1, class Input2, class... Inputs>
 __device__ void
-add_layernorm(F compute, Output output, Input1 input1, Input2 input2, Inputs... inputs)
+add_layernorm(F compute, float eps, Output output, Input1 input1, Input2 input2, Inputs... inputs)
 {
     generic_binary_layernorm<Axis>(
-        compute, [](auto x1, auto x2) { return x1 + x2; }, output, input1, input2, inputs...);
+        compute, [](auto x1, auto x2) { return x1 + x2; }, eps, output, input1, input2, inputs...);
 }

 } // namespace migraphx
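For orientation, the kernel math is unchanged except that the hard-coded 1e-12 becomes the eps argument: each element is normalized as m * rsqrt(variance + eps). A minimal CPU sketch of the same computation over a single reduction axis (illustrative names only, not MIGraphX API):

#include <cmath>
#include <cstddef>
#include <vector>

// Reference layernorm over one row, mirroring the kernel above:
// mean and variance via E[x] and E[x^2], then m * rsqrt(variance + eps).
std::vector<float> layernorm_ref(const std::vector<float>& x, float eps)
{
    float mean_x = 0.0f, mean_x2 = 0.0f;
    for(float v : x)
    {
        mean_x += v / x.size();
        mean_x2 += (v * v) / x.size();
    }
    float variance = mean_x2 - mean_x * mean_x;
    std::vector<float> y(x.size());
    for(std::size_t i = 0; i < x.size(); ++i)
        y[i] = (x[i] - mean_x) / std::sqrt(variance + eps);
    return y;
}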
src/targets/gpu/lowering.cpp  View file @ b76a9043

@@ -104,54 +104,10 @@ struct miopen_apply
         offload_copy = (mod->name() == "main") ? pass->offload_copy : false;

-        add_generic_op("acos");
-        add_generic_op("acosh");
-        add_generic_op("add");
-        add_generic_op("asin");
-        add_generic_op("asinh");
-        add_generic_op("atan");
-        add_generic_op("atanh");
-        add_generic_op("ceil");
-        add_generic_op("contiguous");
-        add_generic_op("cos");
-        add_generic_op("cosh");
-        add_generic_op("div");
-        add_generic_op("equal");
-        add_generic_op("erf");
-        add_generic_op("exp");
-        add_generic_op("floor");
-        add_generic_op("greater");
-        add_generic_op("less");
-        add_generic_op("log");
-        add_generic_op("logical_and");
-        add_generic_op("logical_or");
-        add_generic_op("logical_xor");
-        add_generic_op("max");
-        add_generic_op("min");
-        add_generic_op("mul");
-        add_generic_op("not");
-        add_generic_op("pow");
-        add_generic_op("prelu");
-        add_generic_op("recip");
-        add_generic_op("relu");
-        add_generic_op("round");
-        add_generic_op("rsqrt");
-        add_generic_op("sigmoid");
-        add_generic_op("sign");
-        add_generic_op("sin");
-        add_generic_op("sinh");
-        add_generic_op("sqdiff");
-        add_generic_op("sqrt");
-        add_generic_op("sub");
-        add_generic_op("tan");
-        add_generic_op("tanh");
-        add_generic_op("where");
-
         add_extend_op("abs");
         add_extend_op("argmax");
         add_extend_op("argmin");
         add_extend_op("clip");
         add_extend_op("convert");
         add_extend_op("elu");
         add_extend_op("gather");
         add_extend_op("leaky_relu");
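The removed gpu::* registrations are pointwise ops that now reach the GPU through the JIT path rather than the prebuilt device library; the tests later in this commit wrap ops the same way. A sketch of that wrapping, mirroring test/gpu/make_precompile_op.hpp below (wrap_for_jit is an illustrative name, not MIGraphX API):

#include <migraphx/make_op.hpp>
#include <migraphx/operation.hpp>

// Wrap an op so lowering emits a JIT-compiled gpu::precompile_op instead of
// a dedicated device call such as gpu::add.
migraphx::operation wrap_for_jit(const migraphx::operation& op)
{
    return migraphx::make_op("gpu::precompile_op", {{"op", migraphx::to_value(op)}});
}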
src/targets/gpu/prefuse_ops.cpp  View file @ b76a9043

@@ -35,6 +35,12 @@ namespace {
 template <class Derived, std::size_t N>
 struct layernorm_base
 {
+    float epsilon = 1e-12f;
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return pack(f(self.epsilon, "epsilon"));
+    }
     shape compute_shape(std::vector<shape> inputs, std::vector<module_ref> mods) const
     {
         std::size_t nargs = 1;
@@ -62,6 +68,7 @@ struct layernorm_base
 struct layernorm : layernorm_base<layernorm, 0>
 {
     std::string name() const { return "gpu::prelayernorm"; }
 };
+MIGRAPHX_REGISTER_OP(layernorm);
@@ -80,8 +87,9 @@ struct find_layernorm
 {
     auto ins   = r.result;
     auto x_ins = r.instructions["x"];
+    auto eps   = r.instructions["eps"]->eval().at<float>();

-    m.replace_instruction(ins, layernorm{}, x_ins);
+    m.replace_instruction(ins, layernorm{eps}, x_ins);
 }
 };
@@ -96,8 +104,9 @@ struct find_add_layernorm
 {
     auto ins     = r.result;
     auto add_ins = r.instructions["add"];
+    auto eps     = r.instructions["eps"]->eval().at<float>();

-    m.replace_instruction(ins, add_layernorm{}, add_ins->inputs());
+    m.replace_instruction(ins, add_layernorm{eps}, add_ins->inputs());
 }
 };

 } // namespace
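What reflect() buys here: epsilon becomes part of the operation's serialized value, so the eps the matchers read out of the graph survives printing and value round trips. A small illustrative sketch, assuming the structs above (aggregate initialization through the base works via brace elision, exactly as layernorm{eps} is written in the matchers):

auto op = layernorm{1e-5f};
auto v  = migraphx::to_value(op); // now carries an "epsilon" entry of 1e-5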
src/targets/gpu/quant_convolution.cpp  View file @ b76a9043

@@ -22,7 +22,6 @@
  * THE SOFTWARE.
  */
 #include <migraphx/gpu/quant_convolution.hpp>
-#include <migraphx/gpu/device/convert.hpp>
 #include <migraphx/gpu/context.hpp>
 #include <migraphx/generate.hpp>
src/targets/gpu/softmax.cpp  deleted (100644 → 0)  View file @ 68c17b1b
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/softmax.hpp>
#include <migraphx/gpu/device/softmax.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/tune_axis.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

shape hip_softmax::compute_shape(const std::vector<shape>& inputs) const
{
    check_shapes{inputs, *this}.has(2).standard();
    return op.normalize_compute_shape({inputs.at(0)});
}

argument hip_softmax::compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
    auto n_dim      = args.front().get_shape().lens().size();
    auto tuned_axis = tune_axis(n_dim, op.axis, op.name());
    device::softmax(ctx.get_stream().get(), args.back(), args.front(), tuned_axis);
    return args.back();
}

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
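The deleted wrapper only validated shapes and forwarded to device::softmax along a tuned axis; the underlying computation is the standard max-shifted softmax. A CPU reference sketch for one contiguous axis (illustrative, not a MIGraphX API):

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

std::vector<float> softmax_ref(const std::vector<float>& x)
{
    // Subtract the max first for numerical stability.
    float mx = *std::max_element(x.begin(), x.end());
    std::vector<float> y(x.size());
    float sum = 0.0f;
    for(std::size_t i = 0; i < x.size(); ++i)
    {
        y[i] = std::exp(x[i] - mx);
        sum += y[i];
    }
    for(float& v : y)
        v /= sum;
    return y;
}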
src/tf/tf_parser.cpp  View file @ b76a9043

@@ -347,7 +347,7 @@ void tf_parser::parse_node(const std::string& name)
             // input was from a node with multiple outputs
             if(contains(input_name, ':'))
             {
-                input_name = input_name.substr(0, input.find(':'));
+                input_name.resize(input.find(':'));
             }
             else
             {
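The fix swaps a substring copy for an in-place truncation; std::string::resize(n) keeps the first n characters. Both the old and new lines search input for the colon while modifying input_name. A minimal illustration of the in-place truncation:

#include <cassert>
#include <string>

int main()
{
    std::string input_name = "node:0";
    input_name.resize(input_name.find(':')); // truncate in place, no copy
    assert(input_name == "node");
}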
test/gpu/adjust_allocation.cpp  View file @ b76a9043

@@ -40,6 +40,10 @@
 #include <migraphx/make_op.hpp>
 #include <basic_ops.hpp>
 #include <test.hpp>
+#include "make_precompile_op.hpp"
+
+// Treat some operators as compilable to enable lowering
+MIGRAPHX_GPU_TEST_PRECOMPILE("add", "mul", "convert")

 void run_lowering(migraphx::program& p, bool offload_copy = false)
 {
@@ -118,7 +122,7 @@ TEST_CASE(no_copy_dead_param)
     auto xb  = mm->add_instruction(migraphx::make_op("hip::allocate", {{"shape", to_value(s)}}));
     auto gx  = mm->add_instruction(migraphx::make_op("hip::copy_to_gpu"), x, xb);
     auto ab  = mm->add_instruction(migraphx::make_op("hip::allocate", {{"shape", to_value(s)}}));
-    auto sum = mm->add_instruction(migraphx::make_op("gpu::add"), gx, gx, ab);
+    auto sum = mm->add_instruction(make_precompile_op("add"), gx, gx, ab);
     auto r   = mm->add_instruction(migraphx::make_op("hip::copy_from_gpu"), sum);
     mm->add_return({r});
src/targets/gpu/device/gelu.cpp → test/gpu/make_precompile_op.hpp  View file @ b76a9043

@@ -21,63 +21,46 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
-#include <migraphx/gpu/device/gelu.hpp>
-#include <migraphx/gpu/device/nary.hpp>
-#include <migraphx/gpu/device/types.hpp>
-#include <cmath>
+#ifndef MIGRAPHX_GUARD_TEST_GPU_MAKE_PRECOMPILE_OP_HPP
+#define MIGRAPHX_GUARD_TEST_GPU_MAKE_PRECOMPILE_OP_HPP

-namespace migraphx {
-inline namespace MIGRAPHX_INLINE_NS {
-namespace gpu {
-namespace device {
+#include <migraphx/operation.hpp>
+#include <migraphx/gpu/compiler.hpp>
+#include <migraphx/make_op.hpp>

-// x * 0.5 * (1.0 + erf(x / sqrt(2.0)))
-template <class T>
-auto gelu_fn(T x) __device__
-{
-    return x * 0.5 * (1 + ::erf(x * M_SQRT1_2));
-}
+// NOLINTNEXTLINE
+#define MIGRAPHX_GPU_TEST_PRECOMPILE(...)                                 \
+    struct test_compiler : migraphx::gpu::compiler<test_compiler>        \
+    {                                                                     \
+        std::vector<std::string> names() const { return {__VA_ARGS__}; } \
+                                                                          \
+        template <class... Ts>                                            \
+        migraphx::operation compile_op(Ts&&...) const                     \
+        {                                                                 \
+            MIGRAPHX_THROW("Not compilable");                             \
+        }                                                                 \
+                                                                          \
+        template <class... Ts>                                            \
+        migraphx::gpu::compiler_replace compile(Ts&&...) const            \
+        {                                                                 \
+            MIGRAPHX_THROW("Not compilable");                             \
+        }                                                                 \
+    };

-// 0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * pow(x, 3))))
-template <class T>
-auto gelu_fn_new(T x) __device__
-{
-    return 0.5 * x * (1 + tanh(sqrt(M_2_PI) * (x + 0.044715 * x * x * x)));
-}
+inline migraphx::operation make_precompile_op(migraphx::rank<0>, const migraphx::operation& op)
+{
+    return migraphx::make_op("gpu::precompile_op", {{"op", migraphx::to_value(op)}});
+}

-void gelu(hipStream_t stream, const argument& result, const argument& arg)
-{
-    nary(stream, result, arg)([](auto x) __device__ { return gelu_fn(to_hip_type(x)); });
-}
+inline migraphx::operation make_precompile_op(migraphx::rank<1>, const std::string& name)
+{
+    return make_precompile_op(migraphx::rank<0>{}, migraphx::make_op(name));
+}

-void gelu_new(hipStream_t stream, const argument& result, const argument& arg)
-{
-    nary(stream, result, arg)([](auto x) __device__ { return gelu_fn_new(to_hip_type(x)); });
-}
+template <class T>
+auto make_precompile_op(const T& x)
+{
+    return make_precompile_op(migraphx::rank<1>{}, x);
+}

-void add_gelu(hipStream_t stream,
-              const argument& result,
-              const argument& arg1,
-              const argument& arg2)
-{
-    nary(stream, result, arg1, arg2)([](auto x, auto y) __device__ {
-        auto sum = to_hip_type(x + y);
-        return gelu_fn(sum);
-    });
-}
+#endif // MIGRAPHX_GUARD_TEST_GPU_MAKE_PRECOMPILE_OP_HPP

-void add_gelu_new(hipStream_t stream,
-                  const argument& result,
-                  const argument& arg1,
-                  const argument& arg2)
-{
-    nary(stream, result, arg1, arg2)([](auto x, auto y) __device__ {
-        auto sum = to_hip_type(x + y);
-        return gelu_fn(sum);
-    });
-}
-
-} // namespace device
-} // namespace gpu
-} // namespace MIGRAPHX_INLINE_NS
-} // namespace migraphx
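A brief usage sketch of the helpers above (hedged: add_pointwise and the instruction refs are illustrative, not part of the header):

#include "make_precompile_op.hpp"
#include <migraphx/module.hpp>

// Register the stub compiler once per test binary so lowering treats these
// op names as JIT-compilable; compile() is never actually invoked, hence
// the throws in the macro.
MIGRAPHX_GPU_TEST_PRECOMPILE("add", "mul")

migraphx::instruction_ref add_pointwise(migraphx::module& m,
                                        migraphx::instruction_ref x,
                                        migraphx::instruction_ref y,
                                        migraphx::instruction_ref alloc)
{
    // Both overloads route through the rank dispatch in the header:
    auto by_name = make_precompile_op("add");                    // from an op name
    auto by_op   = make_precompile_op(migraphx::make_op("mul")); // from an operation
    (void)by_op;
    return m.add_instruction(by_name, x, y, alloc);
}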
test/gpu/pack_int8_args.cpp  View file @ b76a9043

@@ -38,6 +38,10 @@
 #include <migraphx/pass_manager.hpp>
 #include <migraphx/make_op.hpp>
 #include <test.hpp>
+#include "make_precompile_op.hpp"
+
+// Treat some operators as compilable to enable lowering
+MIGRAPHX_GPU_TEST_PRECOMPILE("add", "mul", "convert")

 void run_passes(migraphx::module& m)
 {
@@ -116,9 +120,8 @@ TEST_CASE(quant_dot)
     m.add_instruction(migraphx::make_op("gpu::contiguous"), beta_broadcast, beta_alloc);
     auto mul_alloc = m.add_instruction(
         migraphx::make_op("hip::allocate", {{"shape", migraphx::to_value(m3_shape)}}));
-    auto m3_beta  = m.add_instruction(migraphx::make_op("gpu::mul"), l3, beta_contiguous, mul_alloc);
-    auto gemm_add = m.add_instruction(migraphx::make_op("gpu::add"), gemm, m3_beta, output);
+    auto m3_beta  = m.add_instruction(make_precompile_op("mul"), l3, beta_contiguous, mul_alloc);
+    auto gemm_add = m.add_instruction(make_precompile_op("add"), gemm, m3_beta, output);
     m.add_return({gemm_add});

     return m;
@@ -187,21 +190,23 @@ TEST_CASE(quant_dot_trans)
     // back result to int8
     auto tl1_convert_alloc = m.add_instruction(migraphx::make_op(
         "hip::allocate", {{"shape", migraphx::to_value(alpha_contiguous->get_shape())}}));
-    auto tl1_convert = m.add_instruction(
-        migraphx::make_op("gpu::convert", {{"target_type", alpha->get_shape().type()}}),
-        conta,
-        tl1_convert_alloc);
+    auto tl1_convert = m.add_instruction(
+        make_precompile_op(
+            migraphx::make_op("convert", {{"target_type", alpha->get_shape().type()}})),
+        conta,
+        tl1_convert_alloc);
     auto mul_alloc = m.add_instruction(migraphx::make_op(
         "hip::allocate", {{"shape", migraphx::to_value(tl1_convert->get_shape())}}));
-    auto tl1_alpha_int32 = m.add_instruction(
-        migraphx::make_op("gpu::mul"), alpha_contiguous, tl1_convert, mul_alloc);
+    auto tl1_alpha_int32 = m.add_instruction(
+        make_precompile_op("mul"), alpha_contiguous, tl1_convert, mul_alloc);
     // convert mul_res to int8
     auto tl1_alpha_int8_alloc = m.add_instruction(migraphx::make_op(
         "hip::allocate", {{"shape", migraphx::to_value(conta->get_shape())}}));
-    auto tl1_alpha_int8 = m.add_instruction(
-        migraphx::make_op("gpu::convert", {{"target_type", conta->get_shape().type()}}),
-        tl1_alpha_int32,
-        tl1_alpha_int8_alloc);
+    auto tl1_alpha_int8 = m.add_instruction(
+        make_precompile_op(
+            migraphx::make_op("convert", {{"target_type", conta->get_shape().type()}})),
+        tl1_alpha_int32,
+        tl1_alpha_int8_alloc);
     auto packb = contb;
     if(int8_x4)
@@ -306,9 +311,8 @@ TEST_CASE(quant_dot_pad)
     m.add_instruction(migraphx::make_op("gpu::contiguous"), beta_broadcast, beta_alloc);
     auto mul_alloc = m.add_instruction(
         migraphx::make_op("hip::allocate", {{"shape", migraphx::to_value(s3)}}));
-    auto m3_beta  = m.add_instruction(migraphx::make_op("gpu::mul"), l3, beta_contiguous, mul_alloc);
-    auto gemm_add = m.add_instruction(migraphx::make_op("gpu::add"), gemm, m3_beta, output);
+    auto m3_beta  = m.add_instruction(make_precompile_op("mul"), l3, beta_contiguous, mul_alloc);
+    auto gemm_add = m.add_instruction(make_precompile_op("add"), gemm, m3_beta, output);
     m.add_return({gemm_add});

     return m;
 };
@@ -396,14 +400,15 @@ TEST_CASE(quant_dot_trans_pad)
     // back result to int8
     auto tl1_convert_alloc = m.add_instruction(migraphx::make_op(
         "hip::allocate", {{"shape", migraphx::to_value(alpha_contiguous->get_shape())}}));
-    auto tl1_convert = m.add_instruction(
-        migraphx::make_op("gpu::convert", {{"target_type", alpha->get_shape().type()}}),
-        conta,
-        tl1_convert_alloc);
+    auto tl1_convert = m.add_instruction(
+        make_precompile_op(
+            migraphx::make_op("convert", {{"target_type", alpha->get_shape().type()}})),
+        conta,
+        tl1_convert_alloc);
     auto mul_alloc = m.add_instruction(migraphx::make_op(
         "hip::allocate", {{"shape", migraphx::to_value(tl1_convert->get_shape())}}));
-    auto tl1_alpha_int32 = m.add_instruction(
-        migraphx::make_op("gpu::mul"), alpha_contiguous, tl1_convert, mul_alloc);
+    auto tl1_alpha_int32 = m.add_instruction(
+        make_precompile_op("mul"), alpha_contiguous, tl1_convert, mul_alloc);
     // convert mul_res to int8
     auto tl1_alpha_int8_alloc = m.add_instruction(migraphx::make_op(
         "hip::allocate", {{"shape", migraphx::to_value(conta->get_shape())}}));
@@ -415,10 +420,11 @@ TEST_CASE(quant_dot_trans_pad)
             migraphx::make_op("hip::allocate", {{"shape", migraphx::to_value(ps1)}}));
     }
-    auto tl1_alpha_int8 = m.add_instruction(
-        migraphx::make_op("gpu::convert", {{"target_type", conta->get_shape().type()}}),
-        tl1_alpha_int32,
-        tl1_alpha_int8_alloc);
+    auto tl1_alpha_int8 = m.add_instruction(
+        make_precompile_op(
+            migraphx::make_op("convert", {{"target_type", conta->get_shape().type()}})),
+        tl1_alpha_int32,
+        tl1_alpha_int8_alloc);
     auto pa = tl1_alpha_int8;
     if(int8_x4)
test/memory_coloring_test.cpp  View file @ b76a9043

@@ -724,7 +724,7 @@ TEST_CASE(test39)
     auto sub_modules = p.get_modules();
     std::reverse(sub_modules.begin(), sub_modules.end());
-    for(auto& smod : sub_modules)
+    for(const auto& smod : sub_modules)
     {
         run_pass(*smod);
     }
test/ref_ops_test.cpp  View file @ b76a9043

@@ -3663,7 +3663,7 @@ TEST_CASE(multinomial_test)
     result.visit([&](auto output) { result_vec.assign(output.begin(), output.end()); });

     std::vector<int> res_dist(5, 0);
-    for(auto& r : result_vec)
+    for(const auto& r : result_vec)
         res_dist[r]++;
     auto dist_sum     = std::accumulate(dist.begin(), dist.end(), 0);
     auto res_dist_sum = std::accumulate(res_dist.begin(), res_dist.end(), 0);
test/simplify_algebra_test.cpp  View file @ b76a9043

@@ -236,6 +236,105 @@ TEST_CASE(simplify_mul_conv1)
     EXPECT(new_conv->outputs().front()->name() != "mul");
 }

+TEST_CASE(simplify_mul_conv2)
+{
+    migraphx::module m;
+    auto x = m.add_parameter("x", {migraphx::shape::int32_type, {1, 128, 28, 28}});
+    auto w =
+        m.add_literal(migraphx::generate_literal({migraphx::shape::int32_type, {256, 128, 3, 3}}));
+    auto conv = m.add_instruction(
+        migraphx::make_op("convolution",
+                          {{"padding", {1, 1}}, {"stride", {2, 2}}, {"dilation", {1, 1}}}),
+        x,
+        w);
+    auto a      = m.add_literal(migraphx::generate_literal({migraphx::shape::int32_type, {256}}));
+    auto unsq_a = m.add_instruction(migraphx::make_op("unsqueeze", {{"axes", {1, 2}}}), a);
+    auto b      = m.add_instruction(
+        migraphx::make_op("multibroadcast", {{"out_lens", {1, 256, 14, 14}}}), unsq_a);
+    auto mul = m.add_instruction(migraphx::make_op("mul"), conv, b);
+    m.add_instruction(pass_op{}, mul);
+    EXPECT(conv->outputs().front()->name() == "mul");
+    run_pass(m);
+    auto new_conv =
+        std::find_if(m.begin(), m.end(), [](auto&& ins) { return ins.name() == "convolution"; });
+    EXPECT(new_conv->outputs().front()->name() != "mul");
+}
+
+// len = 1 case
+TEST_CASE(simplify_mul_conv3)
+{
+    migraphx::module m;
+    auto x = m.add_parameter("x", {migraphx::shape::int32_type, {1, 128, 28, 28}});
+    auto w =
+        m.add_literal(migraphx::generate_literal({migraphx::shape::int32_type, {256, 128, 3, 3}}));
+    auto conv = m.add_instruction(
+        migraphx::make_op("convolution",
+                          {{"padding", {1, 1}}, {"stride", {2, 2}}, {"dilation", {1, 1}}}),
+        x,
+        w);
+    auto a = m.add_literal(
+        migraphx::generate_literal({migraphx::shape::int32_type, {256, 1, 1}, {1, 18, 1}}));
+    auto b = m.add_instruction(
+        migraphx::make_op("multibroadcast", {{"out_lens", {1, 256, 14, 14}}}), a);
+    auto mul = m.add_instruction(migraphx::make_op("mul"), conv, b);
+    m.add_instruction(pass_op{}, mul);
+    EXPECT(conv->outputs().front()->name() == "mul");
+    run_pass(m);
+    auto new_conv =
+        std::find_if(m.begin(), m.end(), [](auto&& ins) { return ins.name() == "convolution"; });
+    EXPECT(new_conv->outputs().front()->name() != "mul");
+}
+
+// Previously broadcasted literal case, should skip
+TEST_CASE(simplify_mul_conv_skip1)
+{
+    migraphx::module m;
+    auto x = m.add_parameter("x", {migraphx::shape::int32_type, {1, 128, 28, 28}});
+    auto w =
+        m.add_literal(migraphx::generate_literal({migraphx::shape::int32_type, {256, 128, 3, 3}}));
+    auto conv = m.add_instruction(
+        migraphx::make_op("convolution",
+                          {{"padding", {1, 1}}, {"stride", {2, 2}}, {"dilation", {1, 1}}}),
+        x,
+        w);
+    auto a = m.add_literal(
+        migraphx::generate_literal({migraphx::shape::int32_type, {256, 14, 14}, {1, 0, 0}}));
+    auto b = m.add_instruction(
+        migraphx::make_op("broadcast", {{"axis", 1}, {"out_lens", {1, 256, 14, 14}}}), a);
+    auto mul = m.add_instruction(migraphx::make_op("mul"), conv, b);
+    m.add_instruction(pass_op{}, mul);
+    EXPECT(conv->outputs().front()->name() == "mul");
+    run_pass(m);
+    auto new_conv =
+        std::find_if(m.begin(), m.end(), [](auto&& ins) { return ins.name() == "convolution"; });
+    EXPECT(new_conv->outputs().front()->name() == "mul");
+}
+
+// Another previously broadcasted literal case, should skip
+TEST_CASE(simplify_mul_conv_skip2)
+{
+    migraphx::module m;
+    auto x = m.add_parameter("x", {migraphx::shape::int32_type, {1, 128, 28, 28}});
+    auto w =
+        m.add_literal(migraphx::generate_literal({migraphx::shape::int32_type, {256, 128, 3, 3}}));
+    auto conv = m.add_instruction(
+        migraphx::make_op("convolution",
+                          {{"padding", {1, 1}}, {"stride", {2, 2}}, {"dilation", {1, 1}}}),
+        x,
+        w);
+    auto a = m.add_literal(
+        migraphx::generate_literal({migraphx::shape::int32_type, {256, 14, 14}, {1, 0, 0}}));
+    auto b = m.add_instruction(
+        migraphx::make_op("multibroadcast", {{"out_lens", {1, 256, 14, 14}}}), a);
+    auto mul = m.add_instruction(migraphx::make_op("mul"), conv, b);
+    m.add_instruction(pass_op{}, mul);
+    EXPECT(conv->outputs().front()->name() == "mul");
+    run_pass(m);
+    auto new_conv =
+        std::find_if(m.begin(), m.end(), [](auto&& ins) { return ins.name() == "convolution"; });
+    EXPECT(new_conv->outputs().front()->name() == "mul");
+}
+
 TEST_CASE(simplify_mul_slice_conv1)
 {
     migraphx::module m1;
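For context, these cases exercise folding a constant multiply into the convolution weights, which is valid because convolution is linear in its weights. For a multiplier $a$ with one value per output channel $c$:

$$\mathrm{mul}\bigl(\mathrm{conv}(x, w),\ \mathrm{broadcast}(a)\bigr) \;=\; \mathrm{conv}(x,\ w \cdot a),$$

where $w \cdot a$ scales filter $c$ by $a_c$ across its input-channel and spatial dimensions. The skip cases feed a literal that already carries broadcast (zero-stride) dimensions, which, per the test comments, the pass deliberately leaves alone: the convolution is expected to still feed the mul afterwards.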
test/verify/test_layernorm.cpp  View file @ b76a9043

@@ -29,14 +29,16 @@
 #include <migraphx/op/reduce_mean.hpp>

-migraphx::instruction_ref
-add_layernorm(migraphx::module& m, migraphx::instruction_ref x, std::vector<size_t> dims)
+migraphx::instruction_ref add_layernorm(migraphx::module& m,
+                                        migraphx::instruction_ref x,
+                                        std::vector<size_t> dims,
+                                        float eps = 1e-12f)
 {
     auto scale =
         m.add_parameter("scale", migraphx::shape{migraphx::shape::float_type, {dims.back()}});
     auto bias = m.add_parameter("bias", migraphx::shape{migraphx::shape::float_type, {dims.back()}});
-    auto epsilon  = m.add_literal(1e-12f);
+    auto epsilon  = m.add_literal(eps);
     auto exponent = m.add_literal(2.0f);

     auto mean = m.add_instruction(migraphx::op::reduce_mean({2}), x);
@@ -88,6 +90,19 @@ struct test_layernorm2 : verify_program<test_layernorm2>
     }
 };

+struct test_layernorm_eps : verify_program<test_layernorm_eps>
+{
+    migraphx::program create_program() const
+    {
+        migraphx::program p;
+        auto* mm                 = p.get_main_module();
+        std::vector<size_t> dims = {1, 2, 5};
+        auto x = mm->add_parameter("x", migraphx::shape{migraphx::shape::float_type, dims});
+        add_layernorm(*mm, x, dims, 1e-5f);
+        return p;
+    }
+};
+
 struct test_layernorm_triadd : verify_program<test_layernorm_triadd>
 {
     migraphx::program create_program() const
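A note on why a dedicated epsilon case is worth verifying: for low-variance inputs the normalization scale rsqrt(variance + eps) is dominated by eps, so a fusion that silently dropped the attribute and fell back to the old hard-coded 1e-12 would still pass the default-eps tests but produce measurably different output here at eps = 1e-5.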