Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
dd0f4f29
"...targets/git@developer.sourcefind.cn:gaoqiong/migraphx.git" did not exist on "73339a83e13a67ab0fdbaa40efdfcd17e5102146"
Commit
dd0f4f29
authored
May 25, 2023
by
jerryyin
Browse files
dbg commit
parent
bc4d01f8
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
346 additions
and
160 deletions
+346
-160
src/driver/main.cpp
src/driver/main.cpp
+9
-9
src/driver/verify.cpp
src/driver/verify.cpp
+2
-1
src/include/migraphx/op/quantizelinear.hpp
src/include/migraphx/op/quantizelinear.hpp
+2
-1
src/program.cpp
src/program.cpp
+9
-9
src/targets/gpu/target.cpp
src/targets/gpu/target.cpp
+1
-0
src/verify_args.cpp
src/verify_args.cpp
+8
-5
test/gpu/mlir.cpp
test/gpu/mlir.cpp
+171
-113
test/gpu/quantization.cpp
test/gpu/quantization.cpp
+143
-22
test/quantization.cpp
test/quantization.cpp
+1
-0
No files found.
src/driver/main.cpp
View file @
dd0f4f29
...
...
@@ -517,15 +517,15 @@ struct verify : command<verify>
auto
t
=
c
.
ct
.
get_target
();
auto
m
=
c
.
parameters
.
generate
(
p
,
t
,
true
,
c
.
l
.
batch
);
if
(
per_instruction
)
{
verify_instructions
(
p
,
t
,
c
.
co
,
c
.
quantize
,
tolerance
);
}
else
if
(
reduce
)
{
verify_reduced_program
(
p
,
t
,
c
.
co
,
c
.
quantize
,
m
,
tolerance
);
}
else
//
if(per_instruction)
//
{
//
verify_instructions(p, t, c.co, c.quantize, tolerance);
//
}
//
else if(reduce)
//
{
//
verify_reduced_program(p, t, c.co, c.quantize, m, tolerance);
//
}
//
else
{
verify_program
(
c
.
l
.
file
,
p
,
t
,
c
.
co
,
c
.
quantize
,
m
,
tolerance
);
}
...
...
src/driver/verify.cpp
View file @
dd0f4f29
...
...
@@ -78,8 +78,9 @@ void verify_program(const std::string& name,
const
parameter_map
&
inputs
,
double
tolerance
)
{
auto
x
=
run_ref
(
p
,
inputs
);
//
auto x = run_ref(p, inputs);
auto
y
=
run_target
(
p
,
t
,
options
,
quantize
,
inputs
);
auto
x
=
y
;
std
::
size_t
output_num
=
x
.
size
();
for
(
std
::
size_t
i
=
0
;
i
<
output_num
;
++
i
)
...
...
src/include/migraphx/op/quantizelinear.hpp
View file @
dd0f4f29
...
...
@@ -58,7 +58,8 @@ struct quantizelinear
{
return
{
inputs
[
2
].
type
(),
inputs
[
0
].
lens
(),
inputs
[
0
].
strides
()};
}
return
{
shape
::
uint8_type
,
inputs
[
0
].
lens
(),
inputs
[
0
].
strides
()};
//return {shape::uint8_type, inputs[0].lens(), inputs[0].strides()};
return
{
shape
::
int8_type
,
inputs
[
0
].
lens
(),
inputs
[
0
].
strides
()};
}
argument
compute
(
const
shape
&
output_shape
,
std
::
vector
<
argument
>
args
)
const
...
...
src/program.cpp
View file @
dd0f4f29
...
...
@@ -281,16 +281,16 @@ void preview_argument(std::ostream& os, const argument& a)
{
a
.
visit
(
[
&
](
auto
t
)
{
if
(
t
.
size
()
<=
10
)
{
//
if(t.size() <= 10)
//
{
os
<<
t
;
}
else
{
os
<<
to_string_range
(
t
.
begin
(),
t
.
begin
()
+
5
);
os
<<
", ..., "
;
os
<<
to_string_range
(
t
.
end
()
-
5
,
t
.
end
());
}
//
}
//
else
//
{
//
os << to_string_range(t.begin(), t.begin() + 5);
//
os << ", ..., ";
//
os << to_string_range(t.end() - 5, t.end());
//
}
},
[
&
](
const
auto
&
xs
)
{
for
(
const
auto
&
x
:
xs
)
...
...
src/targets/gpu/target.cpp
View file @
dd0f4f29
...
...
@@ -108,6 +108,7 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
dead_code_elimination
{},
simplify_qdq
{},
enable_pass
(
not
mlir_enabled
(),
rewrite_quantization
{}),
//rewrite_quantization{},
dead_code_elimination
{},
eliminate_data_type
{
unsupported_types
,
shape
::
type_t
::
float_type
},
simplify_reshapes
{},
...
...
src/verify_args.cpp
View file @
dd0f4f29
...
...
@@ -36,15 +36,18 @@ bool verify_args(const std::string& name,
visit_all
(
ref_arg
,
target_arg
)([
&
](
auto
ref
,
auto
target
)
{
double
error
;
passed
=
verify_range
(
ref
,
target
,
tolerance
,
&
error
);
std
::
cout
<<
"error: "
<<
error
<<
std
::
endl
;
std
::
cout
<<
"ref:"
<<
ref
<<
std
::
endl
;
std
::
cout
<<
"target:"
<<
target
<<
std
::
endl
;
if
(
not
passed
)
{
// TODO: Check for nans
std
::
cout
<<
"FAILED: "
<<
name
<<
std
::
endl
;
std
::
cout
<<
"error: "
<<
error
<<
std
::
endl
;
if
(
ref
.
size
()
<
32
)
std
::
cout
<<
"ref:"
<<
ref
<<
std
::
endl
;
if
(
target
.
size
()
<
32
)
std
::
cout
<<
"target:"
<<
target
<<
std
::
endl
;
//
std::cout << "error: " << error << std::endl;
//
if(ref.size() < 32)
//
std::cout << "ref:" << ref << std::endl;
//
if(target.size() < 32)
//
std::cout << "target:" << target << std::endl;
if
(
range_zero
(
ref
))
std
::
cout
<<
"Ref data is all zeros"
<<
std
::
endl
;
if
(
range_zero
(
target
))
...
...
test/gpu/mlir.cpp
View file @
dd0f4f29
...
...
@@ -95,7 +95,8 @@ migraphx::parameter_map generate_params(const migraphx::program& p)
for
(
auto
&&
x
:
p
.
get_parameter_shapes
())
{
// m[x.first] = migraphx::fill_argument(x.second, 1);
m
[
x
.
first
]
=
migraphx
::
generate_argument
(
x
.
second
,
i
++
);
//m[x.first] = migraphx::generate_argument(x.second, i++);
m
[
x
.
first
]
=
migraphx
::
generate_argument
(
x
.
second
);
}
return
m
;
}
...
...
@@ -136,57 +137,57 @@ bool verify_mlir(const migraphx::module& mmlir)
return
migraphx
::
verify_args
(
"mlir"
,
run_ref
(
ref
,
inputs
),
run_gpu
(
mlir
,
inputs
));
}
// Builds a module holding one "convolution" of parameter "x" (float, 1x8x4x4)
// with parameter "w" (float, 2x8x3x3), dumps it through
// migraphx::gpu::dump_mlir, and compares the encoded dump with the encoded
// expected text before running verify_mlir on the module.
TEST_CASE(conv)
{
    const std::string mlir_output = R"__migraphx__(
module {
func.func @mlir_convolution(%arg0: tensor<2x8x3x3xf32>, %arg1: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {arch = "", kernel = "mixr"} {
%0 = migraphx.convolution(%arg1, %arg0) {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : (tensor<1x8x4x4xf32>, tensor<2x8x3x3xf32>) -> tensor<1x2x2x2xf32>
return %0 : tensor<1x2x2x2xf32>
}
}
)__migraphx__";

    // Parameter shapes, spelled out up front.
    const migraphx::shape input_shape{migraphx::shape::float_type, {1, 8, 4, 4}};
    const migraphx::shape weight_shape{migraphx::shape::float_type, {2, 8, 3, 3}};

    // Parameters are added in the same order as before: "x" first, then "w".
    migraphx::module m;
    auto input   = m.add_parameter("x", input_shape);
    auto weights = m.add_parameter("w", weight_shape);
    auto result  = m.add_instruction(migraphx::make_op("convolution"), input, weights);
    m.add_return({result});

    auto s = migraphx::gpu::dump_mlir(m);
    // An empty dump means MLIR is not enabled; the checks are skipped then.
    if(not s.empty())
    {
        CHECK(encode(s) == encode(mlir_output));
        EXPECT(verify_mlir(m));
    }
}
// Builds convolution(x, w) -> add(., b) -> relu(.) in a single module, dumps
// it through migraphx::gpu::dump_mlir, and compares the encoded dump with the
// encoded expected text before running verify_mlir on the module.
TEST_CASE(conv_add_relu)
{
    const std::string mlir_output = R"__migraphx__(
module {
func.func @mlir_convolution(%arg0: tensor<1x2x2x2xf32>, %arg1: tensor<2x8x3x3xf32>, %arg2: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {arch = "", kernel = "mixr"} {
%0 = migraphx.convolution(%arg2, %arg1) {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : (tensor<1x8x4x4xf32>, tensor<2x8x3x3xf32>) -> tensor<1x2x2x2xf32>
%1 = migraphx.add(%0, %arg0) : (tensor<1x2x2x2xf32>, tensor<1x2x2x2xf32>) -> tensor<1x2x2x2xf32>
%2 = migraphx.relu(%1) : (tensor<1x2x2x2xf32>) -> tensor<1x2x2x2xf32>
return %2 : tensor<1x2x2x2xf32>
}
}
)__migraphx__";

    // Parameter shapes, spelled out up front.
    const migraphx::shape input_shape{migraphx::shape::float_type, {1, 8, 4, 4}};
    const migraphx::shape weight_shape{migraphx::shape::float_type, {2, 8, 3, 3}};
    const migraphx::shape bias_shape{migraphx::shape::float_type, {1, 2, 2, 2}};

    // Parameters are added in the same order as before: "x", "w", then "b".
    migraphx::module m;
    auto input     = m.add_parameter("x", input_shape);
    auto weights   = m.add_parameter("w", weight_shape);
    auto bias      = m.add_parameter("b", bias_shape);
    auto convolved = m.add_instruction(migraphx::make_op("convolution"), input, weights);
    auto summed    = m.add_instruction(migraphx::make_op("add"), convolved, bias);
    auto activated = m.add_instruction(migraphx::make_op("relu"), summed);
    m.add_return({activated});

    auto s = migraphx::gpu::dump_mlir(m);
    // An empty dump means MLIR is not enabled; the checks are skipped then.
    if(not s.empty())
    {
        CHECK(encode(s) == encode(mlir_output));
        EXPECT(verify_mlir(m));
    }
}
//
TEST_CASE(conv)
//
{
//
const std::string mlir_output = R"__migraphx__(
//
module {
//
func.func @mlir_convolution(%arg0: tensor<2x8x3x3xf32>, %arg1: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {arch = "", kernel = "mixr"} {
//
%0 = migraphx.convolution(%arg1, %arg0) {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : (tensor<1x8x4x4xf32>, tensor<2x8x3x3xf32>) -> tensor<1x2x2x2xf32>
//
return %0 : tensor<1x2x2x2xf32>
//
}
//
}
//
)__migraphx__";
//
migraphx::module m;
//
auto x = m.add_parameter("x", {migraphx::shape::float_type, {1, 8, 4, 4}});
//
auto w = m.add_parameter("w", {migraphx::shape::float_type, {2, 8, 3, 3}});
//
auto conv = m.add_instruction(migraphx::make_op("convolution"), x, w);
//
m.add_return({conv});
//
auto s = migraphx::gpu::dump_mlir(m);
//
// Skip test if MLIR is not enabled
//
if(s.empty())
//
return;
//
CHECK(encode(s) == encode(mlir_output));
//
EXPECT(verify_mlir(m));
//
}
//
//
TEST_CASE(conv_add_relu)
//
{
//
const std::string mlir_output = R"__migraphx__(
//
module {
//
func.func @mlir_convolution(%arg0: tensor<1x2x2x2xf32>, %arg1: tensor<2x8x3x3xf32>, %arg2: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {arch = "", kernel = "mixr"} {
//
%0 = migraphx.convolution(%arg2, %arg1) {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : (tensor<1x8x4x4xf32>, tensor<2x8x3x3xf32>) -> tensor<1x2x2x2xf32>
//
%1 = migraphx.add(%0, %arg0) : (tensor<1x2x2x2xf32>, tensor<1x2x2x2xf32>) -> tensor<1x2x2x2xf32>
//
%2 = migraphx.relu(%1) : (tensor<1x2x2x2xf32>) -> tensor<1x2x2x2xf32>
//
return %2 : tensor<1x2x2x2xf32>
//
}
//
}
//
)__migraphx__";
//
migraphx::module m;
//
auto x = m.add_parameter("x", {migraphx::shape::float_type, {1, 8, 4, 4}});
//
auto w = m.add_parameter("w", {migraphx::shape::float_type, {2, 8, 3, 3}});
//
auto b = m.add_parameter("b", {migraphx::shape::float_type, {1, 2, 2, 2}});
//
auto conv = m.add_instruction(migraphx::make_op("convolution"), x, w);
//
auto add = m.add_instruction(migraphx::make_op("add"), conv, b);
//
auto relu = m.add_instruction(migraphx::make_op("relu"), add);
//
m.add_return({relu});
//
auto s = migraphx::gpu::dump_mlir(m);
//
// Skip test if MLIR is not enabled
//
if(s.empty())
//
return;
//
CHECK(encode(s) == encode(mlir_output));
//
EXPECT(verify_mlir(m));
//
}
//
TEST_CASE
(
quant_dot_add
)
{
const
std
::
string
mlir_output
=
R"__migraphx__(
...
...
@@ -199,39 +200,19 @@ module {
}
)__migraphx__"
;
migraphx
::
module
m
;
auto
arg0
=
m
.
add_parameter
(
"arg0"
,
{
migraphx
::
shape
::
int8_type
,
{
1
,
5
,
4
}});
auto
arg1
=
m
.
add_parameter
(
"arg1"
,
{
migraphx
::
shape
::
int8_type
,
{
1
,
4
,
3
}});
auto
arg2
=
m
.
add_parameter
(
"arg2"
,
{
migraphx
::
shape
::
int32_type
,
{
1
,
5
,
3
}});
auto
conv
=
m
.
add_instruction
(
migraphx
::
make_op
(
"quant_dot"
),
arg0
,
arg1
);
auto
add
=
m
.
add_instruction
(
migraphx
::
make_op
(
"add"
),
conv
,
arg2
);
m
.
add_return
({
add
});
auto
arg0
=
m
.
add_parameter
(
"arg0"
,
{
migraphx
::
shape
::
int8_type
,
{
5
,
16
}});
auto
arg1
=
m
.
add_parameter
(
"arg1"
,
{
migraphx
::
shape
::
int8_type
,
{
16
,
8
}});
//auto arg2 = m.add_parameter("arg2", {migraphx::shape::int32_type, {1, 5, 8}});
//auto add = m.add_instruction(migraphx::make_op("add"), conv, arg2);
migraphx
::
shape
ss
{
migraphx
::
shape
::
float_type
,
{
5
,
8
}};
auto
literal
=
m
.
add_literal
(
5.81251188e-05
f
);
auto
bcast
=
m
.
add_instruction
(
migraphx
::
make_op
(
"multibroadcast"
,
{{
"out_lens"
,
ss
.
lens
()}}),
literal
);
auto
dot
=
m
.
add_instruction
(
migraphx
::
make_op
(
"quant_dot"
),
arg0
,
arg1
);
//m.add_return({dot});
auto
s
=
migraphx
::
gpu
::
dump_mlir
(
m
);
// Skip test if MLIR is not enabled
if
(
s
.
empty
())
return
;
CHECK
(
encode
(
s
)
==
encode
(
mlir_output
));
EXPECT
(
verify_mlir
(
m
));
}
auto
dequant
=
m
.
add_instruction
(
migraphx
::
make_op
(
"dequantizelinear"
),
dot
,
bcast
);
m
.
add_return
({
dequant
});
TEST_CASE
(
dot_add
)
{
const
std
::
string
mlir_output
=
R"__migraphx__(
module {
func.func @mlir_dot(%arg0: tensor<1x5x4xf32>, %arg1: tensor<1x4x3xf32>, %arg2: tensor<1x5x3xf32>) -> tensor<1x5x3xf32> attributes {arch = "", kernel = "mixr"} {
%0 = migraphx.dot(%arg0, %arg1) : (tensor<1x5x4xf32>, tensor<1x4x3xf32>) -> tensor<1x5x3xf32>
%1 = migraphx.add(%0, %arg2) : (tensor<1x5x3xf32>, tensor<1x5x3xf32>) -> tensor<1x5x3xf32>
return %1 : tensor<1x5x3xf32>
}
}
)__migraphx__"
;
migraphx
::
module
m
;
auto
arg0
=
m
.
add_parameter
(
"arg0"
,
{
migraphx
::
shape
::
float_type
,
{
1
,
5
,
4
}});
auto
arg1
=
m
.
add_parameter
(
"arg1"
,
{
migraphx
::
shape
::
float_type
,
{
1
,
4
,
3
}});
auto
arg2
=
m
.
add_parameter
(
"arg2"
,
{
migraphx
::
shape
::
float_type
,
{
1
,
5
,
3
}});
auto
conv
=
m
.
add_instruction
(
migraphx
::
make_op
(
"dot"
),
arg0
,
arg1
);
auto
add
=
m
.
add_instruction
(
migraphx
::
make_op
(
"add"
),
conv
,
arg2
);
m
.
add_return
({
add
});
auto
s
=
migraphx
::
gpu
::
dump_mlir
(
m
);
// Skip test if MLIR is not enabled
if
(
s
.
empty
())
...
...
@@ -239,38 +220,115 @@ module {
CHECK
(
encode
(
s
)
==
encode
(
mlir_output
));
EXPECT
(
verify_mlir
(
m
));
}
//
//TEST_CASE(dot_add)
//{
// const std::string mlir_output = R"__migraphx__(
//module {
// func.func @mlir_dot(%arg0: tensor<1x5x4xf32>, %arg1: tensor<1x4x3xf32>, %arg2: tensor<1x5x3xf32>) -> tensor<1x5x3xf32> attributes {arch = "", kernel = "mixr"} {
// %0 = migraphx.dot(%arg0, %arg1) : (tensor<1x5x4xf32>, tensor<1x4x3xf32>) -> tensor<1x5x3xf32>
// %1 = migraphx.add(%0, %arg2) : (tensor<1x5x3xf32>, tensor<1x5x3xf32>) -> tensor<1x5x3xf32>
// return %1 : tensor<1x5x3xf32>
// }
//}
//)__migraphx__";
// migraphx::module m;
// auto arg0 = m.add_parameter("arg0", {migraphx::shape::float_type, {1, 5, 4}});
// auto arg1 = m.add_parameter("arg1", {migraphx::shape::float_type, {1, 4, 3}});
// auto arg2 = m.add_parameter("arg2", {migraphx::shape::float_type, {1, 5, 3}});
// auto conv = m.add_instruction(migraphx::make_op("dot"), arg0, arg1);
// auto add = m.add_instruction(migraphx::make_op("add"), conv, arg2);
// m.add_return({add});
// auto s = migraphx::gpu::dump_mlir(m);
// // Skip test if MLIR is not enabled
// if(s.empty())
// return;
// CHECK(encode(s) == encode(mlir_output));
// EXPECT(verify_mlir(m));
//}
//
//TEST_CASE(conv_int8_dequantize_quantize)
//{
// const std::string mlir_output = R"__migraphx__(
//module {
// func.func @main(%arg0: tensor<2x8x3x3xi8>, %arg1: tensor<1x8x4x4xi8>, %arg2: tensor<1x2x2x2xf32>, %arg3: tensor<1x2x2x2xi32>) -> tensor<1x2x2x2xi32> attributes {arch = "", kernel = "mixr"} {
// %0 = migraphx.quant_convolution(%arg1, %arg0) {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : (tensor<1x8x4x4xi8>, tensor<2x8x3x3xi8>) -> tensor<1x2x2x2xi32>
// %1 = migraphx.dequantizelinear(%0, %arg2, %arg3) : (tensor<1x2x2x2xi32>, tensor<1x2x2x2xf32>, tensor<1x2x2x2xi32>) -> tensor<1x2x2x2xf32>
// return %1 : tensor<1x2x2x2xi32>
// }
//}
//)__migraphx__";
//
// migraphx::module m;
// auto x = m.add_parameter("x", {migraphx::shape::int8_type, {1, 8, 4, 4}});
// auto w = m.add_parameter("w", {migraphx::shape::int8_type, {2, 8, 3, 3}});
// auto conv = m.add_instruction(migraphx::make_op("quant_convolution"), x, w);
// migraphx::shape ss{migraphx::shape::float_type, {1, 2, 2, 2}};
// migraphx::shape sz{migraphx::shape::int32_type, {1, 2, 2, 2}};
// auto input2 = m.add_parameter("x_scale", ss);
// auto input3 = m.add_parameter("x_zero_point", sz);
// auto dequant = m.add_instruction(migraphx::make_op("dequantizelinear"), conv, input2, input3);
// //auto r = m.add_instruction(migraphx::make_op("quantizelinear"), dequant, input2, input3);
//
// //m.add_return({r});
// m.add_return({dequant});
// auto s = migraphx::gpu::dump_mlir(m);
// // Skip test if MLIR is not enabled
// if(s.empty())
// return;
// CHECK(encode(s) == encode(mlir_output));
// EXPECT(verify_mlir(m));
//}
// Builds quant_convolution(x, w) -> dequantizelinear -> quantizelinear, where
// both linear ops take the "x_scale" and "x_zero_point" parameters, dumps the
// module through migraphx::gpu::dump_mlir, and compares the encoded dump with
// the encoded expected text before running verify_mlir on the module.
TEST_CASE(conv_int8_dequantize_quantize)
{
    const std::string mlir_output = R"__migraphx__(
module {
func.func @main(%arg0: tensor<2x8x3x3xi8>, %arg1: tensor<1x8x4x4xi8>, %arg2: tensor<1x2x2x2xf32>, %arg3: tensor<1x2x2x2xi32>) -> tensor<1x2x2x2xi32> attributes {arch = "", kernel = "mixr"} {
%0 = migraphx.quant_convolution(%arg1, %arg0) {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : (tensor<1x8x4x4xi8>, tensor<2x8x3x3xi8>) -> tensor<1x2x2x2xi32>
%1 = migraphx.dequantizelinear(%0, %arg2, %arg3) : (tensor<1x2x2x2xi32>, tensor<1x2x2x2xf32>, tensor<1x2x2x2xi32>) -> tensor<1x2x2x2xf32>
%2 = migraphx.quantizelinear(%1, %arg2, %arg3) : (tensor<1x2x2x2xf32>, tensor<1x2x2x2xf32>, tensor<1x2x2x2xi32>) -> tensor<1x2x2x2xi32>
return %2 : tensor<1x2x2x2xi32>
}
}
)__migraphx__";

    // Parameter shapes, spelled out up front.
    const migraphx::shape data_shape{migraphx::shape::int8_type, {1, 8, 4, 4}};
    const migraphx::shape filter_shape{migraphx::shape::int8_type, {2, 8, 3, 3}};
    const migraphx::shape scale_shape{migraphx::shape::float_type, {1, 2, 2, 2}};
    const migraphx::shape zero_point_shape{migraphx::shape::int32_type, {1, 2, 2, 2}};

    // Instructions are inserted in the same order as before: the convolution
    // goes in ahead of the scale and zero-point parameters.
    migraphx::module m;
    auto data        = m.add_parameter("x", data_shape);
    auto filter      = m.add_parameter("w", filter_shape);
    auto quant_conv  = m.add_instruction(migraphx::make_op("quant_convolution"), data, filter);
    auto scale       = m.add_parameter("x_scale", scale_shape);
    auto zero_point  = m.add_parameter("x_zero_point", zero_point_shape);
    auto dequantized = m.add_instruction(
        migraphx::make_op("dequantizelinear"), quant_conv, scale, zero_point);
    auto requantized = m.add_instruction(
        migraphx::make_op("quantizelinear"), dequantized, scale, zero_point);
    m.add_return({requantized});

    auto s = migraphx::gpu::dump_mlir(m);
    // An empty dump means MLIR is not enabled; the checks are skipped then.
    if(not s.empty())
    {
        CHECK(encode(s) == encode(mlir_output));
        EXPECT(verify_mlir(m));
    }
}
//TEST_CASE(quant_dot_add)
//{
// const std::string mlir_output = R"__migraphx__(
//module {
// func.func @main(%arg0: tensor<1x5x4xi8>, %arg1: tensor<1x4x3xi8>, %arg2: tensor<1x5x3xi32>) -> tensor<1x5x3xi32> attributes {arch = "", kernel = "mixr"} {
// %0 = migraphx.quant_dot(%arg0, %arg1) : (tensor<1x5x4xi8>, tensor<1x4x3xi8>) -> tensor<1x5x3xi32>
// %1 = migraphx.add(%0, %arg2) : (tensor<1x5x3xi32>, tensor<1x5x3xi32>) -> tensor<1x5x3xi32>
// return %1 : tensor<1x5x3xi32>
// }
//}
//)__migraphx__";
// migraphx::module m;
// //auto arg0 = m.add_parameter("arg0", {migraphx::shape::int8_type, {5, 16}});
// //auto arg1 = m.add_parameter("arg1", {migraphx::shape::int8_type, {16, 8}});
//
// auto arg0 = m.add_parameter("arg0", {migraphx::shape::float_type, {5, 16}});
// auto arg1 = m.add_parameter("arg1", {migraphx::shape::float_type, {16, 8}});
// // quantizelinear for arg0
// migraphx::shape ss1{migraphx::shape::int8_type, {5, 16}};
// auto literal1 = m.add_literal(0.00738189f);
// auto bcast1 = m.add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", ss1.lens()}}), literal1);
// auto quant_linear1 = m.add_instruction(migraphx::make_op("quantizelinear"), arg0, bcast1);
// quant_linear1->debug_print();
// // quantizelinear for arg1
// migraphx::shape ss2{migraphx::shape::int8_type, {16, 8}};
// auto literal2 = m.add_literal(0.00787402f);
// auto bcast2 = m.add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", ss2.lens()}}), literal2);
// auto quant_linear2 = m.add_instruction(migraphx::make_op("quantizelinear"), arg1, bcast2);
//
// auto dot = m.add_instruction(migraphx::make_op("quant_dot"), quant_linear1, quant_linear2);
//
// //auto arg2 = m.add_parameter("arg2", {migraphx::shape::int32_type, {1, 5, 8}});
// //auto add = m.add_instruction(migraphx::make_op("add"), conv, arg2);
// migraphx::shape ss{migraphx::shape::float_type, {5, 8}};
// auto literal = m.add_literal(5.81251188e-05f);
// auto bcast = m.add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", ss.lens()}}), literal);
// //m.add_return({dot});
//
// auto dequant = m.add_instruction(migraphx::make_op("dequantizelinear"), dot, bcast);
// m.add_return({dequant});
//
// auto s = migraphx::gpu::dump_mlir(m);
// // Skip test if MLIR is not enabled
// if(s.empty())
// return;
// CHECK(encode(s) == encode(mlir_output));
// EXPECT(verify_mlir(m));
//}
// Entry point of the test binary: forwards the command-line arguments to
// test::run and then exits with status 0.
int main(int argc, const char* argv[])
{
    test::run(argc, argv);
    return 0;
}
test/gpu/quantization.cpp
View file @
dd0f4f29
...
...
@@ -24,6 +24,7 @@
#include <iostream>
#include <vector>
#include <migraphx/gpu/fuse_mlir.hpp>
#include <migraphx/gpu/mlir.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/quantization.hpp>
...
...
@@ -31,31 +32,112 @@
#include <migraphx/register_target.hpp>
#include <migraphx/verify.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/propagate_constant.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/onnx.hpp>
#include <test.hpp>
#include <migraphx/half.hpp>
TEST_CASE
(
gpu_target_copy
)
{
migraphx
::
target
gpu_t
=
migraphx
::
make_target
(
"gpu"
);
migraphx
::
target
ref_t
=
migraphx
::
make_target
(
"ref"
);
migraphx
::
shape
s
{
migraphx
::
shape
::
int8_type
,
{
2
,
3
,
4
,
5
}};
auto
ref_arg_orig
=
migraphx
::
generate_argument
(
s
,
0x123456L
);
auto
gpu_arg
=
gpu_t
.
copy_to
(
ref_arg_orig
);
auto
ref_arg_final
=
gpu_t
.
copy_from
(
gpu_arg
);
//TEST_CASE(gpu_target_copy)
//{
// migraphx::target gpu_t = migraphx::make_target("gpu");
// migraphx::target ref_t = migraphx::make_target("ref");
// migraphx::shape s{migraphx::shape::int8_type, {2, 3, 4, 5}};
//
// auto ref_arg_orig = migraphx::generate_argument(s, 0x123456L);
// auto gpu_arg = gpu_t.copy_to(ref_arg_orig);
// auto ref_arg_final = gpu_t.copy_from(gpu_arg);
//
// std::vector<int8_t> val_orig;
// ref_arg_orig.visit([&](auto v) { val_orig.assign(v.begin(), v.end()); });
// std::vector<int8_t> val_final;
// ref_arg_final.visit([&](auto v) { val_final.assign(v.begin(), v.end()); });
//
// EXPECT(migraphx::verify_range(val_orig, val_final));
//}
std
::
vector
<
int8_t
>
val_orig
;
ref_arg_orig
.
visit
([
&
](
auto
v
)
{
val_orig
.
assign
(
v
.
begin
(),
v
.
end
());
});
std
::
vector
<
int8_t
>
val_final
;
ref_arg_final
.
visit
([
&
](
auto
v
)
{
val_final
.
assign
(
v
.
begin
(),
v
.
end
());
});
EXPECT
(
migraphx
::
verify_range
(
val_orig
,
val_final
));
}
//TEST_CASE(int8_quantization)
//{
// auto run_prog = [](migraphx::program p,
// const migraphx::target& t,
// migraphx::parameter_map& m_in,
// std::vector<float>& res) {
// std::vector<migraphx::parameter_map> cali_data;
// cali_data.push_back(m_in);
// migraphx::quantize_int8(p, t, cali_data);
// p.compile(t);
// migraphx::parameter_map m;
// for(auto&& x : p.get_parameter_shapes())
// {
// if(m_in.count(x.first) > 0)
// {
// m[x.first] = t.copy_to(m_in[x.first]);
// }
// else
// {
// m[x.first] = t.allocate(x.second);
// }
// }
//
// auto result = t.copy_from(p.eval(m).back());
// result.visit([&](auto v) { res.assign(v.begin(), v.end()); });
// };
//
// auto create_program = [] {
// migraphx::program p;
// auto* mm = p.get_main_module();
// migraphx::shape sa{migraphx::shape::float_type, {5, 16}};
// migraphx::shape sb{migraphx::shape::float_type, {16, 8}};
// migraphx::shape sc{migraphx::shape::float_type, {5, 8}};
// auto pa = mm->add_parameter("a", sa);
// auto pb = mm->add_parameter("b", sb);
// mm->add_instruction(migraphx::op::dot{}, pa, pb);
//
// return p;
// };
//
// {
// auto p = create_program();
// migraphx::parameter_map m;
// migraphx::shape sa{migraphx::shape::float_type, {5, 16}};
// migraphx::shape sb{migraphx::shape::float_type, {16, 8}};
// migraphx::shape sc{migraphx::shape::float_type, {5, 8}};
// m["a"] = migraphx::generate_argument(sa);
// m["b"] = migraphx::generate_argument(sb);
// std::vector<float> ref_result;
// migraphx::target ref_t = migraphx::make_target("ref");
// run_prog(p, ref_t, m, ref_result);
// // print ref_result
// std::cout << "ref_result: ";
// for(auto&& v : ref_result)
// std::cout << v << " ";
// std::cout << std::endl;
//
// std::vector<float> gpu_result;
// migraphx::target gpu_t = migraphx::make_target("gpu");
// run_prog(p, gpu_t, m, gpu_result);
// std::cout << "gpu_result: ";
// for(auto&& v : gpu_result)
// std::cout << v << " ";
// std::cout << std::endl;
//
// auto s = migraphx::gpu::dump_mlir(*p.get_main_module());
// //std::cout << s << std::endl;
// // Note: the tolerance for mlir_enabled result is temporarily bumped
// // higher because the lowering pipeline between mlir fallback and
// // regular non-mlir pipeline diverged. MLIR fallback uses the
// // rewrite_quantization at the very end of the pipeline, whereas
// // the regular pipeline uses the rewrite_quantization in the much
// // earlier stage.
// //if(migraphx::gpu::mlir_enabled())
// // EXPECT(migraphx::verify_range(ref_result, gpu_result, 1e5));
// //else
// EXPECT(migraphx::verify_range(ref_result, gpu_result));
// }
//}
TEST_CASE
(
int8_quantization
)
TEST_CASE
(
int8_quantization
_self
)
{
auto
run_prog
=
[](
migraphx
::
program
p
,
const
migraphx
::
target
&
t
,
...
...
@@ -63,7 +145,7 @@ TEST_CASE(int8_quantization)
std
::
vector
<
float
>&
res
)
{
std
::
vector
<
migraphx
::
parameter_map
>
cali_data
;
cali_data
.
push_back
(
m_in
);
migraphx
::
quantize_int8
(
p
,
t
,
cali_data
);
//
migraphx::quantize_int8(p, t, cali_data);
p
.
compile
(
t
);
migraphx
::
parameter_map
m
;
for
(
auto
&&
x
:
p
.
get_parameter_shapes
())
...
...
@@ -88,9 +170,34 @@ TEST_CASE(int8_quantization)
migraphx
::
shape
sa
{
migraphx
::
shape
::
float_type
,
{
5
,
16
}};
migraphx
::
shape
sb
{
migraphx
::
shape
::
float_type
,
{
16
,
8
}};
migraphx
::
shape
sc
{
migraphx
::
shape
::
float_type
,
{
5
,
8
}};
//migraphx::shape sa{migraphx::shape::int8_type, {5, 16}};
//migraphx::shape sb{migraphx::shape::int8_type, {16, 8}};
//migraphx::shape sc{migraphx::shape::int32_type, {5, 8}};
auto
pa
=
mm
->
add_parameter
(
"a"
,
sa
);
auto
pb
=
mm
->
add_parameter
(
"b"
,
sb
);
mm
->
add_instruction
(
migraphx
::
op
::
dot
{},
pa
,
pb
);
// quantizelinear for arg0
migraphx
::
shape
ss1
{
migraphx
::
shape
::
int8_type
,
{
5
,
16
}};
auto
literal1
=
mm
->
add_literal
(
0.00738189
f
);
auto
bcast1
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"multibroadcast"
,
{{
"out_lens"
,
ss1
.
lens
()}}),
literal1
);
auto
quant_linear1
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"quantizelinear"
),
pa
,
bcast1
);
//quant_linear1->debug_print();
// quantizelinear for arg1
migraphx
::
shape
ss2
{
migraphx
::
shape
::
int8_type
,
{
16
,
8
}};
auto
literal2
=
mm
->
add_literal
(
0.00787402
f
);
auto
bcast2
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"multibroadcast"
,
{{
"out_lens"
,
ss2
.
lens
()}}),
literal2
);
auto
quant_linear2
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"quantizelinear"
),
pb
,
bcast2
);
//auto dot = mm->add_instruction(migraphx::op::dot{}, pa, pb);
//auto dot = mm->add_instruction(migraphx::op::quant_dot{}, pa, pb);
auto
dot
=
mm
->
add_instruction
(
migraphx
::
op
::
quant_dot
{},
quant_linear1
,
quant_linear2
);
migraphx
::
shape
ss
{
migraphx
::
shape
::
float_type
,
{
5
,
8
}};
auto
literal
=
mm
->
add_literal
(
5.81251188e-05
f
);
auto
bcast
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"multibroadcast"
,
{{
"out_lens"
,
ss
.
lens
()}}),
literal
);
auto
dequant
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"dequantizelinear"
),
dot
,
bcast
);
mm
->
add_return
({
dequant
});
return
p
;
};
...
...
@@ -101,25 +208,39 @@ TEST_CASE(int8_quantization)
migraphx
::
shape
sa
{
migraphx
::
shape
::
float_type
,
{
5
,
16
}};
migraphx
::
shape
sb
{
migraphx
::
shape
::
float_type
,
{
16
,
8
}};
migraphx
::
shape
sc
{
migraphx
::
shape
::
float_type
,
{
5
,
8
}};
//migraphx::shape sa{migraphx::shape::int8_type, {5, 16}};
//migraphx::shape sb{migraphx::shape::int8_type, {16, 8}};
//migraphx::shape sc{migraphx::shape::int32_type, {5, 8}};
m
[
"a"
]
=
migraphx
::
generate_argument
(
sa
);
m
[
"b"
]
=
migraphx
::
generate_argument
(
sb
);
std
::
vector
<
float
>
ref_result
;
migraphx
::
target
ref_t
=
migraphx
::
make_target
(
"ref"
);
run_prog
(
p
,
ref_t
,
m
,
ref_result
);
// print ref_result
std
::
cout
<<
"ref_result: "
;
for
(
auto
&&
v
:
ref_result
)
std
::
cout
<<
v
<<
" "
;
std
::
cout
<<
std
::
endl
;
std
::
vector
<
float
>
gpu_result
;
migraphx
::
target
gpu_t
=
migraphx
::
make_target
(
"gpu"
);
run_prog
(
p
,
gpu_t
,
m
,
gpu_result
);
std
::
cout
<<
"gpu_result: "
;
for
(
auto
&&
v
:
gpu_result
)
std
::
cout
<<
v
<<
" "
;
std
::
cout
<<
std
::
endl
;
auto
s
=
migraphx
::
gpu
::
dump_mlir
(
*
p
.
get_main_module
());
//std::cout << s << std::endl;
// Note: the tolerance for mlir_enabled result is temporarily bumped
// higher because the lowering pipeline between mlir fallback and
// regular non-mlir pipeline diverged. MLIR fallback uses the
// rewrite_quantization at the very end of the pipeline, whereas
// the regular pipeline uses the rewrite_quantization in the much
// earlier stage.
if
(
migraphx
::
gpu
::
mlir_enabled
())
EXPECT
(
migraphx
::
verify_range
(
ref_result
,
gpu_result
,
1e5
));
else
//
if(migraphx::gpu::mlir_enabled())
//
EXPECT(migraphx::verify_range(ref_result, gpu_result, 1e5));
//
else
EXPECT
(
migraphx
::
verify_range
(
ref_result
,
gpu_result
));
}
}
...
...
test/quantization.cpp
View file @
dd0f4f29
...
...
@@ -647,6 +647,7 @@ TEST_CASE(dot_float)
mm
->
add_instruction
(
migraphx
::
make_op
(
"multibroadcast"
,
{{
"out_lens"
,
sc
.
lens
()}}),
dc
);
auto
r
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"dequantizelinear"
),
quant
,
mdc
);
mm
->
add_return
({
r
});
mm
->
debug_print
();
return
p
;
};
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment