Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
d4dc1f9f
Commit
d4dc1f9f
authored
Oct 30, 2023
by
Artur Wojcik
Browse files
Merge branch 'develop' into uif2-initial
parents
5fbda037
22bb777f
Changes
25
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
2 additions
and
482 deletions
+2
-482
src/targets/gpu/rocblas.cpp
src/targets/gpu/rocblas.cpp
+0
-13
src/targets/gpu/target.cpp
src/targets/gpu/target.cpp
+0
-2
test/gpu/pack_int8_args.cpp
test/gpu/pack_int8_args.cpp
+0
-465
test/verify/quant_conv_1.cpp
test/verify/quant_conv_1.cpp
+1
-1
test/verify/quant_conv_2.cpp
test/verify/quant_conv_2.cpp
+1
-1
No files found.
src/targets/gpu/rocblas.cpp
View file @
d4dc1f9f
...
...
@@ -53,19 +53,6 @@ bool get_compute_fp32_flag()
return
(
starts_with
(
device_name
,
"gfx9"
)
and
device_name
>=
"gfx908"
);
}
bool
get_int8_x4_format
(
context
&
ctx
)
{
#if ROCBLAS_VERSION_MAJOR >= 3
(
void
)(
ctx
);
return
false
;
#else
// int8x4 packed format is only available starting from rocblas-v2.38 and it is deprecated in
// v3.0 and will be removed in v4.0
rocblas_gemm_flags
flag
;
rocblas_query_int8_layout_flag
(
ctx
.
get_stream
().
get_rocblas
(),
&
flag
);
return
flag
==
rocblas_gemm_flags_pack_int8x4
;
#endif
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/target.cpp
View file @
d4dc1f9f
...
...
@@ -63,7 +63,6 @@
#include <migraphx/gpu/fuse_ops.hpp>
#include <migraphx/gpu/prefuse_ops.hpp>
#include <migraphx/gpu/lowering.hpp>
#include <migraphx/gpu/pack_int8_args.hpp>
#include <migraphx/gpu/schedule_model.hpp>
#include <migraphx/gpu/sync_device.hpp>
#include <migraphx/gpu/target.hpp>
...
...
@@ -154,7 +153,6 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
dead_code_elimination
{},
compile_miopen
{
&
gctx
},
dead_code_elimination
{},
pack_int8_args
{},
dead_code_elimination
{},
fuse_ops
{
&
ctx
,
options
.
fast_math
},
dead_code_elimination
{},
...
...
test/gpu/pack_int8_args.cpp
deleted
100644 → 0
View file @
5fbda037
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/instruction_ref.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/lowering.hpp>
#include <migraphx/gpu/target.hpp>
#include <migraphx/gpu/allocation_model.hpp>
#include <migraphx/apply_alpha_beta.hpp>
#include <migraphx/adjust_allocation.hpp>
#include <migraphx/gpu/pack_int8_args.hpp>
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/device_name.hpp>
#include <migraphx/auto_contiguous.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/replace_allocate.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/make_op.hpp>
#include <test.hpp>
#include "make_precompile_op.hpp"
// Treat some operators as compilable to enable lowering
MIGRAPHX_GPU_TEST_PRECOMPILE
(
"add"
,
"mul"
,
"convert"
)
void
run_passes
(
migraphx
::
module
&
m
,
migraphx
::
gpu
::
context
&
ctx
)
{
migraphx
::
run_passes
(
m
,
{
migraphx
::
auto_contiguous
{},
migraphx
::
gpu
::
lowering
{
&
ctx
,
false
},
migraphx
::
dead_code_elimination
{},
migraphx
::
replace_allocate
{
migraphx
::
gpu
::
gpu_allocation_model
{}},
migraphx
::
dead_code_elimination
{},
migraphx
::
gpu
::
pack_int8_args
{},
migraphx
::
dead_code_elimination
{}});
}
TEST_CASE
(
quant_dot
)
{
auto
create_module
=
[]
{
migraphx
::
module
m
(
"test"
);
migraphx
::
shape
m1_shape
{
migraphx
::
shape
::
int8_type
,
{
5
,
8
}};
migraphx
::
shape
m2_shape
{
migraphx
::
shape
::
int8_type
,
{
8
,
7
}};
migraphx
::
shape
m3_shape
{
migraphx
::
shape
::
int32_type
,
{
5
,
7
}};
auto
l1
=
m
.
add_parameter
(
"a"
,
m1_shape
);
auto
l2
=
m
.
add_parameter
(
"b"
,
m2_shape
);
auto
l3
=
m
.
add_parameter
(
"c"
,
m3_shape
);
auto
r
=
migraphx
::
add_apply_alpha_beta
(
m
,
{
l1
,
l2
,
l3
},
migraphx
::
make_op
(
"quant_dot"
),
1
,
1
);
m
.
add_return
({
r
});
return
m
;
};
auto
create_optimized_int8_x4
=
[](
bool
int8_x4
)
{
migraphx
::
module
m
(
"test"
);
migraphx
::
shape
m1_shape
{
migraphx
::
shape
::
int8_type
,
{
5
,
8
}};
migraphx
::
shape
m2_shape
{
migraphx
::
shape
::
int8_type
,
{
8
,
7
}};
migraphx
::
shape
m3_shape
{
migraphx
::
shape
::
int32_type
,
{
5
,
7
}};
auto
l1
=
m
.
add_parameter
(
"a"
,
m1_shape
);
auto
l2
=
m
.
add_parameter
(
"b"
,
m2_shape
);
auto
l3
=
m
.
add_parameter
(
"c"
,
m3_shape
);
auto
beta
=
m
.
add_literal
(
1
);
auto
output
=
m
.
add_parameter
(
"test:#output_0"
,
m3_shape
);
auto
gemm_alloc
=
m
.
add_instruction
(
migraphx
::
make_op
(
"hip::allocate"
,
{{
"shape"
,
migraphx
::
to_value
(
m3_shape
)}}));
auto
packa
=
l2
;
if
(
int8_x4
)
{
auto
alloc
=
m
.
add_instruction
(
migraphx
::
make_op
(
"hip::allocate"
,
{{
"shape"
,
migraphx
::
to_value
(
m2_shape
)}}));
packa
=
m
.
add_instruction
(
migraphx
::
make_op
(
"gpu::int8_gemm_pack_a"
),
l2
,
alloc
);
}
auto
gemm
=
m
.
add_instruction
(
migraphx
::
make_op
(
"gpu::quant_gemm"
,
{{
"int8_x4_format"
,
int8_x4
},
{
"compute_fp32"
,
migraphx
::
gpu
::
get_compute_fp32_flag
()}}),
l1
,
packa
,
gemm_alloc
);
auto
beta_broadcast
=
m
.
add_instruction
(
migraphx
::
make_op
(
"multibroadcast"
,
{{
"out_lens"
,
m3_shape
.
lens
()}}),
beta
);
auto
beta_alloc
=
m
.
add_instruction
(
migraphx
::
make_op
(
"hip::allocate"
,
{{
"shape"
,
migraphx
::
to_value
(
m3_shape
)}}));
auto
beta_contiguous
=
m
.
add_instruction
(
migraphx
::
make_op
(
"gpu::contiguous"
),
beta_broadcast
,
beta_alloc
);
auto
mul_alloc
=
m
.
add_instruction
(
migraphx
::
make_op
(
"hip::allocate"
,
{{
"shape"
,
migraphx
::
to_value
(
m3_shape
)}}));
auto
m3_beta
=
m
.
add_instruction
(
make_precompile_op
(
"mul"
),
l3
,
beta_contiguous
,
mul_alloc
);
auto
gemm_add
=
m
.
add_instruction
(
make_precompile_op
(
"add"
),
gemm
,
m3_beta
,
output
);
m
.
add_return
({
gemm_add
});
return
m
;
};
auto
m1
=
create_module
();
auto
ctx
=
migraphx
::
gpu
::
context
{};
run_passes
(
m1
,
ctx
);
bool
int8_x4
=
migraphx
::
gpu
::
get_int8_x4_format
(
ctx
);
auto
m2
=
create_optimized_int8_x4
(
int8_x4
);
EXPECT
(
m1
==
m2
);
}
TEST_CASE
(
quant_dot_trans
)
{
auto
create_module
=
[]
{
migraphx
::
module
m
(
"test"
);
migraphx
::
shape
s1
{
migraphx
::
shape
::
int8_type
,
{
3
,
2
,
8
,
5
}};
migraphx
::
shape
s2
{
migraphx
::
shape
::
int8_type
,
{
3
,
2
,
7
,
8
}};
auto
l1
=
m
.
add_parameter
(
"a"
,
s1
);
auto
tl1
=
m
.
add_instruction
(
migraphx
::
make_op
(
"transpose"
,
{{
"permutation"
,
{
0
,
1
,
3
,
2
}}}),
l1
);
auto
l2
=
m
.
add_parameter
(
"b"
,
s2
);
auto
tl2
=
m
.
add_instruction
(
migraphx
::
make_op
(
"transpose"
,
{{
"permutation"
,
{
0
,
1
,
3
,
2
}}}),
l2
);
auto
r
=
migraphx
::
add_apply_alpha_beta
(
m
,
{
tl1
,
tl2
},
migraphx
::
make_op
(
"quant_dot"
),
3
);
m
.
add_return
({
r
});
return
m
;
};
auto
create_optimized_int8_x4
=
[](
bool
int8_x4
)
{
migraphx
::
module
m
(
"test"
);
migraphx
::
shape
s1
{
migraphx
::
shape
::
int8_type
,
{
3
,
2
,
8
,
5
}};
migraphx
::
shape
s2
{
migraphx
::
shape
::
int8_type
,
{
3
,
2
,
7
,
8
}};
migraphx
::
shape
s3
{
migraphx
::
shape
::
int32_type
,
{
3
,
2
,
5
,
7
}};
auto
l1
=
m
.
add_parameter
(
"a"
,
s1
);
auto
l2
=
m
.
add_parameter
(
"b"
,
s2
);
auto
alpha
=
m
.
add_literal
(
3
);
auto
output
=
m
.
add_parameter
(
"test:#output_0"
,
s3
);
auto
tl1
=
m
.
add_instruction
(
migraphx
::
make_op
(
"transpose"
,
{{
"permutation"
,
{
0
,
1
,
3
,
2
}}}),
l1
);
migraphx
::
shape
ts1
{
migraphx
::
shape
::
int8_type
,
{
3
,
2
,
5
,
8
}};
auto
alloca
=
m
.
add_instruction
(
migraphx
::
make_op
(
"hip::allocate"
,
{{
"shape"
,
migraphx
::
to_value
(
ts1
)}}));
auto
conta
=
m
.
add_instruction
(
migraphx
::
make_op
(
"gpu::contiguous"
),
tl1
,
alloca
);
auto
tl2
=
m
.
add_instruction
(
migraphx
::
make_op
(
"transpose"
,
{{
"permutation"
,
{
0
,
1
,
3
,
2
}}}),
l2
);
migraphx
::
shape
ts2
{
migraphx
::
shape
::
int8_type
,
{
3
,
2
,
8
,
7
}};
auto
allocb
=
m
.
add_instruction
(
migraphx
::
make_op
(
"hip::allocate"
,
{{
"shape"
,
migraphx
::
to_value
(
ts2
)}}));
auto
contb
=
m
.
add_instruction
(
migraphx
::
make_op
(
"gpu::contiguous"
),
tl2
,
allocb
);
auto
alpha_broadcast
=
m
.
add_instruction
(
migraphx
::
make_op
(
"multibroadcast"
,
{{
"out_lens"
,
conta
->
get_shape
().
lens
()}}),
alpha
);
auto
alpha_alloc
=
m
.
add_instruction
(
migraphx
::
make_op
(
"hip::allocate"
,
{{
"shape"
,
migraphx
::
to_value
(
migraphx
::
shape
(
migraphx
::
shape
::
int32_type
,
{
3
,
2
,
5
,
8
}))}}));
auto
alpha_contiguous
=
m
.
add_instruction
(
migraphx
::
make_op
(
"gpu::contiguous"
),
alpha_broadcast
,
alpha_alloc
);
// alpha = int32 and tl1 = int8, convert tl1 to int32 for multiplication and then convert
// back result to int8
auto
tl1_convert_alloc
=
m
.
add_instruction
(
migraphx
::
make_op
(
"hip::allocate"
,
{{
"shape"
,
migraphx
::
to_value
(
alpha_contiguous
->
get_shape
())}}));
auto
tl1_convert
=
m
.
add_instruction
(
make_precompile_op
(
migraphx
::
make_op
(
"convert"
,
{{
"target_type"
,
alpha
->
get_shape
().
type
()}})),
conta
,
tl1_convert_alloc
);
auto
mul_alloc
=
m
.
add_instruction
(
migraphx
::
make_op
(
"hip::allocate"
,
{{
"shape"
,
migraphx
::
to_value
(
tl1_convert
->
get_shape
())}}));
auto
tl1_alpha_int32
=
m
.
add_instruction
(
make_precompile_op
(
"mul"
),
alpha_contiguous
,
tl1_convert
,
mul_alloc
);
// convert mul_res to int8
auto
tl1_alpha_int8_alloc
=
m
.
add_instruction
(
migraphx
::
make_op
(
"hip::allocate"
,
{{
"shape"
,
migraphx
::
to_value
(
conta
->
get_shape
())}}));
auto
tl1_alpha_int8
=
m
.
add_instruction
(
make_precompile_op
(
migraphx
::
make_op
(
"convert"
,
{{
"target_type"
,
conta
->
get_shape
().
type
()}})),
tl1_alpha_int32
,
tl1_alpha_int8_alloc
);
auto
packb
=
contb
;
if
(
int8_x4
)
{
auto
allocpb
=
m
.
add_instruction
(
migraphx
::
make_op
(
"hip::allocate"
,
{{
"shape"
,
migraphx
::
to_value
(
ts2
)}}));
packb
=
m
.
add_instruction
(
migraphx
::
make_op
(
"gpu::int8_gemm_pack_a"
),
contb
,
allocpb
);
}
auto
gemm
=
m
.
add_instruction
(
migraphx
::
make_op
(
"gpu::quant_gemm"
,
{{
"int8_x4_format"
,
int8_x4
},
{
"compute_fp32"
,
migraphx
::
gpu
::
get_compute_fp32_flag
()}}),
tl1_alpha_int8
,
packb
,
output
);
m
.
add_return
({
gemm
});
return
m
;
};
auto
m1
=
create_module
();
auto
ctx
=
migraphx
::
gpu
::
context
{};
run_passes
(
m1
,
ctx
);
bool
int8_x4
=
migraphx
::
gpu
::
get_int8_x4_format
(
ctx
);
auto
m2
=
create_optimized_int8_x4
(
int8_x4
);
EXPECT
(
m1
==
m2
);
}
TEST_CASE
(
quant_dot_pad
)
{
auto
create_module
=
[]
{
migraphx
::
module
m
(
"test"
);
migraphx
::
shape
s1
{
migraphx
::
shape
::
int8_type
,
{
5
,
6
}};
migraphx
::
shape
s2
{
migraphx
::
shape
::
int8_type
,
{
6
,
7
}};
migraphx
::
shape
s3
{
migraphx
::
shape
::
int32_type
,
{
5
,
7
}};
auto
l1
=
m
.
add_parameter
(
"a"
,
s1
);
auto
l2
=
m
.
add_parameter
(
"b"
,
s2
);
auto
l3
=
m
.
add_parameter
(
"c"
,
s3
);
auto
r
=
migraphx
::
add_apply_alpha_beta
(
m
,
{
l1
,
l2
,
l3
},
migraphx
::
make_op
(
"quant_dot"
),
1
,
1
);
m
.
add_return
({
r
});
return
m
;
};
auto
create_optimized_int8_x4
=
[](
bool
int8_x4
)
{
migraphx
::
module
m
(
"test"
);
migraphx
::
shape
s1
{
migraphx
::
shape
::
int8_type
,
{
5
,
6
}};
migraphx
::
shape
ps1
{
migraphx
::
shape
::
int8_type
,
{
5
,
8
}};
migraphx
::
shape
s2
{
migraphx
::
shape
::
int8_type
,
{
6
,
7
}};
migraphx
::
shape
ps2
{
migraphx
::
shape
::
int8_type
,
{
8
,
7
}};
migraphx
::
shape
s3
{
migraphx
::
shape
::
int32_type
,
{
5
,
7
}};
auto
l1
=
m
.
add_parameter
(
"a"
,
s1
);
auto
l2
=
m
.
add_parameter
(
"b"
,
s2
);
auto
l3
=
m
.
add_parameter
(
"c"
,
s3
);
auto
beta
=
m
.
add_literal
(
1
);
auto
output
=
m
.
add_parameter
(
"test:#output_0"
,
s3
);
auto
pl1
=
l1
;
auto
packa
=
l2
;
migraphx
::
instruction_ref
pl2
{};
if
(
int8_x4
)
{
auto
po1
=
m
.
insert_instruction
(
l1
,
migraphx
::
make_op
(
"hip::allocate"
,
{{
"shape"
,
migraphx
::
to_value
(
ps1
)}}));
pl1
=
m
.
add_instruction
(
migraphx
::
make_op
(
"gpu::pad"
,
{{
"mode"
,
0
},
{
"pads"
,
{
0
,
2
,
0
,
0
}},
{
"value"
,
0
}}),
l1
,
po1
);
auto
po2
=
m
.
insert_instruction
(
l2
,
migraphx
::
make_op
(
"hip::allocate"
,
{{
"shape"
,
migraphx
::
to_value
(
ps2
)}}));
pl2
=
m
.
insert_instruction
(
std
::
next
(
l2
),
migraphx
::
make_op
(
"gpu::pad"
,
{{
"mode"
,
0
},
{
"pads"
,
{
2
,
0
,
0
,
0
}},
{
"value"
,
0
}}),
l2
,
po2
);
}
auto
gemm_alloc
=
m
.
add_instruction
(
migraphx
::
make_op
(
"hip::allocate"
,
{{
"shape"
,
migraphx
::
to_value
(
s3
)}}));
if
(
int8_x4
)
{
auto
alloc
=
m
.
add_instruction
(
migraphx
::
make_op
(
"hip::allocate"
,
{{
"shape"
,
migraphx
::
to_value
(
ps2
)}}));
packa
=
m
.
add_instruction
(
migraphx
::
make_op
(
"gpu::int8_gemm_pack_a"
),
pl2
,
alloc
);
}
auto
gemm
=
m
.
add_instruction
(
migraphx
::
make_op
(
"gpu::quant_gemm"
,
{{
"int8_x4_format"
,
int8_x4
},
{
"compute_fp32"
,
migraphx
::
gpu
::
get_compute_fp32_flag
()}}),
pl1
,
packa
,
gemm_alloc
);
auto
beta_broadcast
=
m
.
add_instruction
(
migraphx
::
make_op
(
"multibroadcast"
,
{{
"out_lens"
,
s3
.
lens
()}}),
beta
);
auto
beta_alloc
=
m
.
add_instruction
(
migraphx
::
make_op
(
"hip::allocate"
,
{{
"shape"
,
migraphx
::
to_value
(
s3
)}}));
auto
beta_contiguous
=
m
.
add_instruction
(
migraphx
::
make_op
(
"gpu::contiguous"
),
beta_broadcast
,
beta_alloc
);
auto
mul_alloc
=
m
.
add_instruction
(
migraphx
::
make_op
(
"hip::allocate"
,
{{
"shape"
,
migraphx
::
to_value
(
s3
)}}));
auto
m3_beta
=
m
.
add_instruction
(
make_precompile_op
(
"mul"
),
l3
,
beta_contiguous
,
mul_alloc
);
auto
gemm_add
=
m
.
add_instruction
(
make_precompile_op
(
"add"
),
gemm
,
m3_beta
,
output
);
m
.
add_return
({
gemm_add
});
return
m
;
};
auto
m1
=
create_module
();
auto
ctx
=
migraphx
::
gpu
::
context
{};
run_passes
(
m1
,
ctx
);
bool
int8_x4
=
migraphx
::
gpu
::
get_int8_x4_format
(
ctx
);
auto
m2
=
create_optimized_int8_x4
(
int8_x4
);
EXPECT
(
m1
==
m2
);
}
TEST_CASE
(
quant_dot_trans_pad
)
{
auto
create_module
=
[]
{
migraphx
::
module
m
(
"test"
);
migraphx
::
shape
s1
{
migraphx
::
shape
::
int8_type
,
{
3
,
2
,
9
,
5
}};
migraphx
::
shape
s2
{
migraphx
::
shape
::
int8_type
,
{
3
,
2
,
7
,
9
}};
auto
l1
=
m
.
add_parameter
(
"a"
,
s1
);
auto
tl1
=
m
.
add_instruction
(
migraphx
::
make_op
(
"transpose"
,
{{
"permutation"
,
{
0
,
1
,
3
,
2
}}}),
l1
);
auto
l2
=
m
.
add_parameter
(
"b"
,
s2
);
auto
tl2
=
m
.
add_instruction
(
migraphx
::
make_op
(
"transpose"
,
{{
"permutation"
,
{
0
,
1
,
3
,
2
}}}),
l2
);
auto
r
=
migraphx
::
add_apply_alpha_beta
(
m
,
{
tl1
,
tl2
},
migraphx
::
make_op
(
"quant_dot"
),
3
);
m
.
add_return
({
r
});
return
m
;
};
auto
create_optimized_int8_x4
=
[](
bool
int8_x4
)
{
migraphx
::
module
m
(
"test"
);
migraphx
::
shape
s1
{
migraphx
::
shape
::
int8_type
,
{
3
,
2
,
9
,
5
}};
migraphx
::
shape
ps1
{
migraphx
::
shape
::
int8_type
,
{
3
,
2
,
5
,
12
}};
migraphx
::
shape
s2
{
migraphx
::
shape
::
int8_type
,
{
3
,
2
,
7
,
9
}};
migraphx
::
shape
ps2
{
migraphx
::
shape
::
int8_type
,
{
3
,
2
,
12
,
7
}};
migraphx
::
shape
s3
{
migraphx
::
shape
::
int32_type
,
{
3
,
2
,
5
,
7
}};
auto
l1
=
m
.
add_parameter
(
"a"
,
s1
);
auto
l2
=
m
.
add_parameter
(
"b"
,
s2
);
auto
alpha
=
m
.
add_literal
(
3
);
auto
output
=
m
.
add_parameter
(
"test:#output_0"
,
s3
);
auto
tl1
=
m
.
add_instruction
(
migraphx
::
make_op
(
"transpose"
,
{{
"permutation"
,
{
0
,
1
,
3
,
2
}}}),
l1
);
migraphx
::
shape
ts1
{
migraphx
::
shape
::
int8_type
,
{
3
,
2
,
5
,
9
}};
auto
ta
=
m
.
add_instruction
(
migraphx
::
make_op
(
"hip::allocate"
,
{{
"shape"
,
migraphx
::
to_value
(
ts1
)}}));
auto
conta
=
m
.
add_instruction
(
migraphx
::
make_op
(
"gpu::contiguous"
),
tl1
,
ta
);
auto
tl2
=
m
.
add_instruction
(
migraphx
::
make_op
(
"transpose"
,
{{
"permutation"
,
{
0
,
1
,
3
,
2
}}}),
l2
);
migraphx
::
shape
ts2
{
migraphx
::
shape
::
int8_type
,
{
3
,
2
,
9
,
7
}};
auto
tb
=
m
.
add_instruction
(
migraphx
::
make_op
(
"hip::allocate"
,
{{
"shape"
,
migraphx
::
to_value
(
ts2
)}}));
migraphx
::
instruction_ref
ptb
{};
if
(
int8_x4
)
{
ptb
=
m
.
add_instruction
(
migraphx
::
make_op
(
"hip::allocate"
,
{{
"shape"
,
migraphx
::
to_value
(
ps2
)}}));
}
auto
contb
=
m
.
add_instruction
(
migraphx
::
make_op
(
"gpu::contiguous"
),
tl2
,
tb
);
auto
pb
=
contb
;
if
(
int8_x4
)
{
pb
=
m
.
add_instruction
(
migraphx
::
make_op
(
"gpu::pad"
,
{{
"mode"
,
0
},
{
"pads"
,
{
0
,
0
,
3
,
0
,
0
,
0
,
0
,
0
}}}),
contb
,
ptb
);
}
auto
alpha_broadcast
=
m
.
add_instruction
(
migraphx
::
make_op
(
"multibroadcast"
,
{{
"out_lens"
,
conta
->
get_shape
().
lens
()}}),
alpha
);
auto
alpha_alloc
=
m
.
add_instruction
(
migraphx
::
make_op
(
"hip::allocate"
,
{{
"shape"
,
migraphx
::
to_value
(
migraphx
::
shape
(
migraphx
::
shape
::
int32_type
,
conta
->
get_shape
().
lens
()))}}));
auto
alpha_contiguous
=
m
.
add_instruction
(
migraphx
::
make_op
(
"gpu::contiguous"
),
alpha_broadcast
,
alpha_alloc
);
// alpha = int32 and tl1 = int8, convert tl1 to int32 for multiplication and then convert
// back result to int8
auto
tl1_convert_alloc
=
m
.
add_instruction
(
migraphx
::
make_op
(
"hip::allocate"
,
{{
"shape"
,
migraphx
::
to_value
(
alpha_contiguous
->
get_shape
())}}));
auto
tl1_convert
=
m
.
add_instruction
(
make_precompile_op
(
migraphx
::
make_op
(
"convert"
,
{{
"target_type"
,
alpha
->
get_shape
().
type
()}})),
conta
,
tl1_convert_alloc
);
auto
mul_alloc
=
m
.
add_instruction
(
migraphx
::
make_op
(
"hip::allocate"
,
{{
"shape"
,
migraphx
::
to_value
(
tl1_convert
->
get_shape
())}}));
auto
tl1_alpha_int32
=
m
.
add_instruction
(
make_precompile_op
(
"mul"
),
alpha_contiguous
,
tl1_convert
,
mul_alloc
);
// convert mul_res to int8
auto
tl1_alpha_int8_alloc
=
m
.
add_instruction
(
migraphx
::
make_op
(
"hip::allocate"
,
{{
"shape"
,
migraphx
::
to_value
(
conta
->
get_shape
())}}));
migraphx
::
instruction_ref
pta
{};
if
(
int8_x4
)
{
pta
=
m
.
add_instruction
(
migraphx
::
make_op
(
"hip::allocate"
,
{{
"shape"
,
migraphx
::
to_value
(
ps1
)}}));
}
auto
tl1_alpha_int8
=
m
.
add_instruction
(
make_precompile_op
(
migraphx
::
make_op
(
"convert"
,
{{
"target_type"
,
conta
->
get_shape
().
type
()}})),
tl1_alpha_int32
,
tl1_alpha_int8_alloc
);
auto
pa
=
tl1_alpha_int8
;
if
(
int8_x4
)
{
pa
=
m
.
add_instruction
(
migraphx
::
make_op
(
"gpu::pad"
,
{{
"mode"
,
0
},
{
"pads"
,
{
0
,
0
,
0
,
3
,
0
,
0
,
0
,
0
}}}),
tl1_alpha_int8
,
pta
);
}
auto
packb
=
pb
;
if
(
int8_x4
)
{
auto
allocpb
=
m
.
add_instruction
(
migraphx
::
make_op
(
"hip::allocate"
,
{{
"shape"
,
migraphx
::
to_value
(
ps2
)}}));
packb
=
m
.
add_instruction
(
migraphx
::
make_op
(
"gpu::int8_gemm_pack_a"
),
pb
,
allocpb
);
}
auto
gemm
=
m
.
add_instruction
(
migraphx
::
make_op
(
"gpu::quant_gemm"
,
{{
"int8_x4_format"
,
int8_x4
},
{
"compute_fp32"
,
migraphx
::
gpu
::
get_compute_fp32_flag
()}}),
pa
,
packb
,
output
);
m
.
add_return
({
gemm
});
return
m
;
};
auto
m1
=
create_module
();
auto
ctx
=
migraphx
::
gpu
::
context
{};
run_passes
(
m1
,
ctx
);
bool
int8_x4
=
migraphx
::
gpu
::
get_int8_x4_format
(
ctx
);
auto
m2
=
create_optimized_int8_x4
(
int8_x4
);
EXPECT
(
m1
==
m2
);
}
int
main
(
int
argc
,
const
char
*
argv
[])
{
test
::
run
(
argc
,
argv
);
}
test/verify/quant_conv_
default_mode
.cpp
→
test/verify/quant_conv_
1
.cpp
View file @
d4dc1f9f
...
...
@@ -27,7 +27,7 @@
#include <migraphx/generate.hpp>
#include <migraphx/op/quant_convolution.hpp>
struct
quant_conv_
default_mode
:
verify_program
<
quant_conv_
default_mode
>
struct
quant_conv_
1
:
verify_program
<
quant_conv_
1
>
{
migraphx
::
program
create_program
()
const
{
...
...
test/verify/quant_conv_
int8x4_default
.cpp
→
test/verify/quant_conv_
2
.cpp
View file @
d4dc1f9f
...
...
@@ -27,7 +27,7 @@
#include <migraphx/generate.hpp>
#include <migraphx/op/quant_convolution.hpp>
struct
quant_conv_
int8x4_default
:
verify_program
<
quant_conv_
int8x4_default
>
struct
quant_conv_
2
:
verify_program
<
quant_conv_
2
>
{
migraphx
::
program
create_program
()
const
{
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment