Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
028280dc
"docs/en_US/vscode:/vscode.git/clone" did not exist on "8fc555ad7b0537d9a85110cd8e6f1c68d49bedfc"
Commit
028280dc
authored
Aug 01, 2023
by
Alan Turner
Browse files
Formatting
parent
e1bd6573
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
44 additions
and
27 deletions
+44
-27
src/rewrite_quantization.cpp
src/rewrite_quantization.cpp
+11
-7
tools/gemm_perf.py
tools/gemm_perf.py
+33
-20
No files found.
src/rewrite_quantization.cpp
View file @
028280dc
...
...
@@ -64,23 +64,26 @@ void apply_quantizelinear(module& m, instruction_ref ins)
max_quant
=
qt
.
max
();
min_quant
=
qt
.
min
();
});
if
(
enabled
(
MIGRAPHX_BROADCAST_Q
{}))
if
(
enabled
(
MIGRAPHX_BROADCAST_Q
{}))
{
auto
s
=
add_zero_point
->
get_shape
();
auto
min_arg
=
m
.
add_literal
(
literal
{
shape
{
s
.
type
()},
{
min_quant
}});
auto
max_arg
=
m
.
add_literal
(
literal
{
shape
{
s
.
type
()},
{
max_quant
}});
// auto min_mbcast =
// m.insert_instruction(ins, make_op("multibroadcast", {{"out_lens", s.lens()}}), min_arg);
// m.insert_instruction(ins, make_op("multibroadcast", {{"out_lens", s.lens()}}),
// min_arg);
// auto max_mbcast =
// m.insert_instruction(ins, make_op("multibroadcast", {{"out_lens", s.lens()}}), max_arg);
// m.insert_instruction(ins, make_op("multibroadcast", {{"out_lens", s.lens()}}),
// max_arg);
// auto saturate =
// m.insert_instruction(ins, make_op("clip"), add_zero_point, min_mbcast, max_mbcast);
auto
saturate
=
insert_common_op
(
m
,
ins
,
make_op
(
"clip"
),
{
add_zero_point
,
min_arg
,
max_arg
});
auto
saturate
=
insert_common_op
(
m
,
ins
,
make_op
(
"clip"
),
{
add_zero_point
,
min_arg
,
max_arg
});
m
.
replace_instruction
(
ins
,
make_op
(
"convert"
,
{{
"target_type"
,
ins
->
get_shape
().
type
()}}),
saturate
);
}
else
else
{
auto
s
=
add_zero_point
->
get_shape
();
std
::
vector
<
int
>
min_data
(
s
.
elements
(),
min_quant
);
...
...
@@ -88,7 +91,8 @@ void apply_quantizelinear(module& m, instruction_ref ins)
auto
min_arg
=
m
.
add_literal
(
literal
(
s
,
min_data
));
auto
max_arg
=
m
.
add_literal
(
literal
(
s
,
max_data
));
auto
saturate
=
m
.
insert_instruction
(
ins
,
make_op
(
"clip"
),
add_zero_point
,
min_arg
,
max_arg
);
auto
saturate
=
m
.
insert_instruction
(
ins
,
make_op
(
"clip"
),
add_zero_point
,
min_arg
,
max_arg
);
m
.
replace_instruction
(
ins
,
make_op
(
"convert"
,
{{
"target_type"
,
ins
->
get_shape
().
type
()}}),
saturate
);
}
...
...
tools/gemm_perf.py
View file @
028280dc
import
subprocess
,
csv
,
re
,
datetime
class
CSVFile
:
def
__init__
(
self
,
path
=
"output.csv"
):
self
.
path
=
path
...
...
@@ -18,20 +20,23 @@ def get_device_name():
matches
=
re
.
findall
(
"gfx\d*[a-z]*"
,
str
(
out
.
stdout
))
return
matches
[
0
]
def
run_perf
(
model
,
batch_size
,
int8
=
False
,
use_ck
=
False
,
use_large_k
=
False
,
disable_fusion
=
False
):
def
run_perf
(
model
,
batch_size
,
int8
=
False
,
use_ck
=
False
,
use_large_k
=
False
,
disable_fusion
=
False
):
env_vars
=
""
if
use_ck
:
env_vars
+=
"MIGRAPHX_ENABLE_CK=1 "
env_vars
+=
"MIGRAPHX_ENABLE_CK=1 "
if
use_large_k
:
env_vars
+=
"MIGRAPHX_USE_LARGE_K=1 "
env_vars
+=
"MIGRAPHX_USE_LARGE_K=1 "
if
disable_fusion
:
env_vars
+=
"MIGRAPHX_DISABLE_CK_FUSION=1 "
int8_str
=
"--int8"
if
int8
else
""
cmd
=
f
"
{
env_vars
}
../build/bin/driver perf
{
model
}
--fill1 input_ids --input-dim @input_ids
{
batch_size
}
384 --batch
{
batch_size
}
--fp16
{
int8_str
}
--exhaustive-tune"
out
=
subprocess
.
run
(
cmd
,
capture_output
=
True
,
check
=
True
,
shell
=
True
)
out
=
subprocess
.
run
(
cmd
,
capture_output
=
True
,
check
=
True
,
shell
=
True
)
summary
=
re
.
findall
(
"Summary.*"
,
str
(
out
.
stdout
))[
0
].
replace
(
"
\\
n"
,
"
\n
"
)
total_time
=
re
.
findall
(
"Total time: \d+\.\d*"
,
summary
)[
0
]
...
...
@@ -42,13 +47,14 @@ def run_perf(model, batch_size, int8=False, use_ck=False, use_large_k=False, dis
ck_gemm_time
=
re
.
findall
(
"\d+\.\d*"
,
ck_gemm_time
[
0
])[
0
]
else
:
ck_gemm_time
=
"0.0"
rb_gemm_time
=
re
.
findall
(
"gpu::quant_gemm: \d+\.\d*|gpu::gemm: \d+\.\d*"
,
summary
)
rb_gemm_time
=
re
.
findall
(
"gpu::quant_gemm: \d+\.\d*|gpu::gemm: \d+\.\d*"
,
summary
)
if
rb_gemm_time
:
rb_gemm_time
=
re
.
findall
(
"\d+\.\d*"
,
rb_gemm_time
[
0
])[
0
]
else
:
rb_gemm_time
=
"0.0"
gemm_pack_time
=
re
.
findall
(
"gpu::int8_gemm_pack_a: \d+\.\d*"
,
summary
)
if
gemm_pack_time
:
gemm_pack_time
=
re
.
findall
(
"\d+\.\d*"
,
gemm_pack_time
[
0
])[
0
]
...
...
@@ -64,19 +70,19 @@ def run_perf(model, batch_size, int8=False, use_ck=False, use_large_k=False, dis
with
open
(
"perf_summaries.txt"
,
"a+"
)
as
f
:
f
.
write
(
cmd
+
"
\n
"
)
f
.
write
(
summary
+
"
\n\n
"
)
return
[
total_time
]
+
gemm_times
def
run_ck_perf
(
model
,
batch_size
,
int8
=
False
,
use_large_k
=
False
):
# CK with fusions
# CK with fusions
total_time
=
run_perf
(
model
,
batch_size
,
int8
,
True
,
use_large_k
,
False
)[
0
]
# CK without fusions
# CK without fusions
gemm_times
=
run_perf
(
model
,
batch_size
,
int8
,
True
,
use_large_k
,
True
)
return
[
total_time
]
+
gemm_times
[
1
:]
if
__name__
==
"__main__"
:
device_id
=
get_device_name
()
model
=
"/code/bert_base_cased_1_fp16_gpu.onnx"
...
...
@@ -84,7 +90,10 @@ if __name__ == "__main__":
cf
.
write_row
([
str
(
datetime
.
datetime
.
now
())])
cf
.
write_row
([
device_id
])
cf
.
write_row
([
model
])
headers
=
[
""
,
"Total Time (ms)"
,
"CK GEMM Time (ms)"
,
"RB GEMM Time (ms)"
,
"GEMM Pack Time (ms)"
,
"Total GEMM Time (ms)"
]
headers
=
[
""
,
"Total Time (ms)"
,
"CK GEMM Time (ms)"
,
"RB GEMM Time (ms)"
,
"GEMM Pack Time (ms)"
,
"Total GEMM Time (ms)"
]
batch_size
=
"1"
# int8:
...
...
@@ -95,7 +104,8 @@ if __name__ == "__main__":
# CK Only
cf
.
write_row
([
"CK"
]
+
run_ck_perf
(
model
,
batch_size
,
quantize
,
True
))
# CK + rocBLAS (k>2048)
cf
.
write_row
([
"CK + rocBLAS(k>2048)"
]
+
run_ck_perf
(
model
,
batch_size
,
quantize
,
False
))
cf
.
write_row
([
"CK + rocBLAS(k>2048)"
]
+
run_ck_perf
(
model
,
batch_size
,
quantize
,
False
))
# rocBLAS Only
cf
.
write_row
([
"rocBLAS"
]
+
run_perf
(
model
,
batch_size
,
quantize
))
cf
.
write_row
()
...
...
@@ -108,7 +118,8 @@ if __name__ == "__main__":
# CK Only
cf
.
write_row
([
"CK"
]
+
run_ck_perf
(
model
,
batch_size
,
quantize
,
True
))
# CK + rocBLAS (k>2048)
cf
.
write_row
([
"CK + rocBLAS(k>2048)"
]
+
run_ck_perf
(
model
,
batch_size
,
quantize
,
False
))
cf
.
write_row
([
"CK + rocBLAS(k>2048)"
]
+
run_ck_perf
(
model
,
batch_size
,
quantize
,
False
))
# rocBLAS Only
cf
.
write_row
([
"rocBLAS"
]
+
run_perf
(
model
,
batch_size
,
quantize
))
cf
.
write_row
()
...
...
@@ -122,7 +133,8 @@ if __name__ == "__main__":
# CK Only
cf
.
write_row
([
"CK"
]
+
run_ck_perf
(
model
,
batch_size
,
quantize
,
True
))
# CK + rocBLAS (k>2048)
cf
.
write_row
([
"CK + rocBLAS(k>2048)"
]
+
run_ck_perf
(
model
,
batch_size
,
quantize
,
False
))
cf
.
write_row
([
"CK + rocBLAS(k>2048)"
]
+
run_ck_perf
(
model
,
batch_size
,
quantize
,
False
))
# rocBLAS Only
cf
.
write_row
([
"rocBLAS"
]
+
run_perf
(
model
,
batch_size
,
quantize
))
cf
.
write_row
()
...
...
@@ -135,7 +147,8 @@ if __name__ == "__main__":
# CK Only
cf
.
write_row
([
"CK"
]
+
run_ck_perf
(
model
,
batch_size
,
quantize
,
True
))
# CK + rocBLAS (k>2048)
cf
.
write_row
([
"CK + rocBLAS(k>2048)"
]
+
run_ck_perf
(
model
,
batch_size
,
quantize
,
False
))
cf
.
write_row
([
"CK + rocBLAS(k>2048)"
]
+
run_ck_perf
(
model
,
batch_size
,
quantize
,
False
))
# rocBLAS Only
cf
.
write_row
([
"rocBLAS"
]
+
run_perf
(
model
,
batch_size
,
quantize
))
cf
.
write_row
()
\ No newline at end of file
cf
.
write_row
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment