Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
e1bd6573
Commit
e1bd6573
authored
Aug 01, 2023
by
Alan Turner
Browse files
Use insert_common_op
parent
1f106ca7
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
113 additions
and
23 deletions
+113
-23
src/rewrite_quantization.cpp
src/rewrite_quantization.cpp
+8
-6
tools/gemm_perf.py
tools/gemm_perf.py
+105
-17
No files found.
src/rewrite_quantization.cpp
View file @
e1bd6573
...
@@ -28,6 +28,7 @@
...
@@ -28,6 +28,7 @@
#include <migraphx/tune_axis.hpp>
#include <migraphx/tune_axis.hpp>
#include <migraphx/program.hpp>
#include <migraphx/program.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/common.hpp>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
...
@@ -68,13 +69,14 @@ void apply_quantizelinear(module& m, instruction_ref ins)
...
@@ -68,13 +69,14 @@ void apply_quantizelinear(module& m, instruction_ref ins)
auto
s
=
add_zero_point
->
get_shape
();
auto
s
=
add_zero_point
->
get_shape
();
auto
min_arg
=
m
.
add_literal
(
literal
{
shape
{
s
.
type
()},
{
min_quant
}});
auto
min_arg
=
m
.
add_literal
(
literal
{
shape
{
s
.
type
()},
{
min_quant
}});
auto
max_arg
=
m
.
add_literal
(
literal
{
shape
{
s
.
type
()},
{
max_quant
}});
auto
max_arg
=
m
.
add_literal
(
literal
{
shape
{
s
.
type
()},
{
max_quant
}});
auto
min_mbcast
=
//
auto min_mbcast =
m
.
insert_instruction
(
ins
,
make_op
(
"multibroadcast"
,
{{
"out_lens"
,
s
.
lens
()}}),
min_arg
);
//
m.insert_instruction(ins, make_op("multibroadcast", {{"out_lens", s.lens()}}), min_arg);
auto
max_mbcast
=
//
auto max_mbcast =
m
.
insert_instruction
(
ins
,
make_op
(
"multibroadcast"
,
{{
"out_lens"
,
s
.
lens
()}}),
max_arg
);
//
m.insert_instruction(ins, make_op("multibroadcast", {{"out_lens", s.lens()}}), max_arg);
auto
saturate
=
// auto saturate =
m
.
insert_instruction
(
ins
,
make_op
(
"clip"
),
add_zero_point
,
min_mbcast
,
max_mbcast
);
// m.insert_instruction(ins, make_op("clip"), add_zero_point, min_mbcast, max_mbcast);
auto
saturate
=
insert_common_op
(
m
,
ins
,
make_op
(
"clip"
),
{
add_zero_point
,
min_arg
,
max_arg
});
m
.
replace_instruction
(
m
.
replace_instruction
(
ins
,
make_op
(
"convert"
,
{{
"target_type"
,
ins
->
get_shape
().
type
()}}),
saturate
);
ins
,
make_op
(
"convert"
,
{{
"target_type"
,
ins
->
get_shape
().
type
()}}),
saturate
);
}
}
...
...
tools/gemm_perf.py
View file @
e1bd6573
import
subprocess
,
csv
,
re
import
subprocess
,
csv
,
re
,
datetime
class CSVFile:
    """Append-only CSV sink that reopens its target file for every row."""

    def __init__(self, path="output.csv"):
        """Remember where rows should be written.

        Args:
            path: File that rows are appended to; created on first write
                if it does not exist yet.
        """
        self.path = path

    def write_row(self, row=None):
        """Append a single row to the CSV file.

        Args:
            row: Iterable of cell values. ``None`` (the default) writes an
                empty row, which callers use as a visual separator.
        """
        # Avoid a shared mutable default; treat "no argument" as an empty row.
        cells = [] if row is None else row
        # newline="" hands line-ending control to the csv module, which
        # otherwise produces doubled line breaks on platforms that translate
        # "\n" on write.
        with open(self.path, "a+", newline="") as f:
            csv.writer(f).writerow(cells)
def
get_device_name
():
def
get_device_name
():
out
=
subprocess
.
run
(
"rocminfo"
,
out
=
subprocess
.
run
(
"rocminfo"
,
...
@@ -17,37 +27,115 @@ def run_perf(model, batch_size, int8=False, use_ck=False, use_large_k=False, dis
...
@@ -17,37 +27,115 @@ def run_perf(model, batch_size, int8=False, use_ck=False, use_large_k=False, dis
if
disable_fusion
:
if
disable_fusion
:
env_vars
+=
"MIGRAPHX_DISABLE_CK_FUSION=1 "
env_vars
+=
"MIGRAPHX_DISABLE_CK_FUSION=1 "
int8_str
=
"--int8"
if
int8
else
""
int8_str
=
"--int8"
if
int8
else
""
cmd
=
"{env_vars} ../build/bin/driver perf {model} --fill1 input_ids --input-dim @input_ids {batch_size} 384 --batch {batch_size} --fp16 {int8} --exhaustive-tune"
.
format
(
cmd
=
f
"
{
env_vars
}
../build/bin/driver perf
{
model
}
--fill1 input_ids --input-dim @input_ids
{
batch_size
}
384 --batch
{
batch_size
}
--fp16
{
int8_str
}
--exhaustive-tune"
env_vars
=
env_vars
,
model
=
model
,
batch_size
=
str
(
batch_size
),
int8
=
int8_str
)
out
=
subprocess
.
run
(
cmd
,
out
=
subprocess
.
run
(
cmd
,
capture_output
=
True
,
capture_output
=
True
,
check
=
True
,
check
=
True
,
shell
=
True
)
shell
=
True
)
summary
=
re
.
findall
(
"Summary.*"
,
str
(
out
.
stdout
))[
0
].
replace
(
"
\\
n"
,
"
\n
"
)
summary
=
re
.
findall
(
"Summary.*"
,
str
(
out
.
stdout
))[
0
].
replace
(
"
\\
n"
,
"
\n
"
)
total_time
=
re
.
findall
(
"Total time: \d+\.\d*"
,
summary
)[
0
]
total_time
=
re
.
findall
(
"Total time: \d+\.\d*"
,
summary
)[
0
]
total_time
=
total_time
.
replace
(
"Total time: "
,
""
)
total_time
=
total_time
.
replace
(
"Total time: "
,
""
)
print
(
summary
)
ck_gemm_time
=
re
.
findall
(
"ck_gemm_kernel: \d+\.\d*"
,
summary
)
print
(
total_time
)
if
ck_gemm_time
:
with
open
(
"summaries.txt"
,
"w+"
)
as
f
:
ck_gemm_time
=
re
.
findall
(
"\d+\.\d*"
,
ck_gemm_time
[
0
])[
0
]
else
:
ck_gemm_time
=
"0.0"
rb_gemm_time
=
re
.
findall
(
"gpu::quant_gemm: \d+\.\d*|gpu::gemm: \d+\.\d*"
,
summary
)
if
rb_gemm_time
:
rb_gemm_time
=
re
.
findall
(
"\d+\.\d*"
,
rb_gemm_time
[
0
])[
0
]
else
:
rb_gemm_time
=
"0.0"
gemm_pack_time
=
re
.
findall
(
"gpu::int8_gemm_pack_a: \d+\.\d*"
,
summary
)
if
gemm_pack_time
:
gemm_pack_time
=
re
.
findall
(
"\d+\.\d*"
,
gemm_pack_time
[
0
])[
0
]
else
:
gemm_pack_time
=
"0.0"
gemm_times
=
[
ck_gemm_time
,
rb_gemm_time
,
gemm_pack_time
]
total_gemm_time
=
[
str
(
sum
(
map
(
float
,
gemm_times
)))]
gemm_times
.
extend
(
total_gemm_time
)
print
(
cmd
)
print
(
total_time
+
"ms"
)
with
open
(
"perf_summaries.txt"
,
"a+"
)
as
f
:
f
.
write
(
cmd
+
"
\n
"
)
f
.
write
(
cmd
+
"
\n
"
)
f
.
write
(
summary
+
"
\n\n
"
)
f
.
write
(
summary
+
"
\n\n
"
)
return
[
total_time
]
+
gemm_times
def run_ck_perf(model, batch_size, int8=False, use_large_k=False):
    """Benchmark a model twice with CK enabled and merge the two results.

    The first ``run_perf`` call passes ``False`` as its last argument and
    the second passes ``True``; judging by ``run_perf``'s body that flag is
    ``disable_fusion`` (its signature is truncated in this view - confirm).

    Args:
        model: Path of the model file handed to ``run_perf``.
        batch_size: Batch size forwarded to ``run_perf``.
        int8: Forwarded to ``run_perf`` as its ``int8`` argument.
        use_large_k: Forwarded to ``run_perf`` as its ``use_large_k`` argument.

    Returns:
        A list whose first element is the total time of the first (fused)
        run, followed by every element but the first of the second
        (unfused) run's result.
    """
    # Run order matters for reproducing the original log output:
    # fused run first, unfused run second.
    fused_result = run_perf(model, batch_size, int8, True, use_large_k, False)
    unfused_result = run_perf(model, batch_size, int8, True, use_large_k, True)
    return [fused_result[0], *unfused_result[1:]]
# RocBlas
# Get gemm info
# CK
# With fusions
# Without fusions
if __name__ == "__main__":
    # Script entry point: benchmark one model for every combination of
    # batch size and precision and append the results to the default CSV.
    device_id = get_device_name()
    model = "/code/bert_base_cased_1_fp16_gpu.onnx"

    cf = CSVFile()
    # Preamble rows: timestamp, device and model identify this run.
    cf.write_row([str(datetime.datetime.now())])
    cf.write_row([device_id])
    cf.write_row([model])

    headers = [
        "",
        "Total Time (ms)",
        "CK GEMM Time (ms)",
        "RB GEMM Time (ms)",
        "GEMM Pack Time (ms)",
        "Total GEMM Time (ms)",
    ]

    # One section per (batch size, precision) pair, in the same order the
    # hand-unrolled version produced: int8 first, then fp16, per batch size.
    for batch_size in ("1", "64"):
        for quantize in (True, False):
            label = f"Int8 / BatchSize: {batch_size}" if quantize else f"FP16 / BatchSize: {batch_size}"
            cf.write_row([label])
            cf.write_row(headers)
            # CK Only
            cf.write_row(["CK"] + run_ck_perf(model, batch_size, quantize, True))
            # CK + rocBLAS (k>2048)
            cf.write_row(["CK + rocBLAS(k>2048)"] +
                         run_ck_perf(model, batch_size, quantize, False))
            # rocBLAS Only
            cf.write_row(["rocBLAS"] + run_perf(model, batch_size, quantize))
            # Blank row separates sections in the CSV.
            cf.write_row()
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment