Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
028280dc
Commit
028280dc
authored
Aug 01, 2023
by
Alan Turner
Browse files
Formatting
parent
e1bd6573
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
44 additions
and
27 deletions
+44
-27
src/rewrite_quantization.cpp
src/rewrite_quantization.cpp
+11
-7
tools/gemm_perf.py
tools/gemm_perf.py
+33
-20
No files found.
src/rewrite_quantization.cpp
View file @
028280dc
...
@@ -64,19 +64,22 @@ void apply_quantizelinear(module& m, instruction_ref ins)
...
@@ -64,19 +64,22 @@ void apply_quantizelinear(module& m, instruction_ref ins)
max_quant
=
qt
.
max
();
max_quant
=
qt
.
max
();
min_quant
=
qt
.
min
();
min_quant
=
qt
.
min
();
});
});
if
(
enabled
(
MIGRAPHX_BROADCAST_Q
{}))
if
(
enabled
(
MIGRAPHX_BROADCAST_Q
{}))
{
{
auto
s
=
add_zero_point
->
get_shape
();
auto
s
=
add_zero_point
->
get_shape
();
auto
min_arg
=
m
.
add_literal
(
literal
{
shape
{
s
.
type
()},
{
min_quant
}});
auto
min_arg
=
m
.
add_literal
(
literal
{
shape
{
s
.
type
()},
{
min_quant
}});
auto
max_arg
=
m
.
add_literal
(
literal
{
shape
{
s
.
type
()},
{
max_quant
}});
auto
max_arg
=
m
.
add_literal
(
literal
{
shape
{
s
.
type
()},
{
max_quant
}});
// auto min_mbcast =
// auto min_mbcast =
// m.insert_instruction(ins, make_op("multibroadcast", {{"out_lens", s.lens()}}), min_arg);
// m.insert_instruction(ins, make_op("multibroadcast", {{"out_lens", s.lens()}}),
// min_arg);
// auto max_mbcast =
// auto max_mbcast =
// m.insert_instruction(ins, make_op("multibroadcast", {{"out_lens", s.lens()}}), max_arg);
// m.insert_instruction(ins, make_op("multibroadcast", {{"out_lens", s.lens()}}),
// max_arg);
// auto saturate =
// auto saturate =
// m.insert_instruction(ins, make_op("clip"), add_zero_point, min_mbcast, max_mbcast);
// m.insert_instruction(ins, make_op("clip"), add_zero_point, min_mbcast, max_mbcast);
auto
saturate
=
insert_common_op
(
m
,
ins
,
make_op
(
"clip"
),
{
add_zero_point
,
min_arg
,
max_arg
});
auto
saturate
=
insert_common_op
(
m
,
ins
,
make_op
(
"clip"
),
{
add_zero_point
,
min_arg
,
max_arg
});
m
.
replace_instruction
(
m
.
replace_instruction
(
ins
,
make_op
(
"convert"
,
{{
"target_type"
,
ins
->
get_shape
().
type
()}}),
saturate
);
ins
,
make_op
(
"convert"
,
{{
"target_type"
,
ins
->
get_shape
().
type
()}}),
saturate
);
}
}
...
@@ -88,7 +91,8 @@ void apply_quantizelinear(module& m, instruction_ref ins)
...
@@ -88,7 +91,8 @@ void apply_quantizelinear(module& m, instruction_ref ins)
auto
min_arg
=
m
.
add_literal
(
literal
(
s
,
min_data
));
auto
min_arg
=
m
.
add_literal
(
literal
(
s
,
min_data
));
auto
max_arg
=
m
.
add_literal
(
literal
(
s
,
max_data
));
auto
max_arg
=
m
.
add_literal
(
literal
(
s
,
max_data
));
auto
saturate
=
m
.
insert_instruction
(
ins
,
make_op
(
"clip"
),
add_zero_point
,
min_arg
,
max_arg
);
auto
saturate
=
m
.
insert_instruction
(
ins
,
make_op
(
"clip"
),
add_zero_point
,
min_arg
,
max_arg
);
m
.
replace_instruction
(
m
.
replace_instruction
(
ins
,
make_op
(
"convert"
,
{{
"target_type"
,
ins
->
get_shape
().
type
()}}),
saturate
);
ins
,
make_op
(
"convert"
,
{{
"target_type"
,
ins
->
get_shape
().
type
()}}),
saturate
);
}
}
...
...
tools/gemm_perf.py
View file @
028280dc
import
subprocess
,
csv
,
re
,
datetime
import
subprocess
,
csv
,
re
,
datetime
class
CSVFile
:
class
CSVFile
:
def
__init__
(
self
,
path
=
"output.csv"
):
def
__init__
(
self
,
path
=
"output.csv"
):
self
.
path
=
path
self
.
path
=
path
...
@@ -18,7 +20,13 @@ def get_device_name():
...
@@ -18,7 +20,13 @@ def get_device_name():
matches
=
re
.
findall
(
"gfx\d*[a-z]*"
,
str
(
out
.
stdout
))
matches
=
re
.
findall
(
"gfx\d*[a-z]*"
,
str
(
out
.
stdout
))
return
matches
[
0
]
return
matches
[
0
]
def
run_perf
(
model
,
batch_size
,
int8
=
False
,
use_ck
=
False
,
use_large_k
=
False
,
disable_fusion
=
False
):
def
run_perf
(
model
,
batch_size
,
int8
=
False
,
use_ck
=
False
,
use_large_k
=
False
,
disable_fusion
=
False
):
env_vars
=
""
env_vars
=
""
if
use_ck
:
if
use_ck
:
env_vars
+=
"MIGRAPHX_ENABLE_CK=1 "
env_vars
+=
"MIGRAPHX_ENABLE_CK=1 "
...
@@ -28,10 +36,7 @@ def run_perf(model, batch_size, int8=False, use_ck=False, use_large_k=False, dis
...
@@ -28,10 +36,7 @@ def run_perf(model, batch_size, int8=False, use_ck=False, use_large_k=False, dis
env_vars
+=
"MIGRAPHX_DISABLE_CK_FUSION=1 "
env_vars
+=
"MIGRAPHX_DISABLE_CK_FUSION=1 "
int8_str
=
"--int8"
if
int8
else
""
int8_str
=
"--int8"
if
int8
else
""
cmd
=
f
"
{
env_vars
}
../build/bin/driver perf
{
model
}
--fill1 input_ids --input-dim @input_ids
{
batch_size
}
384 --batch
{
batch_size
}
--fp16
{
int8_str
}
--exhaustive-tune"
cmd
=
f
"
{
env_vars
}
../build/bin/driver perf
{
model
}
--fill1 input_ids --input-dim @input_ids
{
batch_size
}
384 --batch
{
batch_size
}
--fp16
{
int8_str
}
--exhaustive-tune"
out
=
subprocess
.
run
(
cmd
,
out
=
subprocess
.
run
(
cmd
,
capture_output
=
True
,
check
=
True
,
shell
=
True
)
capture_output
=
True
,
check
=
True
,
shell
=
True
)
summary
=
re
.
findall
(
"Summary.*"
,
str
(
out
.
stdout
))[
0
].
replace
(
"
\\
n"
,
"
\n
"
)
summary
=
re
.
findall
(
"Summary.*"
,
str
(
out
.
stdout
))[
0
].
replace
(
"
\\
n"
,
"
\n
"
)
total_time
=
re
.
findall
(
"Total time: \d+\.\d*"
,
summary
)[
0
]
total_time
=
re
.
findall
(
"Total time: \d+\.\d*"
,
summary
)[
0
]
...
@@ -43,7 +48,8 @@ def run_perf(model, batch_size, int8=False, use_ck=False, use_large_k=False, dis
...
@@ -43,7 +48,8 @@ def run_perf(model, batch_size, int8=False, use_ck=False, use_large_k=False, dis
else
:
else
:
ck_gemm_time
=
"0.0"
ck_gemm_time
=
"0.0"
rb_gemm_time
=
re
.
findall
(
"gpu::quant_gemm: \d+\.\d*|gpu::gemm: \d+\.\d*"
,
summary
)
rb_gemm_time
=
re
.
findall
(
"gpu::quant_gemm: \d+\.\d*|gpu::gemm: \d+\.\d*"
,
summary
)
if
rb_gemm_time
:
if
rb_gemm_time
:
rb_gemm_time
=
re
.
findall
(
"\d+\.\d*"
,
rb_gemm_time
[
0
])[
0
]
rb_gemm_time
=
re
.
findall
(
"\d+\.\d*"
,
rb_gemm_time
[
0
])[
0
]
else
:
else
:
...
@@ -67,6 +73,7 @@ def run_perf(model, batch_size, int8=False, use_ck=False, use_large_k=False, dis
...
@@ -67,6 +73,7 @@ def run_perf(model, batch_size, int8=False, use_ck=False, use_large_k=False, dis
return
[
total_time
]
+
gemm_times
return
[
total_time
]
+
gemm_times
def
run_ck_perf
(
model
,
batch_size
,
int8
=
False
,
use_large_k
=
False
):
def
run_ck_perf
(
model
,
batch_size
,
int8
=
False
,
use_large_k
=
False
):
# CK with fusions
# CK with fusions
total_time
=
run_perf
(
model
,
batch_size
,
int8
,
True
,
use_large_k
,
False
)[
0
]
total_time
=
run_perf
(
model
,
batch_size
,
int8
,
True
,
use_large_k
,
False
)[
0
]
...
@@ -76,7 +83,6 @@ def run_ck_perf(model, batch_size, int8=False, use_large_k=False):
...
@@ -76,7 +83,6 @@ def run_ck_perf(model, batch_size, int8=False, use_large_k=False):
return
[
total_time
]
+
gemm_times
[
1
:]
return
[
total_time
]
+
gemm_times
[
1
:]
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
device_id
=
get_device_name
()
device_id
=
get_device_name
()
model
=
"/code/bert_base_cased_1_fp16_gpu.onnx"
model
=
"/code/bert_base_cased_1_fp16_gpu.onnx"
...
@@ -84,7 +90,10 @@ if __name__ == "__main__":
...
@@ -84,7 +90,10 @@ if __name__ == "__main__":
cf
.
write_row
([
str
(
datetime
.
datetime
.
now
())])
cf
.
write_row
([
str
(
datetime
.
datetime
.
now
())])
cf
.
write_row
([
device_id
])
cf
.
write_row
([
device_id
])
cf
.
write_row
([
model
])
cf
.
write_row
([
model
])
headers
=
[
""
,
"Total Time (ms)"
,
"CK GEMM Time (ms)"
,
"RB GEMM Time (ms)"
,
"GEMM Pack Time (ms)"
,
"Total GEMM Time (ms)"
]
headers
=
[
""
,
"Total Time (ms)"
,
"CK GEMM Time (ms)"
,
"RB GEMM Time (ms)"
,
"GEMM Pack Time (ms)"
,
"Total GEMM Time (ms)"
]
batch_size
=
"1"
batch_size
=
"1"
# int8:
# int8:
...
@@ -95,7 +104,8 @@ if __name__ == "__main__":
...
@@ -95,7 +104,8 @@ if __name__ == "__main__":
# CK Only
# CK Only
cf
.
write_row
([
"CK"
]
+
run_ck_perf
(
model
,
batch_size
,
quantize
,
True
))
cf
.
write_row
([
"CK"
]
+
run_ck_perf
(
model
,
batch_size
,
quantize
,
True
))
# CK + rocBLAS (k>2048)
# CK + rocBLAS (k>2048)
cf
.
write_row
([
"CK + rocBLAS(k>2048)"
]
+
run_ck_perf
(
model
,
batch_size
,
quantize
,
False
))
cf
.
write_row
([
"CK + rocBLAS(k>2048)"
]
+
run_ck_perf
(
model
,
batch_size
,
quantize
,
False
))
# rocBLAS Only
# rocBLAS Only
cf
.
write_row
([
"rocBLAS"
]
+
run_perf
(
model
,
batch_size
,
quantize
))
cf
.
write_row
([
"rocBLAS"
]
+
run_perf
(
model
,
batch_size
,
quantize
))
cf
.
write_row
()
cf
.
write_row
()
...
@@ -108,7 +118,8 @@ if __name__ == "__main__":
...
@@ -108,7 +118,8 @@ if __name__ == "__main__":
# CK Only
# CK Only
cf
.
write_row
([
"CK"
]
+
run_ck_perf
(
model
,
batch_size
,
quantize
,
True
))
cf
.
write_row
([
"CK"
]
+
run_ck_perf
(
model
,
batch_size
,
quantize
,
True
))
# CK + rocBLAS (k>2048)
# CK + rocBLAS (k>2048)
cf
.
write_row
([
"CK + rocBLAS(k>2048)"
]
+
run_ck_perf
(
model
,
batch_size
,
quantize
,
False
))
cf
.
write_row
([
"CK + rocBLAS(k>2048)"
]
+
run_ck_perf
(
model
,
batch_size
,
quantize
,
False
))
# rocBLAS Only
# rocBLAS Only
cf
.
write_row
([
"rocBLAS"
]
+
run_perf
(
model
,
batch_size
,
quantize
))
cf
.
write_row
([
"rocBLAS"
]
+
run_perf
(
model
,
batch_size
,
quantize
))
cf
.
write_row
()
cf
.
write_row
()
...
@@ -122,7 +133,8 @@ if __name__ == "__main__":
...
@@ -122,7 +133,8 @@ if __name__ == "__main__":
# CK Only
# CK Only
cf
.
write_row
([
"CK"
]
+
run_ck_perf
(
model
,
batch_size
,
quantize
,
True
))
cf
.
write_row
([
"CK"
]
+
run_ck_perf
(
model
,
batch_size
,
quantize
,
True
))
# CK + rocBLAS (k>2048)
# CK + rocBLAS (k>2048)
cf
.
write_row
([
"CK + rocBLAS(k>2048)"
]
+
run_ck_perf
(
model
,
batch_size
,
quantize
,
False
))
cf
.
write_row
([
"CK + rocBLAS(k>2048)"
]
+
run_ck_perf
(
model
,
batch_size
,
quantize
,
False
))
# rocBLAS Only
# rocBLAS Only
cf
.
write_row
([
"rocBLAS"
]
+
run_perf
(
model
,
batch_size
,
quantize
))
cf
.
write_row
([
"rocBLAS"
]
+
run_perf
(
model
,
batch_size
,
quantize
))
cf
.
write_row
()
cf
.
write_row
()
...
@@ -135,7 +147,8 @@ if __name__ == "__main__":
...
@@ -135,7 +147,8 @@ if __name__ == "__main__":
# CK Only
# CK Only
cf
.
write_row
([
"CK"
]
+
run_ck_perf
(
model
,
batch_size
,
quantize
,
True
))
cf
.
write_row
([
"CK"
]
+
run_ck_perf
(
model
,
batch_size
,
quantize
,
True
))
# CK + rocBLAS (k>2048)
# CK + rocBLAS (k>2048)
cf
.
write_row
([
"CK + rocBLAS(k>2048)"
]
+
run_ck_perf
(
model
,
batch_size
,
quantize
,
False
))
cf
.
write_row
([
"CK + rocBLAS(k>2048)"
]
+
run_ck_perf
(
model
,
batch_size
,
quantize
,
False
))
# rocBLAS Only
# rocBLAS Only
cf
.
write_row
([
"rocBLAS"
]
+
run_perf
(
model
,
batch_size
,
quantize
))
cf
.
write_row
([
"rocBLAS"
]
+
run_perf
(
model
,
batch_size
,
quantize
))
cf
.
write_row
()
cf
.
write_row
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment