chenzk / bert_large_squad_onnxruntime · Commits

Commit ed679b4e, authored Aug 04, 2023 by sugon_cxj
add iou of fp16/fp32
parent 75040ddb
Showing 1 changed file with 49 additions and 39 deletions.
main.py: +49 -39
main.py (view file @ ed679b4e)
@@ -10,14 +10,8 @@ def main():
     # context = 'ONNX is an open format to represent models. The benefits of using ONNX include interoperability of frameworks and hardware optimization.'
     # question = 'What are advantages of ONNX?'
     # context = '今天天气晴'
     # question = '今天天气怎么样?'
     # context = '中国历史有5000年'
     # question = '中国历史有多少年?'
-    context = 'ROCM是AMD的一个软件平台,用来加速GPU计算'
-    question = 'ROCM用来干什么?'
+    contexts = ['今天天气晴', '中国历史有5000年', 'ROCM是AMD的一个软件平台,用来加速GPU计算']
+    questions = ['今天天气怎么样?', '中国历史有多少年?', 'ROCM用来干什么?']
     session = InferenceSession("./model.onnx", providers=[('ROCMExecutionProvider', {'device_id': '4'}), 'CPUExecutionProvider'])
     session_fp16 = InferenceSession("./model_fp16.onnx", providers=[('ROCMExecutionProvider', {'device_id': '4'}), 'CPUExecutionProvider'])
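For readers without Chinese: the three context/question pairs are "The weather is sunny today" / "How is the weather today?", "Chinese history spans 5,000 years" / "How many years of history does China have?", and "ROCm is an AMD software platform used to accelerate GPU compute" / "What is ROCm used for?". Both sessions pin ROCMExecutionProvider to device_id 4, with CPUExecutionProvider as a fallback. As a minimal sketch outside this commit (make_session is a hypothetical helper; model paths and device id are taken from the diff), the provider list can be built defensively so the script still runs on onnxruntime builds that do not ship the ROCm execution provider:

import onnxruntime as ort

def make_session(model_path, device_id='4'):
    # Prefer the ROCm EP when this onnxruntime build ships it; otherwise CPU only.
    providers = ['CPUExecutionProvider']
    if 'ROCMExecutionProvider' in ort.get_available_providers():
        providers = [('ROCMExecutionProvider', {'device_id': device_id}),
                     'CPUExecutionProvider']
    return ort.InferenceSession(model_path, providers=providers)

session = make_session("./model.onnx")
session_fp16 = make_session("./model_fp16.onnx")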
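In the second hunk below, each tokenized example is copied token-by-token into zero-filled int64 buffers sized to the model's static input length (input_shapes[0][1], replacing the hard-coded 384), because the exported BERT graph expects fixed-shape inputs. As a minimal sketch, assuming the same inputs and input_shapes objects as the script, the per-token copy loop can be written as one slice assignment per buffer:

import numpy as np

seq_len = input_shapes[0][1]   # static sequence length of the exported model, e.g. 384
k = len(inputs.input_ids[0])   # actual token count of this example

input_ids_zeros = np.zeros((1, seq_len), np.int64)
input_mask_zeros = np.zeros((1, seq_len), np.int64)
segment_ids_zeros = np.zeros((1, seq_len), np.int64)

# Slice assignment instead of the per-token loop; the result is identical.
input_ids_zeros[0, :k] = inputs.input_ids[0]
input_mask_zeros[0, :k] = inputs.attention_mask[0]
segment_ids_zeros[0, :k] = inputs.token_type_ids[0]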
@@ -37,42 +31,58 @@ def main():
     output_names.append(session.get_outputs()[i].name)
     print("output_names:", output_names)
-    inputs = tokenizer(question, context, padding=True, truncation=False, return_tensors='np')
-    inputs_int64 = {key: np.array(inputs[key], dtype=np.int64) for key in inputs}
-    print("inputs:", tokenizer.decode(inputs.input_ids[0]))
-    input_ids_zeros = np.zeros((1, 384), np.int64)
-    input_mask_zeros = np.zeros((1, 384), np.int64)
-    segment_ids_zeros = np.zeros((1, 384), np.int64)
-    for i in range(len(inputs.input_ids[0])):
-        input_ids_zeros[0][i] = inputs.input_ids[0][i]
-        input_mask_zeros[0][i] = inputs.attention_mask[0][i]
-        segment_ids_zeros[0][i] = inputs.token_type_ids[0][i]
-    onnx_input = {input_names[0]: input_ids_zeros, input_names[1]: input_mask_zeros, input_names[2]: segment_ids_zeros}
-    for i in range(10):
-        t1 = time.perf_counter()
-        outputs = session.run(input_feed=dict(onnx_input), output_names=None)
-        t2 = time.perf_counter()
-        print("fp32:", i, t2 - t1)
-    answer_start_index = outputs[0].argmax()
-    answer_end_index = outputs[1].argmax()
-    predict_answer_tokens = inputs.input_ids[0, answer_start_index : answer_end_index + 1]
-    print("results fp32:", tokenizer.decode(predict_answer_tokens))
-    for i in range(10):
-        t1 = time.perf_counter()
-        outputs_fp16 = session_fp16.run(input_feed=dict(onnx_input), output_names=None)
-        t2 = time.perf_counter()
-        print("fp16:", i, t2 - t1)
-    answer_start_index_fp16 = outputs_fp16[0].argmax()
-    answer_end_index_fp16 = outputs_fp16[1].argmax()
-    predict_answer_tokens_fp16 = inputs.input_ids[0, answer_start_index_fp16 : answer_end_index_fp16 + 1]
-    print("results fp16:", tokenizer.decode(predict_answer_tokens_fp16))
+    IOU = 0
+    n = len(contexts)
+    for i in range(n):
+        inputs = tokenizer(questions[i], contexts[i], padding=True, truncation=False, return_tensors='np')
+        inputs_int64 = {key: np.array(inputs[key], dtype=np.int64) for key in inputs}
+        print("inputs:", tokenizer.decode(inputs.input_ids[0]))
+        input_ids_zeros = np.zeros((1, input_shapes[0][1]), np.int64)
+        input_mask_zeros = np.zeros((1, input_shapes[0][1]), np.int64)
+        segment_ids_zeros = np.zeros((1, input_shapes[0][1]), np.int64)
+        for i in range(len(inputs.input_ids[0])):
+            input_ids_zeros[0][i] = inputs.input_ids[0][i]
+            input_mask_zeros[0][i] = inputs.attention_mask[0][i]
+            segment_ids_zeros[0][i] = inputs.token_type_ids[0][i]
+        onnx_input = {input_names[0]: input_ids_zeros, input_names[1]: input_mask_zeros, input_names[2]: segment_ids_zeros}
+        for i in range(10):
+            t1 = time.perf_counter()
+            outputs = session.run(input_feed=dict(onnx_input), output_names=None)
+            t2 = time.perf_counter()
+            print("fp32 infer time:", i, t2 - t1)
+        # print(outputs)
+        answer_start_index = outputs[0].argmax()
+        answer_end_index = outputs[1].argmax()
+        predict_answer_tokens = inputs.input_ids[0, answer_start_index : answer_end_index + 1]
+        print("fp32 results:", tokenizer.decode(predict_answer_tokens))
+        for i in range(10):
+            t1 = time.perf_counter()
+            outputs_fp16 = session_fp16.run(input_feed=dict(onnx_input), output_names=None)
+            t2 = time.perf_counter()
+            print("fp16 infer time:", i, t2 - t1)
+        # print(outputs_fp16)
+        answer_start_index_fp16 = outputs_fp16[0].argmax()
+        answer_end_index_fp16 = outputs_fp16[1].argmax()
+        predict_answer_tokens_fp16 = inputs.input_ids[0, answer_start_index_fp16 : answer_end_index_fp16 + 1]
+        print("fp16 results:", tokenizer.decode(predict_answer_tokens_fp16))
+        answer_start_index_max = answer_start_index if answer_start_index > answer_start_index_fp16 else answer_start_index_fp16
+        answer_end_index_min = answer_end_index if answer_end_index < answer_end_index_fp16 else answer_end_index_fp16
+        answer_start_index_min = answer_start_index if answer_start_index < answer_start_index_fp16 else answer_start_index_fp16
+        answer_end_index_max = answer_end_index if answer_end_index > answer_end_index_fp16 else answer_end_index_fp16
+        iou = (answer_end_index_min - answer_start_index_max + 1) / (answer_end_index_max - answer_start_index_min + 1)
+        print("result iou of fp16/fp32:", iou)
+        IOU += iou
+    print("average iou:", IOU / n)
 if __name__ == "__main__":
     main()
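The IoU added by this commit treats the fp32 and fp16 answer spans as closed token intervals and divides the intersection length by the union length. For example, spans [5, 9] (fp32) and [7, 10] (fp16) overlap in tokens 7 through 9, so IoU = (9 - 7 + 1) / (10 - 5 + 1) = 3/6 = 0.5. One caveat: the diff's expression goes negative when the two spans are disjoint. A self-contained sketch (span_iou is a hypothetical helper, not part of the commit) that clamps the intersection at zero:

def span_iou(start_a, end_a, start_b, end_b):
    """IoU of two closed token spans [start, end], 0 when they are disjoint."""
    inter = max(0, min(end_a, end_b) - max(start_a, start_b) + 1)
    union = max(end_a, end_b) - min(start_a, start_b) + 1
    return inter / union

# The commit's per-example accumulation, expressed with the helper:
# IOU += span_iou(answer_start_index, answer_end_index,
#                 answer_start_index_fp16, answer_end_index_fp16)
print(span_iou(5, 9, 7, 10))  # 0.5

Averaging this per-example value over the n test pairs is what the IOU accumulator and the final print("average iou:", IOU / n) in the diff do.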