Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
zk
GroundingDINO-DCU-Optimized
Commits
3191f720
Commit
3191f720
authored
May 28, 2026
by
zk
Browse files
修改了部分migraphx代码
parent
39a85c88
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
24 additions
and
19 deletions
+24
-19
README.md
README.md
+4
-2
migraphx_infer/migraphx_export.bash
migraphx_infer/migraphx_export.bash
+5
-3
migraphx_infer/migraphx_infer.py
migraphx_infer/migraphx_infer.py
+2
-2
migraphx_infer/migraphx_perf.bash
migraphx_infer/migraphx_perf.bash
+1
-1
migraphx_infer/modify_onnx_0522.py
migraphx_infer/modify_onnx_0522.py
+10
-9
migraphx_infer/onnx_sim.py
migraphx_infer/onnx_sim.py
+2
-2
No files found.
README.md
View file @
3191f720
...
...
@@ -21,7 +21,9 @@ docker pull image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.5.1-ubuntu22.04-dt
在进行编译和运行前,请先激活相关的计算栈环境并配置 HuggingFace 镜像:
```
bash
source
/opt/dtk/cuda/env.sh
source
/opt/dtk/cuda/cuda-12/env.sh
cp
-r
/opt/dtk/cuda/cuda-12/include /opt/dtk/cuda/cuda-12/include-bak
rm
-rf
/opt/dtk/cuda/cuda-12/include
export
HF_ENDPOINT
=
https://hf-mirror.com
```
...
...
@@ -206,7 +208,7 @@ cd migraphx_infer
```
2.
运行转换onnx脚本
将简化后的onnx转换为要用migraphx推理的onnx
将简化后的onnx转换为要用migraphx推理的onnx
(ground_sim.onnx->ground_opt.onnx)
```
bash
bash migraphx_export.bash
```
...
...
migraphx_infer/migraphx_export.bash
View file @
3191f720
export
MIGRAPHX_TRACE_COMPILE
=
1
# export MIGRAPHX_TRACE_COMPILE=1
MIGRAPHX_ENABLE_GRAPHAPI_REDUCTION
=
1
MIGRAPHX_ENABLE_LAYERNORM_FUSION
=
1
migraphx-driver perf
--onnx
\
../
weights
/ground_opt_0
430
.onnx
\
../
test0525
/ground_opt_0
509
.onnx
\
--fp16
\
--output
\
../
weights
/ground_opt_0
430
.mxr
../
test0525
/ground_opt_0
515
.mxr
# ../weights/ground_opt_0430.mxr > migraphx_log.log 2>&1
\ No newline at end of file
migraphx_infer/migraphx_infer.py
View file @
3191f720
...
...
@@ -276,7 +276,7 @@ def benchmark_performance(
if
__name__
==
"__main__"
:
model_path
=
"../weights/ground_opt_0430.onnx"
cache_path
=
"../weights/ground_opt_0
430
.mxr"
cache_path
=
"../weights/ground_opt_0
515_1
.mxr"
img_path
=
"../images/in/car_1.jpg"
BOX_TRESHOLD
=
0.35
...
...
@@ -288,7 +288,7 @@ if __name__ == "__main__":
model
=
MIGraphXModel
(
model_path
,
cache_path
=
cache_path
,
device_id
=
5
,
device_id
=
2
,
force_recompile
=
False
)
...
...
migraphx_infer/migraphx_perf.bash
View file @
3191f720
migraphx-driver perf
--batch
1
\
-n
10
\
--fp16
\
--migraphx
../weights/ground_opt_0430.mxr
\ No newline at end of file
--migraphx
../weights/ground_opt_0515_1.mxr
\ No newline at end of file
migraphx_infer/modify_onnx_0
430
.py
→
migraphx_infer/modify_onnx_0
522
.py
View file @
3191f720
...
...
@@ -407,7 +407,8 @@ def optimize_normal_attention(om: ONNXModifier):
"Cast_for_attention_mask"
,
[
"attention_mask"
],
[
"Cast_for_attention_mask_output_0"
],
to
=
1
,
# to=1, # float32
to
=
6
,
# int32
index
=
mask_next_node
.
index
)
reducesum_node
=
om
.
create_node
(
"ReduceSum"
,
"ReduceSum_for_mask"
,
...
...
@@ -428,7 +429,7 @@ def optimize_normal_attention(om: ONNXModifier):
# fuse_one_attention(om, f"/transformer/encoder/text_layers.{i}/self_attn/Softmax", "text_token_mask", num_heads=4)
# /transformer/decoder
fuse_one_attention
(
om
,
f
"/transformer/decoder/layers.
{
i
}
/self_attn/Softmax"
,
new_mask
,
num_heads
=
8
)
#
fuse_one_attention(om, f"/transformer/decoder/layers.{i}/ca_text/Softmax", new_mask, num_heads=8)
fuse_one_attention
(
om
,
f
"/transformer/decoder/layers.
{
i
}
/ca_text/Softmax"
,
new_mask
,
num_heads
=
8
)
om
.
update_map
()
...
...
@@ -613,17 +614,17 @@ def main():
input_onnx_path
=
sys
.
argv
[
1
]
output_onnx_path
=
sys
.
argv
[
2
]
# input_onnx_path = "ground_sim.onnx"
# output_onnx_path = "ground_sim_0
430
.onnx"
# output_onnx_path = "ground_sim_0
520_new
.onnx"
om
=
ONNXModifier
(
input_onnx_path
)
optimize_where_ndoes
(
om
)
# 1. 替换where节点
optimize_transpose_nodes
(
om
)
# 2. 优化transpose节点
optmize_sin_cos_block
(
om
)
# 3. 优化位置编码
optimize_where_ndoes
(
om
)
# 1. 替换where节点
optimize_transpose_nodes
(
om
)
# 2. 优化transpose节点
optmize_sin_cos_block
(
om
)
# 3. 优化位置编码
om
.
add_opset_import
(
"com.microsoft"
,
1
)
optimize_normal_attention
(
om
)
# 4. 融合bert、transformer中的mha
optimize_normal_attention
(
om
)
# 4. 融合bert、transformer中的mha
# optimize_backbone_attention(om) # 5. 融合backbone中的注意力
optimize_ms_deform_attn
(
om
)
# 6. 融合多尺度可变形注意力
optimize_bidirect_attention
(
om
)
# 7. 优化双向注意力
optimize_ms_deform_attn
(
om
)
# 6. 融合多尺度可变形注意力
optimize_bidirect_attention
(
om
)
# 7. 优化双向注意力
om
.
save
(
output_onnx_path
,
save_as_external_data
=
False
)
...
...
migraphx_infer/onnx_sim.py
View file @
3191f720
...
...
@@ -2,8 +2,8 @@ import onnx
from
onnxsim
import
simplify
from
onnxconverter_common
import
float16
onnx_model_path
=
"./weights/ground.onnx"
sim_model_path
=
"./weights/ground_sim.onnx"
onnx_model_path
=
".
.
/weights/ground.onnx"
sim_model_path
=
".
.
/weights/ground_sim.onnx"
print
(
"1️⃣ 正在进行 ONNX Simplify..."
)
model
=
onnx
.
load
(
onnx_model_path
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment