Commit 3191f720 authored by zk's avatar zk
Browse files

修改了部分migraphx代码

parent 39a85c88
......@@ -21,7 +21,9 @@ docker pull image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.5.1-ubuntu22.04-dt
在进行编译和运行前,请先激活相关的计算栈环境并配置 HuggingFace 镜像:
```bash
source /opt/dtk/cuda/env.sh
source /opt/dtk/cuda/cuda-12/env.sh
cp -r /opt/dtk/cuda/cuda-12/include /opt/dtk/cuda/cuda-12/include-bak
rm -rf /opt/dtk/cuda/cuda-12/include
export HF_ENDPOINT=https://hf-mirror.com
```
......@@ -206,7 +208,7 @@ cd migraphx_infer
```
2. 运行转换onnx脚本
将简化后的onnx转换为要用migraphx推理的onnx
将简化后的 ONNX 模型转换为用于 MIGraphX 推理的 ONNX 模型(ground_sim.onnx -> ground_opt.onnx)
```bash
bash migraphx_export.bash
```
......
export MIGRAPHX_TRACE_COMPILE=1
# export MIGRAPHX_TRACE_COMPILE=1
MIGRAPHX_ENABLE_GRAPHAPI_REDUCTION=1
MIGRAPHX_ENABLE_LAYERNORM_FUSION=1
migraphx-driver perf --onnx \
../weights/ground_opt_0430.onnx \
../test0525/ground_opt_0509.onnx \
--fp16 \
--output \
../weights/ground_opt_0430.mxr
../test0525/ground_opt_0515.mxr
# ../weights/ground_opt_0430.mxr > migraphx_log.log 2>&1
\ No newline at end of file
......@@ -276,7 +276,7 @@ def benchmark_performance(
if __name__ == "__main__":
model_path = "../weights/ground_opt_0430.onnx"
cache_path = "../weights/ground_opt_0430.mxr"
cache_path = "../weights/ground_opt_0515_1.mxr"
img_path = "../images/in/car_1.jpg"
BOX_TRESHOLD = 0.35
......@@ -288,7 +288,7 @@ if __name__ == "__main__":
model = MIGraphXModel(
model_path,
cache_path=cache_path,
device_id=5,
device_id=2,
force_recompile=False
)
......
migraphx-driver perf --batch 1 \
-n 10 \
--fp16 \
--migraphx ../weights/ground_opt_0430.mxr
\ No newline at end of file
--migraphx ../weights/ground_opt_0515_1.mxr
\ No newline at end of file
......@@ -407,7 +407,8 @@ def optimize_normal_attention(om: ONNXModifier):
"Cast_for_attention_mask",
["attention_mask"],
["Cast_for_attention_mask_output_0"],
to=1,
# to=1, # float32
to=6, # int32
index=mask_next_node.index)
reducesum_node = om.create_node("ReduceSum",
"ReduceSum_for_mask",
......@@ -428,7 +429,7 @@ def optimize_normal_attention(om: ONNXModifier):
# fuse_one_attention(om, f"/transformer/encoder/text_layers.{i}/self_attn/Softmax", "text_token_mask", num_heads=4)
# /transformer/decoder
fuse_one_attention(om, f"/transformer/decoder/layers.{i}/self_attn/Softmax", new_mask, num_heads=8)
# fuse_one_attention(om, f"/transformer/decoder/layers.{i}/ca_text/Softmax", new_mask, num_heads=8)
fuse_one_attention(om, f"/transformer/decoder/layers.{i}/ca_text/Softmax", new_mask, num_heads=8)
om.update_map()
......@@ -613,17 +614,17 @@ def main():
input_onnx_path = sys.argv[1]
output_onnx_path = sys.argv[2]
# input_onnx_path = "ground_sim.onnx"
# output_onnx_path = "ground_sim_0430.onnx"
# output_onnx_path = "ground_sim_0520_new.onnx"
om = ONNXModifier(input_onnx_path)
optimize_where_ndoes(om) # 1. 替换where节点
optimize_transpose_nodes(om) # 2. 优化transpose节点
optmize_sin_cos_block(om) # 3. 优化位置编码
optimize_where_ndoes(om) # 1. 替换where节点
optimize_transpose_nodes(om) # 2. 优化transpose节点
optmize_sin_cos_block(om) # 3. 优化位置编码
om.add_opset_import("com.microsoft", 1)
optimize_normal_attention(om) # 4. 融合bert、transformer中的mha
optimize_normal_attention(om) # 4. 融合bert、transformer中的mha
# optimize_backbone_attention(om) # 5. 融合backbone中的注意力
optimize_ms_deform_attn(om) # 6. 融合多尺度可变形注意力
optimize_bidirect_attention(om) # 7. 优化双向注意力
optimize_ms_deform_attn(om) # 6. 融合多尺度可变形注意力
optimize_bidirect_attention(om) # 7. 优化双向注意力
om.save(output_onnx_path, save_as_external_data=False)
......
......@@ -2,8 +2,8 @@ import onnx
from onnxsim import simplify
from onnxconverter_common import float16
onnx_model_path = "./weights/ground.onnx"
sim_model_path = "./weights/ground_sim.onnx"
onnx_model_path = "../weights/ground.onnx"
sim_model_path = "../weights/ground_sim.onnx"
print("1️⃣ 正在进行 ONNX Simplify...")
model = onnx.load(onnx_model_path)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment