Commit 3191f720 authored by zk
Browse files

修改了部分migraphx代码

parent 39a85c88
...@@ -21,7 +21,9 @@ docker pull image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.5.1-ubuntu22.04-dt ...@@ -21,7 +21,9 @@ docker pull image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.5.1-ubuntu22.04-dt
在进行编译和运行前,请先激活相关的计算栈环境并配置 HuggingFace 镜像: 在进行编译和运行前,请先激活相关的计算栈环境并配置 HuggingFace 镜像:
```bash ```bash
source /opt/dtk/cuda/env.sh source /opt/dtk/cuda/cuda-12/env.sh
cp -r /opt/dtk/cuda/cuda-12/include /opt/dtk/cuda/cuda-12/include-bak
rm -rf /opt/dtk/cuda/cuda-12/include
export HF_ENDPOINT=https://hf-mirror.com export HF_ENDPOINT=https://hf-mirror.com
``` ```
...@@ -206,7 +208,7 @@ cd migraphx_infer ...@@ -206,7 +208,7 @@ cd migraphx_infer
``` ```
2. 运行转换onnx脚本 2. 运行转换onnx脚本
将简化后的onnx转换为要用migraphx推理的onnx 将简化后的onnx转换为要用migraphx推理的onnx(ground_sim.onnx->ground_opt.onnx)
```bash ```bash
bash migraphx_export.bash bash migraphx_export.bash
``` ```
......
export MIGRAPHX_TRACE_COMPILE=1 # export MIGRAPHX_TRACE_COMPILE=1
MIGRAPHX_ENABLE_GRAPHAPI_REDUCTION=1
MIGRAPHX_ENABLE_LAYERNORM_FUSION=1
migraphx-driver perf --onnx \ migraphx-driver perf --onnx \
../weights/ground_opt_0430.onnx \ ../test0525/ground_opt_0509.onnx \
--fp16 \ --fp16 \
--output \ --output \
../weights/ground_opt_0430.mxr ../test0525/ground_opt_0515.mxr
# ../weights/ground_opt_0430.mxr > migraphx_log.log 2>&1 # ../weights/ground_opt_0430.mxr > migraphx_log.log 2>&1
\ No newline at end of file
...@@ -276,7 +276,7 @@ def benchmark_performance( ...@@ -276,7 +276,7 @@ def benchmark_performance(
if __name__ == "__main__": if __name__ == "__main__":
model_path = "../weights/ground_opt_0430.onnx" model_path = "../weights/ground_opt_0430.onnx"
cache_path = "../weights/ground_opt_0430.mxr" cache_path = "../weights/ground_opt_0515_1.mxr"
img_path = "../images/in/car_1.jpg" img_path = "../images/in/car_1.jpg"
BOX_TRESHOLD = 0.35 BOX_TRESHOLD = 0.35
...@@ -288,7 +288,7 @@ if __name__ == "__main__": ...@@ -288,7 +288,7 @@ if __name__ == "__main__":
model = MIGraphXModel( model = MIGraphXModel(
model_path, model_path,
cache_path=cache_path, cache_path=cache_path,
device_id=5, device_id=2,
force_recompile=False force_recompile=False
) )
......
migraphx-driver perf --batch 1 \ migraphx-driver perf --batch 1 \
-n 10 \ -n 10 \
--fp16 \ --fp16 \
--migraphx ../weights/ground_opt_0430.mxr --migraphx ../weights/ground_opt_0515_1.mxr
\ No newline at end of file \ No newline at end of file
...@@ -407,7 +407,8 @@ def optimize_normal_attention(om: ONNXModifier): ...@@ -407,7 +407,8 @@ def optimize_normal_attention(om: ONNXModifier):
"Cast_for_attention_mask", "Cast_for_attention_mask",
["attention_mask"], ["attention_mask"],
["Cast_for_attention_mask_output_0"], ["Cast_for_attention_mask_output_0"],
to=1, # to=1, # float32
to=6, # int32
index=mask_next_node.index) index=mask_next_node.index)
reducesum_node = om.create_node("ReduceSum", reducesum_node = om.create_node("ReduceSum",
"ReduceSum_for_mask", "ReduceSum_for_mask",
...@@ -428,7 +429,7 @@ def optimize_normal_attention(om: ONNXModifier): ...@@ -428,7 +429,7 @@ def optimize_normal_attention(om: ONNXModifier):
# fuse_one_attention(om, f"/transformer/encoder/text_layers.{i}/self_attn/Softmax", "text_token_mask", num_heads=4) # fuse_one_attention(om, f"/transformer/encoder/text_layers.{i}/self_attn/Softmax", "text_token_mask", num_heads=4)
# /transformer/decoder # /transformer/decoder
fuse_one_attention(om, f"/transformer/decoder/layers.{i}/self_attn/Softmax", new_mask, num_heads=8) fuse_one_attention(om, f"/transformer/decoder/layers.{i}/self_attn/Softmax", new_mask, num_heads=8)
# fuse_one_attention(om, f"/transformer/decoder/layers.{i}/ca_text/Softmax", new_mask, num_heads=8) fuse_one_attention(om, f"/transformer/decoder/layers.{i}/ca_text/Softmax", new_mask, num_heads=8)
om.update_map() om.update_map()
...@@ -613,17 +614,17 @@ def main(): ...@@ -613,17 +614,17 @@ def main():
input_onnx_path = sys.argv[1] input_onnx_path = sys.argv[1]
output_onnx_path = sys.argv[2] output_onnx_path = sys.argv[2]
# input_onnx_path = "ground_sim.onnx" # input_onnx_path = "ground_sim.onnx"
# output_onnx_path = "ground_sim_0430.onnx" # output_onnx_path = "ground_sim_0520_new.onnx"
om = ONNXModifier(input_onnx_path) om = ONNXModifier(input_onnx_path)
optimize_where_ndoes(om) # 1. 替换where节点 optimize_where_ndoes(om) # 1. 替换where节点
optimize_transpose_nodes(om) # 2. 优化transpose节点 optimize_transpose_nodes(om) # 2. 优化transpose节点
optmize_sin_cos_block(om) # 3. 优化位置编码 optmize_sin_cos_block(om) # 3. 优化位置编码
om.add_opset_import("com.microsoft", 1) om.add_opset_import("com.microsoft", 1)
optimize_normal_attention(om) # 4. 融合bert、transformer中的mha optimize_normal_attention(om) # 4. 融合bert、transformer中的mha
# optimize_backbone_attention(om) # 5. 融合backbone中的注意力 # optimize_backbone_attention(om) # 5. 融合backbone中的注意力
optimize_ms_deform_attn(om) # 6. 融合多尺度可变形注意力 optimize_ms_deform_attn(om) # 6. 融合多尺度可变形注意力
optimize_bidirect_attention(om) # 7. 优化双向注意力 optimize_bidirect_attention(om) # 7. 优化双向注意力
om.save(output_onnx_path, save_as_external_data=False) om.save(output_onnx_path, save_as_external_data=False)
......
...@@ -2,8 +2,8 @@ import onnx ...@@ -2,8 +2,8 @@ import onnx
from onnxsim import simplify from onnxsim import simplify
from onnxconverter_common import float16 from onnxconverter_common import float16
onnx_model_path = "./weights/ground.onnx" onnx_model_path = "../weights/ground.onnx"
sim_model_path = "./weights/ground_sim.onnx" sim_model_path = "../weights/ground_sim.onnx"
print("1️⃣ 正在进行 ONNX Simplify...") print("1️⃣ 正在进行 ONNX Simplify...")
model = onnx.load(onnx_model_path) model = onnx.load(onnx_model_path)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment