修改了部分migraphx代码

3191f720 · zk · 39a85c88
Commit 3191f720 authored May 28, 2026 by zk
6 changed files
--- a/README.md
+++ b/README.md
@@ -21,7 +21,9 @@ docker pull image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.5.1-ubuntu22.04-dt
 在进行编译和运行前，请先激活相关的计算栈环境并配置 HuggingFace 镜像：
 ```bash
-source /opt/dtk/cuda/env.sh
+source /opt/dtk/cuda/cuda-12/env.sh
+cp -r /opt/dtk/cuda/cuda-12/include /opt/dtk/cuda/cuda-12/include-bak
+rm -rf /opt/dtk/cuda/cuda-12/include
 export HF_ENDPOINT=https://hf-mirror.com
 ```
@@ -206,7 +208,7 @@ cd migraphx_infer
 ```
 2. 运行转换onnx脚本
-将简化后的onnx转换为要用migraphx推理的onnx
+将简化后的onnx转换为要用migraphx推理的onnx(ground_sim.onnx->ground_opt.onnx)
 ```bash
 bash migraphx_export.bash
 ```

--- a/migraphx_infer/migraphx_export.bash
+++ b/migraphx_infer/migraphx_export.bash
-export MIGRAPHX_TRACE_COMPILE=1
+# export MIGRAPHX_TRACE_COMPILE=1
+# Export the optimisation switches: a bare NAME=1 assignment on its own line
+# stays local to this shell and is not inherited by the migraphx-driver
+# child process started below.
+export MIGRAPHX_ENABLE_GRAPHAPI_REDUCTION=1
+export MIGRAPHX_ENABLE_LAYERNORM_FUSION=1
 migraphx-driver perf --onnx \
-    ../weights/ground_opt_0430.onnx \
+    ../test0525/ground_opt_0509.onnx \
    --fp16 \
    --output \
-    ../weights/ground_opt_0430.mxr
+    ../test0525/ground_opt_0515.mxr
    # ../weights/ground_opt_0430.mxr > migraphx_log.log 2>&1
\ No newline at end of file
--- a/migraphx_infer/migraphx_infer.py
+++ b/migraphx_infer/migraphx_infer.py
@@ -276,7 +276,7 @@ def benchmark_performance(
 if __name__ == "__main__":
    model_path = "../weights/ground_opt_0430.onnx"
-    cache_path = "../weights/ground_opt_0430.mxr"
+    cache_path = "../weights/ground_opt_0515_1.mxr"
    img_path = "../images/in/car_1.jpg"
    BOX_TRESHOLD = 0.35
@@ -288,7 +288,7 @@ if __name__ == "__main__":
    model = MIGraphXModel(
        model_path,
        cache_path=cache_path,
-        device_id=5,
+        device_id=2,
        force_recompile=False 
    )

--- a/migraphx_infer/migraphx_perf.bash
+++ b/migraphx_infer/migraphx_perf.bash
+# Run migraphx-driver perf on the .mxr file passed via --migraphx
+# (options used: --batch 1, -n 10, --fp16).
 migraphx-driver perf --batch 1 \
    -n 10 \
    --fp16 \
-    --migraphx ../weights/ground_opt_0430.mxr
+    --migraphx ../weights/ground_opt_0515_1.mxr
\ No newline at end of file
--- a/migraphx_infer/modify_onnx_0430.py
+++ b/migraphx_infer/modify_onnx_0430.py
@@ -407,7 +407,8 @@ def optimize_normal_attention(om: ONNXModifier):
                                   "Cast_for_attention_mask",
                                   ["attention_mask"],
                                   ["Cast_for_attention_mask_output_0"],
-                                   to=1,
+                                   # to=1,  # float32
+                                   to=6,  # int32
                                   index=mask_next_node.index)
        reducesum_node = om.create_node("ReduceSum", 
                                        "ReduceSum_for_mask", 
@@ -428,7 +429,7 @@ def optimize_normal_attention(om: ONNXModifier):
        # fuse_one_attention(om, f"/transformer/encoder/text_layers.{i}/self_attn/Softmax", "text_token_mask", num_heads=4)
        # /transformer/decoder
        fuse_one_attention(om, f"/transformer/decoder/layers.{i}/self_attn/Softmax", new_mask, num_heads=8)
-        # fuse_one_attention(om, f"/transformer/decoder/layers.{i}/ca_text/Softmax", new_mask, num_heads=8)
+        fuse_one_attention(om, f"/transformer/decoder/layers.{i}/ca_text/Softmax", new_mask, num_heads=8)
    om.update_map()
@@ -613,17 +614,17 @@ def main():
    input_onnx_path = sys.argv[1]
    output_onnx_path = sys.argv[2]
    # input_onnx_path = "ground_sim.onnx"
-    # output_onnx_path = "ground_sim_0430.onnx"
+    # output_onnx_path = "ground_sim_0520_new.onnx"
    om = ONNXModifier(input_onnx_path)
-    optimize_where_ndoes(om)       # 1. 替换where节点
+    optimize_where_ndoes(om)         # 1. 替换where节点
-    optimize_transpose_nodes(om)   # 2. 优化transpose节点
+    optimize_transpose_nodes(om)     # 2. 优化transpose节点
-    optmize_sin_cos_block(om)      # 3. 优化位置编码
+    optmize_sin_cos_block(om)        # 3. 优化位置编码
    om.add_opset_import("com.microsoft", 1)
-    optimize_normal_attention(om)  # 4. 融合bert、transformer中的mha
+    optimize_normal_attention(om)    # 4. 融合bert、transformer中的mha
    # optimize_backbone_attention(om)  # 5. 融合backbone中的注意力
-    optimize_ms_deform_attn(om)  # 6. 融合多尺度可变形注意力
+    optimize_ms_deform_attn(om)      # 6. 融合多尺度可变形注意力
-    optimize_bidirect_attention(om)    # 7. 优化双向注意力
+    optimize_bidirect_attention(om)  # 7. 优化双向注意力
    om.save(output_onnx_path, save_as_external_data=False)

--- a/migraphx_infer/onnx_sim.py
+++ b/migraphx_infer/onnx_sim.py
@@ -2,8 +2,8 @@ import onnx
 from onnxsim import simplify
 from onnxconverter_common import float16
-onnx_model_path = "./weights/ground.onnx"
+onnx_model_path = "../weights/ground.onnx"
-sim_model_path = "./weights/ground_sim.onnx"
+sim_model_path = "../weights/ground_sim.onnx"
 print("1️⃣ 正在进行 ONNX Simplify...")
 model = onnx.load(onnx_model_path)