增加offloadcopy、批量处理图片等功能

b51b1aac · shangxl · be0d1a01 · b51b1aac · b51b1aac · b51b1aac
Commit b51b1aac authored Sep 09, 2025 by shangxl
20 changed files
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -5,7 +5,7 @@ cmake_minimum_required(VERSION 3.5)
 project(DeepLabV3)
 # 设置编译器
-set(CMAKE_CXX_COMPILER g++)
+set(CMAKE_CXX_COMPILER hipcc)
 set(CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS} -std=c++17) # 2.2版本以上需要c++17
 set(CMAKE_BUILD_TYPE release)

--- a/Doc/Images/000001.jpg
+++ b/Doc/Images/000001.jpg
--- a/Doc/Images/000002.jpg
+++ b/Doc/Images/000002.jpg
--- a/Doc/Images/000003.jpg
+++ b/Doc/Images/000003.jpg
--- a/Doc/Images/Result_000001.jpg
+++ b/Doc/Images/Result_000001.jpg
--- a/Doc/Images/Result_000002.jpg
+++ b/Doc/Images/Result_000002.jpg
--- a/Doc/Images/Result_000003.jpg
+++ b/Doc/Images/Result_000003.jpg
--- a/Doc/Images/deeplabv3_02.png
+++ b/Doc/Images/deeplabv3_02.png
--- a/Doc/Images/deeplabv3_03.png
+++ b/Doc/Images/deeplabv3_03.png
--- a/Doc/Tutorial_Cpp.md
+++ b/Doc/Tutorial_Cpp.md
@@ -12,47 +12,98 @@
 在模型初始化的过程中，首先采用parse_onnx()函数根据提供的模型地址加载图像分割deeplabv3的onnx模型，保存在net中。其次，通过net.get_parameter_shapes()获取deeplabv3模型的输入属性，包含inputName和inputShape。最后，完成模型加载后使用migraphx::gpu::target{}设置编译模式为GPU模式，并使用compile()函数编译模型，完成模型的初始化过程。
-其中，模型地址设置在/Resource/Configuration.xml文件中的Unet节点中。
+其中，模型地址设置在/Resource/Configuration.xml文件中的DeepLabV3节点中。
 ```C++
-ErrorCode Unet::Initialize(InitializationParameterOfSegmentation initParamOfSegmentationUnet)
+ErrorCode DeepLabV3::Initialize(InitializationParameterOfSegmentation initParamOfSegmentationUnet){
-{
    ...
    // 加载模型
-    net = migraphx::parse_onnx(modelPath); 
+    if(!Exists(modelPath))
-    LOG_INFO(logFile,"succeed to load model: %s\n",GetFileName(modelPath).c_str());
-    // 获取模型输入/输出节点信息
-    std::cout<<"inputs:"<<std::endl;
-    std::unordered_map<std::string, migraphx::shape> inputs=net.get_inputs();
-    for(auto i:inputs)
    {
-        std::cout<<i.first<<":"<<i.second<<std::endl;
+        LOG_ERROR(stdout, "%s not exist!\n", modelPath.c_str());
+        return MODEL_NOT_EXIST;
    }
-    std::cout<<"outputs:"<<std::endl;
-    std::unordered_map<std::string, migraphx::shape> outputs=net.get_outputs();
+    migraphx::onnx_options onnx_options;
-    for(auto i:outputs)
+    if(initParamOfSegmentationUnet.loadMode){
-    {
+        onnx_options.map_input_dims["input"] = {1, 3, 513, 513};
-        std::cout<<i.first<<":"<<i.second<<std::endl;
+    }else{
+        onnx_options.map_input_dims["input"] = {3, 3, 513, 513};
    }
-    inputName=inputs.first;
+    net = migraphx::parse_onnx(modelPath,onnx_options);
-    inputShape=inputs.second;
+    LOG_INFO(stdout, "succeed to load model: %s\n", GetFileName(modelPath).c_str());
-    int N=inputShape.lens()[0];
-    int C=inputShape.lens()[1];
-    int H=inputShape.lens()[2];
-    int W=inputShape.lens()[3];
-    inputSize=cv::Size(W,H);
+    // 获取模型输入/输出节点信息
+    std::unordered_map<std::string, migraphx::shape> inputs  = net.get_inputs();
+    std::unordered_map<std::string, migraphx::shape> outputs = net.get_outputs();
+    inputName = inputs.begin()->first;
+    inputShape = inputs.begin()->second;
+    outputName                                               = outputs.begin()->first;
+    outputShape                                              = outputs.begin()->second;
+    auto it = outputs.begin();
+    ++it;
+    outputName2                                              = it->first;
+    outputShape2                                             = it->second;
+    int N = inputShape.lens()[0];
+    int C = inputShape.lens()[1];
+    int H = inputShape.lens()[2];
+    int W = inputShape.lens()[3];
+    inputSize = cv::Size(W, H);
    // 设置模型为GPU模式
    migraphx::target gpuTarget = migraphx::gpu::target{};
+    if(useInt8){
+        std::vector<cv::Mat> calibrateImages;
+        std::string folderPath = "../Resource/Images/calibrateImages/";
+        std::string calibrateImageExt = "*.jpg";
+        std::vector<cv::String> calibrateImagePaths;
+        cv::glob(folderPath + calibrateImageExt, calibrateImagePaths, false);
+        for(const auto& path : calibrateImagePaths){
+            calibrateImages.push_back(cv::imread(path, 1));
+        }
+        cv::Mat inputcalibrateBlob;
+        cv::dnn::blobFromImages(calibrateImages, inputcalibrateBlob, 1 / 255.0, inputSize, cv::Scalar(0, 0, 0), true, false);
+        std::unordered_map<std::string, migraphx::argument> inputData;
+        inputData[inputName] = migraphx::argument{inputShape, (float *)inputcalibrateBlob.data};
+        std::vector<std::unordered_map<std::string, migraphx::argument>> calibrationData = {inputData};
+         // INT8量化
+        migraphx::quantize_int8(net, gpuTarget, calibrationData);
+    }else{
+        migraphx::quantize_fp16(net);
+    }
    // 编译模型
    migraphx::compile_options options;
-    options.device_id=0;                          // 设置GPU设备，默认为0号设备
+    options.device_id    = 0; // 设置GPU设备，默认为0号设备
-    options.offload_copy=true;                    // 设置offload_copy
+    if(useOffloadCopy){
-    net.compile(gpuTarget,options);               
+        options.offload_copy = true;
-    LOG_INFO(logFile,"succeed to compile model: %s\n",GetFileName(modelPath).c_str());
+    }else{
+        options.offload_copy = false;
+    }
+    net.compile(gpuTarget, options);
+    LOG_INFO(stdout, "succeed to compile model: %s\n", GetFileName(modelPath).c_str());
+    if(!useOffloadCopy){
+        inputBufferDevice = nullptr;
+        hipMalloc(&inputBufferDevice, inputShape.bytes());
+        modalDataMap[inputName] = migraphx::argument{inputShape, inputBufferDevice};                   
+        outputBufferDevice = nullptr;
+        hipMalloc(&outputBufferDevice, outputShape.bytes());
+        outputBufferDevice2 = nullptr;
+        hipMalloc(&outputBufferDevice2, outputShape2.bytes());
+        modalDataMap[outputName] = migraphx::argument{outputShape, outputBufferDevice};
+        modalDataMap[outputName2] = migraphx::argument{outputShape2, outputBufferDevice2};
+        outputBufferHost             = nullptr; // host内存
+        outputBufferHost             = malloc(outputShape.bytes());
+        outputBufferHost2            = nullptr; // host内存
+        outputBufferHost2            = malloc(outputShape2.bytes());
+    }
    ...
 }
@@ -93,30 +144,49 @@ ErrorCode Unet::Segmentation(const cv::Mat &srcImage, cv::Mat &maskImage)
 ## 推理
-完成图像预处理后，就可以执行模型推理。首先，定义inputData表示deeplabv3模型的输入数据，inputName表示deeplabv3模型的输入节点名，采用migraphx::argument{inputShape, (float*)inputBlob.data}保存前面预处理的数据inputBlob，第一个参数表示输入数据的shape，第二个参数表示输入数据指针。其次，执行net.eval(inputData)获得模型的推理结果，由于这里只有一个输出节点，仅使用results[0]获取输出节点的数据，就可以对输出数据执行相关后处理操作。
+完成图像预处理后，就可以执行模型推理。
+当useOffloadCopy==true时，首先，定义inputData表示deeplabv3模型的输入数据，inputName表示deeplabv3模型的输入节点名，采用migraphx::argument{inputShape, (float*)inputBlob.data}保存前面预处理的数据inputBlob，第一个参数表示输入数据的shape，第二个参数表示输入数据指针。其次，执行net.eval(inputData)获得模型的推理结果，使用results[0]获取输出节点的数据，就可以对输出数据执行相关后处理操作。
+当useOffloadCopy==false时，首先，定义inputData表示deeplabv3模型的输入数据，并将数据拷贝到GPU中的输入内存。其次，调用net.eval(modalDataMap)执行推理，modalDataMap中保存着模型的GPU输出地址，推理的输出结果会保存在对应的输出内存中，将GPU输出数据拷贝到分配好的host输出内存后，即可获取输出节点的数据，就可以对输出数据执行相关后处理操作。
 ```c++
-ErrorCode Unet::Segmentation(const cv::Mat &srcImage, cv::Mat &maskImage)
+ErrorCode DeepLabV3::Segmentation(const cv::Mat &srcImage, cv::Mat &maskImage){
-{
    ...
-    // 创建输入数据
+	if(useOffloadCopy){
-    migraphx::parameter_map inputData;
+        // 创建输入数据
-    inputData[inputName]= migraphx::argument{inputShape, (float*)inputBlob.data};
+        std::unordered_map<std::string, migraphx::argument> inputData;
+        inputData[inputName] = migraphx::argument{inputShape, (float*)inputBatchBlob.data};
-    // 推理
+        // 推理
-    std::vector<migraphx::argument> results = net.eval(inputData);
+        std::vector<migraphx::argument> results = net.eval(inputData);
-    // 如果想要指定输出节点，可以给eval()函数中提供outputNames参数来实现
-    //std::vector<std::string> outputNames = {"outputs"};
+        // 获取输出节点的属性
-    //std::vector<migraphx::argument> inferenceResults = net.eval(inputData, outputNames);
+        migraphx::argument result   = results[0];                 // 获取第一个输出节点的数据
+        migraphx::shape outputShape = result.get_shape();         // 输出节点的shape
+        std::vector<std::size_t> outputSize = outputShape.lens(); // 每一维大小，维度顺序为(N,C,H,W)
-    // 获取输出节点的属性
-    migraphx::argument result = results[0];                 // 获取第一个输出节点的数据
+        int numberOfOutput = outputShape.elements();              // 输出节点元素的个数
-    migraphx::shape outputShape=result.get_shape();         // 输出节点的shape
+        float* data        = (float*)result.data();               // 输出节点数据指针
-    std::vector<std::size_t> outputSize=outputShape.lens(); // 每一维大小，维度顺序为(N,C,H,W) 
-    int numberOfOutput=outputShape.elements();              // 输出节点元素的个数
+    }else{
-    float *data = (float *)result.data();                   // 输出节点数据指针
+        migraphx::argument inputData = migraphx::argument{inputShape, (float*)inputBatchBlob.data};
+        // 拷贝到device输入内存
+        hipMemcpy(inputBufferDevice, inputData.data(), inputShape.bytes(), hipMemcpyHostToDevice);
+        // 推理
+        std::vector<migraphx::argument> results = net.eval(modalDataMap);
+        // 获取输出节点的属性
+        migraphx::argument result    = results[0];                                      // 获取第一个输出节点的数据
+        migraphx::shape outputShapes = result.get_shape();                              // 输出节点的shape
+        std::vector<std::size_t> outputSize = outputShapes.lens();                      // 每一维大小，维度顺序为(N,C,H,W)
+        int numberOfOutput = outputShapes.elements();                                   // 输出节点元素的个数
+        // 将device输出数据拷贝到分配好的host输出内存
+        hipMemcpy(outputBufferHost,outputBufferDevice, outputShapes.bytes(),hipMemcpyDeviceToHost); // 直接使用事先分配好的输出内存拷贝
+    }    
    ...
 }
@@ -129,11 +199,11 @@ ErrorCode Unet::Segmentation(const cv::Mat &srcImage, cv::Mat &maskImage)
 2.保存结果，创建一个cv::Mat，根据不同的通道索引在颜色映射表取值并按行依次赋值到Mat对应位置，得到最终的分割图像。
 ```c++
-ErrorCode Unet::Segmentation(const cv::Mat &srcImage, cv::Mat &maskImage)
+ErrorCode DeepLabV3::Segmentation(const cv::Mat &srcImage, cv::Mat &maskImage)
 {
    ...
-	cv::Mat outputImage(cv::Size(W, H), CV_8UC3);
+    cv::Mat outputImage(cv::Size(W, H), CV_8UC3);
    // 创建颜色映射表
    std::vector<cv::Scalar> color_map = create_color_map();
@@ -150,10 +220,8 @@ ErrorCode Unet::Segmentation(const cv::Mat &srcImage, cv::Mat &maskImage)
            outputImage.at<cv::Vec3b>(i, j)[0]= sc.val[0]; 
            outputImage.at<cv::Vec3b>(i, j)[1]= sc.val[1];
            outputImage.at<cv::Vec3b>(i, j)[2]= sc.val[2];
        }
    }
    maskImage = outputImage.clone();
    ...

--- a/Doc/Tutorial_Python.md
+++ b/Doc/Tutorial_Python.md
@@ -49,22 +49,47 @@ def Preprocessing(pil_img, newW, newH):
    # 获取模型输入/输出节点信息
    inputs = model.get_inputs()
    outputs = model.get_outputs()
+    inputName = model.get_parameter_names()[0]
+    inputShape = inputs[inputName].lens()
+    #量化
+    if useInt8:
+        dic = dict()
+        calibrate_folder_path = "../Resource/Images/"
+        calibrate_image_extensions = ('.jpg')
+        calibrate_image_list = []
+        for filename in os.listdir(calibrate_folder_path):    
+            # 检查文件是否为图片
+            if filename.lower().endswith(calibrate_image_extensions):
+                file_path = os.path.join(calibrate_folder_path, filename)
+                img = cv2.imread(file_path)
+                calibrate_image_list.append(Preprocessing(img, 513, 513))
+        calibrate_img = np.concatenate(calibrate_image_list,axis=0)
+        dic[inputName] = migraphx.argument(calibrate_img)
+        calibration = [dic]
+        migraphx.quantize_int8(model, migraphx.get_target("gpu"), calibration)
+    if useFP16: 
+        migraphx.quantize_fp16(model)
+    if offloadCopy :
+        # 编译模型
+        model.compile(migraphx.get_target("gpu"), device_id=0)      # device_id: 设置GPU设备，默认为0号设备
+        # 模型推理
+        mask = model.run({'input':input_img})      
+        result = mask[0]                                            # 得到第一个输出节点的结果    
+    else:
+        # 编译模型
+        model.compile(migraphx.get_target("gpu"),offload_copy=False, device_id=0)      # device_id: 设置GPU设备，默认为0号设备
+        modelData = AllocateOutputMemory(model)                                          # 为输出节点分配device内存，用于保存输出数据
+        modelData[inputName] = migraphx.to_gpu(migraphx.argument(input_img))
+        # 推理
+        mask = model.run(modelData)
+        result = migraphx.from_gpu(mask[0])                         # 获取第1个输出节点的数据,migraphx.argument类型
-    # 编译模型
-    model.compile(migraphx.get_target("gpu"), device_id=0)      # device_id: 设置GPU设备，默认为0号设备
-    # 图像预处理
-    img = cv2.imread("../Resource/Images/000001.jpg")
-    input_img = Preprocessing(img, 513, 513)
-    print(inputs)
-    # 模型推理
-    mask = model.run({'images':input_img})      
-    result = mask[0]                                            # 得到第一个输出节点的结果
    # 对通道维度进行softmax
    softmax_result = softmax(result)                            
    # 计算通道维度最大值对应的索引（即类别索引）      
-    max_indices = np.argmax(softmax_result, axis=1)             # 等价于 np.argmax(arr, axis=1, keepdims=False)
+    max_indices = np.argmax(softmax_result, axis=1)      
    # 使用预设颜色
    color_map = np.array([
        [0, 0, 0], [255, 0, 0], [0, 255, 0], [0, 0, 255],       # 0-3类
@@ -74,15 +99,17 @@ def Preprocessing(pil_img, newW, newH):
        [0, 64, 0], [0, 0, 64], [64, 64, 0], [64, 0, 64],       # 16-19类
        [0, 64, 64]                                             # 20类
    ], dtype=np.uint8)
+    for i in range(max_indices.shape[0]):
+        flat_index = max_indices[i]
+        rgb_image = color_map[flat_index]                          # # 将二维的类别索引图直接转换为三维的 RGB 彩色图像
+        fileName  = "Result_"+str(i+1)+".jpg"
+        cv2.imwrite(fileName, rgb_image)                            # 保存图像分割结果
-    flat_index = max_indices[0]                                 # 取第0批的数据 
+    print("Segmentation results have been saved to Python directory")                        # 保存图像分割结果
-    # 将二维的类别索引图直接转换为三维的 RGB 彩色图像
-    rgb_image = color_map[flat_index]
-    cv2.imwrite("Result.jpg", rgb_image)                         # 保存图像分割结果
 ```
-1.Preprocessing函数返回预处理后的数据（numpy类型），然后通过model.run({'images':input_img})得到推理结果，因为只有输入一张图片，所以通过mask[0]获取第一个输出节点的数据即可。
+1.Preprocessing函数返回预处理后的数据（numpy类型）。当offloadCopy为True时，通过model.run({'input':input_img})得到推理结果，并通过mask[0]获取第一个输出节点的数据；当offloadCopy为False时，通过mask = model.run(modelData)得到推理结果，再通过migraphx.from_gpu(mask[0])将第一个输出节点的数据从GPU拷贝回host。
 2.模型得到的推理结果并不能直接作为分割结果。首先，需要计算softmax值，计算不同通道同一[H,W]位置的softmax值，找出概率最高的通道。其次，根据不同的通道索引转换为三维的 RGB 彩色图像。最终，保存结果得到分割图像。

--- a/Python/DeepLabV3.py
+++ b/Python/DeepLabV3.py
 import numpy as np
 import cv2
 import migraphx
+import argparse
+import os
 def Preprocessing(pil_img, newW, newH):
    assert newW > 0 and newH > 0, 'Scale is too small' 
    img_nd = cv2.cvtColor(pil_img, cv2.COLOR_BGR2RGB)    # BGR转换为RGB
-    img_nd = cv2.resize(img_nd, (newW, newH))            # 将图像尺寸修改为256x256
+    img_nd = cv2.resize(img_nd, (newW, newH))            # 将图像尺寸修改为newW x newH
    if len(img_nd.shape) == 2:                           # 获取图像的维度信息
        img_nd = np.expand_dims(img_nd, axis=2)          # 如果是2维的 扩充为3维  
@@ -19,45 +21,100 @@ def Preprocessing(pil_img, newW, newH):
    return img
-def Sigmoid(x):
+def AllocateOutputMemory(model):
-  return 1 / (1 + np.exp(-x))
+    outputData = {}
+    for key in model.get_outputs().keys():
+        outputData[key] = migraphx.allocate_gpu(s=model.get_outputs()[key])
+    return outputData    
 # 对通道维度执行Softmax
 def softmax(arr):
-    # 1：对通道维度计算指数，避免数值溢出（减去最大值）
    exp_vals = np.exp(arr - np.max(arr, axis=1, keepdims=True))
-    # 2：计算通道维度的指数和
    sum_exp = np.sum(exp_vals, axis=1, keepdims=True)
-    # 3：归一化得到Softmax结果
    return exp_vals / sum_exp  
 if __name__ == '__main__':
-    # 设置最大输入shape
+    parser = argparse.ArgumentParser()
-    maxInput={"inputs":[1,3,256,256]}
+    parser.add_argument("loadMode", type=int,help="0:DeepLabV3 Single Image Sample.\t 1：DeepLabV3 Multiple Image Sample.")
+    parser.add_argument("--enable_offload_copy", action="store_true")
+    precision_group = parser.add_mutually_exclusive_group()
+    precision_group.add_argument("--int8",action="store_true")
+    precision_group.add_argument("--fp16",action="store_true")
+    args = parser.parse_args()
+    loadMode = args.loadMode
+    useInt8 = args.int8
+    useFP16 = args.fp16
+    offloadCopy = args.enable_offload_copy
+    #加载图片方式
+    if loadMode == 0:
+        maxInput={"input":[1,3,513,513]}
+        img = cv2.imread("../Resource/Images/000001.jpg")
+        input_img = Preprocessing(img, 513, 513)
+    else:
+        maxInput={"input":[3,3,513,513]}
+        folder_path = "../Resource/Images/"
+        image_extensions = ('.jpg')
+        image_list = []
+        for filename in os.listdir(folder_path):    
+            # 检查文件是否为图片
+            if filename.lower().endswith(image_extensions):
+                file_path = os.path.join(folder_path, filename)
+                img = cv2.imread(file_path)
+                image_list.append(Preprocessing(img, 513, 513))
+        input_img = np.concatenate(image_list,axis=0)
    # 加载模型
    model = migraphx.parse_onnx("../Resource/Models/deeplabv3_resnet101.onnx", map_input_dims=maxInput)
    # 获取模型输入/输出节点信息
    inputs = model.get_inputs()
    outputs = model.get_outputs()
+    inputName = model.get_parameter_names()[0]
+    inputShape = inputs[inputName].lens()
-    # 编译模型
+    #量化
-    model.compile(migraphx.get_target("gpu"), device_id=0)      # device_id: 设置GPU设备，默认为0号设备
+    if useInt8:
+        dic = dict()
-    # 图像预处理
+        calibrate_folder_path = "../Resource/Images/"
-    img = cv2.imread("../Resource/Images/000001.jpg")
+        calibrate_image_extensions = ('.jpg')
-    input_img = Preprocessing(img, 513, 513)
+        calibrate_image_list = []
+        for filename in os.listdir(calibrate_folder_path):    
+            # 检查文件是否为图片
+            if filename.lower().endswith(calibrate_image_extensions):
+                file_path = os.path.join(calibrate_folder_path, filename)
+                img = cv2.imread(file_path)
+                calibrate_image_list.append(Preprocessing(img, 513, 513))
+        calibrate_img = np.concatenate(calibrate_image_list,axis=0)
+        dic[inputName] = migraphx.argument(calibrate_img)
+        calibration = [dic]
+        migraphx.quantize_int8(model, migraphx.get_target("gpu"), calibration)
+    if useFP16: 
+        migraphx.quantize_fp16(model)
+    if offloadCopy :
+        # 编译模型
+        model.compile(migraphx.get_target("gpu"), device_id=0)      # device_id: 设置GPU设备，默认为0号设备
+        # 模型推理
+        mask = model.run({'input':input_img})      
+        result = mask[0]                                            # 得到第一个输出节点的结果    
+    else:
+        # 编译模型
+        model.compile(migraphx.get_target("gpu"),offload_copy=False, device_id=0)      # device_id: 设置GPU设备，默认为0号设备
+        modelData = AllocateOutputMemory(model)                                          # 为输出节点分配device内存，用于保存输出数据
+        modelData[inputName] = migraphx.to_gpu(migraphx.argument(input_img))
+        # 推理
+        mask = model.run(modelData)
+        result = migraphx.from_gpu(mask[0])                         # 获取第1个输出节点的数据,migraphx.argument类型
-    print(inputs)
-    # 模型推理
-    mask = model.run({'images':input_img})      
-    result = mask[0]                                            # 得到第一个输出节点的结果
    # 对通道维度进行softmax
    softmax_result = softmax(result)                            
    # 计算通道维度最大值对应的索引（即类别索引）      
-    max_indices = np.argmax(softmax_result, axis=1)             # 等价于 np.argmax(arr, axis=1, keepdims=False)
+    max_indices = np.argmax(softmax_result, axis=1)      
    # 使用预设颜色
    color_map = np.array([
        [0, 0, 0], [255, 0, 0], [0, 255, 0], [0, 0, 255],       # 0-3类
@@ -67,9 +124,12 @@ if __name__ == '__main__':
        [0, 64, 0], [0, 0, 64], [64, 64, 0], [64, 0, 64],       # 16-19类
        [0, 64, 64]                                             # 20类
    ], dtype=np.uint8)
+    for i in range(max_indices.shape[0]):
+        flat_index = max_indices[i]
+        rgb_image = color_map[flat_index]                          # # 将二维的类别索引图直接转换为三维的 RGB 彩色图像
+        fileName  = "Result_"+str(i+1)+".jpg"
+        cv2.imwrite(fileName, rgb_image)                            # 保存图像分割结果
+    print("Segmentation results have been saved to Python directory")
-    flat_index = max_indices[0]                                 # 取第0批的数据 
-    # 将二维的类别索引图直接转换为三维的 RGB 彩色图像
-    rgb_image = color_map[flat_index]
-    cv2.imwrite("Result.jpg", rgb_image)                         # 保存图像分割结果
--- a/Python/convert.py
+++ b/Python/convert.py
@@ -3,7 +3,6 @@ import torchvision
 from torchvision import models
 model = models.segmentation.deeplabv3_resnet101(pretrained=True)
 model.eval()  # 必须切换到推理模式，关闭 dropout/batchnorm 等训练特有的层
@@ -17,6 +16,10 @@ torch.onnx.export(
    input_tensor,           # 示例输入（用于确定计算图结构）
    onnx_file,              # 输出文件路径
    opset_version=12,       # ONNX算子集版本（建议≥11，支持更多算子）
-    input_names=["images"], # 输入节点名称（需与后续推理时一致）
+    input_names=["input"], # 输入节点名称（需与后续推理时一致）
-    output_names=["output"]# 输出节点名称
+    output_names=["output"],# 输出节点名称
+    dynamic_axes={          # 关键：指定 input 和 output 的批量维度（第 0 维）为动态
+        "input": {0: "batch_size"},
+        "output": {0: "batch_size"}
+    },
 )
\ No newline at end of file
--- a/README.md
+++ b/README.md
@@ -26,7 +26,7 @@ DeepLabv3 的核心原理如下：
 拉取镜像：
 ```
-docker pull image.sourcefind.cn:5000/dcu/admin/base/migraphx:4.3.0-ubuntu20.04-dtk24.04.1-py3.10
+docker pull image.sourcefind.cn:5000/dcu/admin/base/migraphx:5.0.0-ubuntu22.04-dtk25.04.1-py3.10
 ```
 创建并启动容器：
@@ -50,7 +50,7 @@ docker run --shm-size 16g --network=host --name=deeplabv3_migraphx --privileged
 source /opt/dtk/env.sh
 ```
-## 模型文件
+## 转换onnx模型
 ```
 # 安装onnx
 pip install onnx
@@ -158,7 +158,8 @@ cd <path_to_deeplabv3_migraphx>
 cd build/
 # 执行示例程序
-./DeepLabV3
+./DeepLabV3 1 		# 分割多张图片
+# ./DeepLabV3 0 	# 分割单张图片
 ```
 ## result
@@ -167,13 +168,95 @@ cd build/
 python程序运行结束后，会在当前目录中生成分割图像。
-<img src="./Doc/Images/deeplabv3_02.png" style="zoom:100%;" align=middle>
+<center class="half">
+<img src="./Doc/Images/000001.jpg" width = "50%" height="300px" alt="***" align=left />
+<img src="./Doc/Images/Result_000001.jpg" width = "50%" height="300px"  alt="***" align=right />
+</center>
+<center class="half">
+<img src="./Doc/Images/000002.jpg" width = "50%" height="300px" alt="***" align=left />
+<img src="./Doc/Images/Result_000002.jpg" width = "50%" height="300px"  alt="***" align=right />
+</center>
+<center class="half">
+<img src="./Doc/Images/000003.jpg" width = "50%" height="300px" alt="***" align=left />
+<img src="./Doc/Images/Result_000003.jpg" width = "50%" height="300px"  alt="***" align=right />
+</center>
 ### C++版本
 C++程序运行结束后，会在build目录生成分割图像。
-<img src="./Doc/Images/deeplabv3_03.png" style="zoom:100%;" align=middle>
+<center class="half">
+<img src="./Doc/Images/000001.jpg" width = "50%" height="300px" alt="***" align=left />
+<img src="./Doc/Images/Result_000001.jpg" width = "50%" height="300px"  alt="***" align=right />
+</center>
+<center class="half">
+<img src="./Doc/Images/000002.jpg" width = "50%" height="300px" alt="***" align=left />
+<img src="./Doc/Images/Result_000002.jpg" width = "50%" height="300px"  alt="***" align=right />
+</center>
+<center class="half">
+<img src="./Doc/Images/000003.jpg" width = "50%" height="300px" alt="***" align=left />
+<img src="./Doc/Images/Result_000003.jpg" width = "50%" height="300px"  alt="***" align=right />
+</center>
 ### 精度

--- a/Resource/Configuration.xml
+++ b/Resource/Configuration.xml
@@ -4,6 +4,9 @@
 	<!--DeepLabV3-->
 	<DeepLabV3>
 		<ModelPath>"../Resource/Models/deeplabv3_resnet101.onnx"</ModelPath>
+		<UseInt8>1</UseInt8>			  <!--是否使用int8,不支持-->
+		<UseFP16>0</UseFP16>			  <!--是否使用FP16-->
+		<UseOffloadCopy>0</UseOffloadCopy><!--是否使用offloadcopy-->
 	</DeepLabV3>
 </opencv_storage>
--- a/Resource/Images/000003.jpg
+++ b/Resource/Images/000003.jpg
--- a/Resource/Images/calibrateImages/000001.jpg
+++ b/Resource/Images/calibrateImages/000001.jpg
--- a/Resource/Images/calibrateImages/000002.jpg
+++ b/Resource/Images/calibrateImages/000002.jpg
--- a/Resource/Images/calibrateImages/000003.jpg
+++ b/Resource/Images/calibrateImages/000003.jpg
--- a/Resource/Images/calibrateImages/000004.jpg
+++ b/Resource/Images/calibrateImages/000004.jpg