#include #include #include #include #include #include namespace migraphxSamples { DetectorYOLOX::DetectorYOLOX() {} DetectorYOLOX::~DetectorYOLOX() { configurationFile.release(); } ErrorCode DetectorYOLOX::Initialize( InitializationParameterOfDetector initializationParameterOfDetector) { // 读取配置文件 std::string configFilePath = initializationParameterOfDetector.configFilePath; if (!Exists(configFilePath)) { LOG_ERROR(stdout, "no configuration file!\n"); return CONFIG_FILE_NOT_EXIST; } if (!configurationFile.open(configFilePath, cv::FileStorage::READ)) { LOG_ERROR(stdout, "fail to open configuration file\n"); return FAIL_TO_OPEN_CONFIG_FILE; } LOG_INFO(stdout, "succeed to open configuration file\n"); // 获取配置文件参数 cv::FileNode netNode = configurationFile["DetectorYOLOX"]; modelPath = (std::string)netNode["ModelPathStatic"]; std::string pathOfClassNameFile = (std::string)netNode["ClassNameFile"]; yoloxParameter.confidenceThreshold = (float)netNode["ConfidenceThreshold"]; yoloxParameter.nmsThreshold = (float)netNode["NMSThreshold"]; yoloxParameter.objectThreshold = (float)netNode["ObjectThreshold"]; yoloxParameter.numberOfClasses = (int)netNode["NumberOfClasses"]; useFP16 = (bool)(int)netNode["UseFP16"]; // 加载模型 if (!Exists(modelPath)) { LOG_ERROR(stdout, "%s not exist!\n", modelPath.c_str()); return MODEL_NOT_EXIST; } net = migraphx::parse_onnx(modelPath); LOG_INFO(stdout, "succeed to load model: %s\n", GetFileName(modelPath).c_str()); // 获取模型输入/输出节点信息 std::cout << "inputs:" << std::endl; std::unordered_map inputs = net.get_inputs(); for (auto i : inputs) { std::cout << i.first << ":" << i.second << std::endl; } std::cout << "outputs:" << std::endl; std::unordered_map outputs = net.get_outputs(); for (auto i : outputs) { std::cout << i.first << ":" << i.second << std::endl; } inputName = inputs.begin()->first; inputShape = inputs.begin()->second; int N = inputShape.lens()[0]; int C = inputShape.lens()[1]; int H = inputShape.lens()[2]; int W = inputShape.lens()[3]; inputSize = cv::Size(W, H); // log LOG_INFO(stdout, "InputSize:%dx%d\n", inputSize.width, inputSize.height); LOG_INFO(stdout, "InputName:%s\n", inputName.c_str()); LOG_INFO(stdout, "ConfidenceThreshold:%f\n", yoloxParameter.confidenceThreshold); LOG_INFO(stdout, "NMSThreshold:%f\n", yoloxParameter.nmsThreshold); LOG_INFO(stdout, "objectThreshold:%f\n", yoloxParameter.objectThreshold); LOG_INFO(stdout, "NumberOfClasses:%d\n", yoloxParameter.numberOfClasses); // 设置模型为GPU模式 migraphx::target gpuTarget = migraphx::gpu::target{}; // 量化 if (useFP16) { migraphx::quantize_fp16(net); } // 编译模型 migraphx::compile_options options; options.device_id = 0; options.offload_copy = true; net.compile(gpuTarget, options); LOG_INFO(stdout, "succeed to compile model: %s\n", GetFileName(modelPath).c_str()); // warm up std::unordered_map inputData; inputData[inputName] = migraphx::argument{inputShape}; net.eval(inputData); // 读取类别名 if (!pathOfClassNameFile.empty()) { std::ifstream classNameFile(pathOfClassNameFile); std::string line; while (getline(classNameFile, line)) { classNames.push_back(line); } } else { classNames.resize(yoloxParameter.numberOfClasses); } return SUCCESS; } void DetectorYOLOX::generate_grids_and_stride( std::vector &strides, std::vector &grid_strides, cv::Size inputSize) { for (auto stride : strides) { int num_grid_y = inputSize.height / stride; int num_grid_x = inputSize.width / stride; for (int g1 = 0; g1 < num_grid_y; g1++) { for (int g0 = 0; g0 < num_grid_x; g0++) { grid_strides.push_back((GridAndStride){g0, g1, stride}); } } } } void DetectorYOLOX::generate_yolox_proposals( std::vector grid_strides, float *feat_blob, float prob_threshold, std::vector &objects) { const int num_anchors = grid_strides.size(); float max_box_objectness = 0; for (int anchor_idx = 0; anchor_idx < num_anchors; anchor_idx++) { const int grid0 = grid_strides[anchor_idx].grid0; const int grid1 = grid_strides[anchor_idx].grid1; const int stride = grid_strides[anchor_idx].stride; const int basic_pos = anchor_idx * (yoloxParameter.numberOfClasses + 5); // yolox/models/yolo_head.py decode logic float x_center = (feat_blob[basic_pos + 0] + grid0) * stride; float y_center = (feat_blob[basic_pos + 1] + grid1) * stride; float w = exp(feat_blob[basic_pos + 2]) * stride; float h = exp(feat_blob[basic_pos + 3]) * stride; float x0 = x_center - w * 0.5f; float y0 = y_center - h * 0.5f; float box_objectness = feat_blob[basic_pos + 4]; max_box_objectness = box_objectness > max_box_objectness ? box_objectness : max_box_objectness; float max_box_cls_score = 0; int max_score_class_idx = 0; for (int class_idx = 0; class_idx < yoloxParameter.numberOfClasses; class_idx++) { float box_cls_score = feat_blob[basic_pos + 5 + class_idx]; if (box_cls_score > max_box_cls_score) { max_box_cls_score = box_cls_score; max_score_class_idx = class_idx; } } // class loop float box_prob = box_objectness * max_box_cls_score; if (box_objectness > yoloxParameter.objectThreshold && box_prob > yoloxParameter.confidenceThreshold) { Object obj; obj.rect.x = x0; obj.rect.y = y0; obj.rect.width = w; obj.rect.height = h; obj.label = max_score_class_idx; obj.prob = box_prob; objects.push_back(obj); } } // point anchor loop } void DetectorYOLOX::qsort_descent_inplace(std::vector &faceobjects, int left, int right) { int i = left; int j = right; float p = faceobjects[(left + right) / 2].prob; while (i <= j) { while (faceobjects[i].prob > p) i++; while (faceobjects[j].prob < p) j--; if (i <= j) { // swap std::swap(faceobjects[i], faceobjects[j]); i++; j--; } } #pragma omp parallel sections { #pragma omp section { if (left < j) qsort_descent_inplace(faceobjects, left, j); } #pragma omp section { if (i < right) qsort_descent_inplace(faceobjects, i, right); } } } void DetectorYOLOX::qsort_descent_inplace(std::vector &objects) { if (objects.empty()) return; qsort_descent_inplace(objects, 0, objects.size() - 1); } inline float DetectorYOLOX::intersection_area(const Object &a, const Object &b) { cv::Rect_ inter = a.rect & b.rect; return inter.area(); } void DetectorYOLOX::nms_sorted_bboxes(const std::vector &faceobjects, std::vector &picked, float nms_threshold) { picked.clear(); const int n = faceobjects.size(); std::vector areas(n); for (int i = 0; i < n; i++) { areas[i] = faceobjects[i].rect.area(); } for (int i = 0; i < n; i++) { const Object &a = faceobjects[i]; int keep = 1; for (int j = 0; j < (int)picked.size(); j++) { const Object &b = faceobjects[picked[j]]; // intersection over union float inter_area = intersection_area(a, b); float union_area = areas[i] + areas[picked[j]] - inter_area; // float IoU = inter_area / union_area if (inter_area / union_area > nms_threshold) keep = 0; } if (keep) picked.push_back(i); } } void DetectorYOLOX::decode_outputs(float *prob, std::vector &objects, float scalew, float scaleh, const int img_w, const int img_h, cv::Size inputSize) { std::vector proposals; std::vector strides = {8, 16, 32}; std::vector grid_strides; generate_grids_and_stride(strides, grid_strides, inputSize); generate_yolox_proposals(grid_strides, prob, yoloxParameter.confidenceThreshold, proposals); std::cout << "num of boxes before nms: " << proposals.size() << std::endl; qsort_descent_inplace(proposals); std::vector picked; nms_sorted_bboxes(proposals, picked, yoloxParameter.nmsThreshold); int count = picked.size(); std::cout << "num of boxes: " << count << std::endl; objects.resize(count); for (int i = 0; i < count; i++) { objects[i] = proposals[picked[i]]; // adjust offset to original unpadded float x0 = (objects[i].rect.x) / scalew; float y0 = (objects[i].rect.y) / scaleh; float x1 = (objects[i].rect.x + objects[i].rect.width) / scalew; float y1 = (objects[i].rect.y + objects[i].rect.height) / scaleh; // clip x0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f); y0 = std::max(std::min(y0, (float)(img_h - 1)), 0.f); x1 = std::max(std::min(x1, (float)(img_w - 1)), 0.f); y1 = std::max(std::min(y1, (float)(img_h - 1)), 0.f); objects[i].rect.x = x0; objects[i].rect.y = y0; objects[i].rect.width = x1 - x0; objects[i].rect.height = y1 - y0; } } void meshgrid(const cv::Range &x_range, const cv::Range &y_range, cv::Mat &xv, cv::Mat &yv) { // 初始化矩阵大小 int rows = y_range.end - y_range.start + 1; int cols = x_range.end - x_range.start + 1; // 创建 xv 和 yv 矩阵 xv = cv::Mat(rows, cols, CV_32F); yv = cv::Mat(rows, cols, CV_32F); // 逐行逐列赋值 for (int i = 0; i < rows; ++i) { for (int j = 0; j < cols; ++j) { xv.at(i, j) = static_cast(j + x_range.start); yv.at(i, j) = static_cast(i + y_range.start); } } } cv::Mat demo_postprocess(cv::Mat outputs, cv::Size img_size, bool p6 = false) { std::vector grids; std::vector expanded_strides; std::vector strides = p6 ? std::vector{8, 16, 32, 64} : std::vector{8, 16, 32}; std::vector hsizes, wsizes; for (int stride : strides) { hsizes.push_back(img_size.height / stride); wsizes.push_back(img_size.width / stride); } for (size_t i = 0; i < strides.size(); ++i) { cv::Mat xv, yv; meshgrid(cv::Range(0, wsizes[i]), cv::Range(0, hsizes[i]), xv, yv); cv::Mat grid = cv::Mat::zeros(hsizes[i] * wsizes[i], 2, CV_32F); for (int j = 0; j < hsizes[i] * wsizes[i]; ++j) { grid.at(j, 0) = xv.at(j); grid.at(j, 1) = yv.at(j); } grids.push_back(grid); cv::Mat expanded_stride = cv::Mat::ones(hsizes[i] * wsizes[i], 1, CV_32F) * strides[i]; expanded_strides.push_back(expanded_stride); } cv::Mat grids_concatenated, expanded_strides_concatenated; cv::vconcat(grids, grids_concatenated); cv::vconcat(expanded_strides, expanded_strides_concatenated); cv::Mat outputs_clone = outputs.clone(); for (int i = 0; i < outputs_clone.rows; ++i) { outputs_clone.at(i, 0) = (outputs.at(i, 0) + grids_concatenated.at(i, 0)) * expanded_strides_concatenated.at(i, 0); outputs_clone.at(i, 1) = (outputs.at(i, 1) + grids_concatenated.at(i, 1)) * expanded_strides_concatenated.at(i, 0); outputs_clone.at(i, 2) = exp(outputs.at(i, 2)) * expanded_strides_concatenated.at(i, 0); outputs_clone.at(i, 3) = exp(outputs.at(i, 3)) * expanded_strides_concatenated.at(i, 0); } return outputs_clone; } std::vector nms(cv::Mat boxes, cv::Mat scores, float nms_thr) { std::vector keep; std::vector areas(boxes.rows); // 计算所有框的面积 for (int i = 0; i < boxes.rows; ++i) { float x1 = boxes.at(i, 0); float y1 = boxes.at(i, 1); float x2 = boxes.at(i, 2); float y2 = boxes.at(i, 3); areas[i] = (x2 - x1 + 1) * (y2 - y1 + 1); } // 根据分数排序的索引 std::vector order(scores.rows); std::iota(order.begin(), order.end(), 0); std::sort(order.begin(), order.end(), [&scores](int i, int j) { return scores.at(i) > scores.at(j); }); // 执行 NMS while (!order.empty()) { int i = order[0]; keep.push_back(i); float xx1 = std::max(boxes.at(i, 0), boxes.at(order[1], 0)); float yy1 = std::max(boxes.at(i, 1), boxes.at(order[1], 1)); float xx2 = std::min(boxes.at(i, 2), boxes.at(order[1], 2)); float yy2 = std::min(boxes.at(i, 3), boxes.at(order[1], 3)); float w = std::max(0.0f, xx2 - xx1 + 1); float h = std::max(0.0f, yy2 - yy1 + 1); float inter = w * h; float ovr = inter / (areas[i] + areas[order[1]] - inter); std::vector inds; for (size_t j = 1; j < order.size(); ++j) { if (ovr <= nms_thr) { inds.push_back(order[j]); } } order = inds; } return keep; } cv::Mat multiclass_nms_class_agnostic(cv::Mat boxes, cv::Mat scores, float nms_thr, float score_thr) { // 获取每个框的最高分数的索引和分数 cv::Mat cls_inds; cv::Mat cls_scores = cv::Mat::zeros(scores.rows, 1, CV_32F); for (int i = 0; i < scores.rows; ++i) { int max_idx; // cv::minMaxIdx(scores.row(i), nullptr, &cls_scores.at(i), // nullptr, &max_idx); double cls_score; cv::minMaxIdx(scores.row(i), nullptr, &cls_score, nullptr, &max_idx); cls_scores.at(i) = static_cast(cls_score); cls_inds.push_back(max_idx); } // 过滤掉低于阈值的分数 cv::Mat valid_score_mask = cls_scores > score_thr; if (cv::countNonZero(valid_score_mask) == 0) { return cv::Mat(); // 如果没有有效的分数,返回空矩阵 } // 保留有效分数对应的框和类别索引 cv::Mat valid_scores = cls_scores(valid_score_mask); cv::Mat valid_boxes = boxes.rowRange(0, boxes.rows).clone(); // 复制框数据以便后续修改 cv::Mat valid_cls_inds = cls_inds(valid_score_mask); // 应用 NMS 算法 std::vector keep = nms(valid_boxes, valid_scores, nms_thr); if (keep.empty()) { return cv::Mat(); // 如果没有保留的框,返回空矩阵 } // 按行组合保留的框、分数和类别索引 cv::Mat dets(keep.size(), 6, CV_32F); for (size_t i = 0; i < keep.size(); ++i) { int idx = keep[i]; valid_boxes.row(idx).copyTo(dets.row(i).colRange(0, 4)); dets.at(i, 4) = valid_scores.at(idx); dets.at(i, 5) = valid_cls_inds.at(idx); } return dets; } ErrorCode DetectorYOLOX::Detect(const cv::Mat &srcImage, const std::vector &relInputShape, std::vector &resultsOfDetection) { if (srcImage.empty() || srcImage.type() != CV_8UC3) { LOG_ERROR(stdout, "image error!\n"); return IMAGE_ERROR; } // 数据预处理并转换为NCHW格式 inputSize = cv::Size(relInputShape[3], relInputShape[2]); cv::Mat inputBlob; cv::dnn::blobFromImage(srcImage, inputBlob, 1, inputSize, cv::Scalar(0, 0, 0), false, false); float ratio = std::min(inputSize.width / srcImage.rows, inputSize.height / srcImage.cols); // 创建输入数据 migraphx::parameter_map inputData; inputData[inputName] = migraphx::argument{inputShape, (float *)inputBlob.data}; // 推理 std::vector inferenceResults = net.eval(inputData); // 获取推理结果 std::vector outs; migraphx::argument result = inferenceResults[0]; // 转换为cv::Mat migraphx::shape outputShape = result.get_shape(); int shape[] = {outputShape.lens()[0], outputShape.lens()[1], outputShape.lens()[2]}; cv::Mat out(3, shape, CV_32F); memcpy(out.data, result.data(), sizeof(float) * outputShape.elements()); outs.push_back(out); // 获取先验框的个数 int numProposal = outs[0].size[1]; int numOut = outs[0].size[2]; // 变换输出的维度 outs[0] = outs[0].reshape(0, numProposal); float *prob = (float *)outs[0].data; std::vector objects; float scalew = inputSize.width / (srcImage.cols * 1.0); float scaleh = inputSize.height / (srcImage.rows * 1.0); decode_outputs(prob, objects, scalew, scaleh, srcImage.cols, srcImage.rows, inputSize); for (size_t i = 0; i < objects.size(); ++i) { ResultOfDetection result; result.boundingBox = objects[i].rect; result.confidence = objects[i].prob; // confidence result.classID = objects[i].label; // label result.className = classNames[objects[i].label]; resultsOfDetection.push_back(result); } return SUCCESS; } } // namespace migraphxSamples