// yolov7-tiny.cc

#include <dirent.h>
#include <getopt.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>

#include <algorithm>
#include <cerrno>
#include <condition_variable>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <iterator>
#include <memory>
#include <mutex>
#include <new>
#include <queue>
#include <string>
#include <vector>

#include "grpc_client.h"
#include "http_client.h"
#include "json_utils.h"

#include <opencv2/opencv.hpp>
#include <opencv2/core/version.hpp>
#if CV_MAJOR_VERSION == 2
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#elif CV_MAJOR_VERSION >= 3
#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
#endif

// Expands a colour-conversion name (e.g. BGR2RGB) to the matching OpenCV
// constant. OpenCV 4 and every later major version use the cv::COLOR_*
// enumerators; older versions fall back to the legacy CV_* constants.
#if CV_MAJOR_VERSION >= 4
#define GET_TRANSFORMATION_CODE(x) cv::COLOR_##x
#else
#define GET_TRANSFORMATION_CODE(x) CV_##x
#endif

using namespace cv;
namespace tc = triton::client;

namespace {

// Protocol used to reach the inference server; main() uses it to pick
// between the HTTP and the gRPC client.
enum ProtocolType {
    HTTP = 0,
    GRPC = 1
};

// Description of the served model as filled in by ParseModelHttp().
// Numeric members are zero-initialised so a default-constructed instance
// never exposes indeterminate values.
struct ModelInfo {
    std::string output_name_;     // name of the single output tensor
    std::string input_name_;      // name of the single input tensor
    std::string input_datatype_;  // datatype string of the input (e.g. "FP32")
    int input_c_ = 0;             // input channels (shape element 1)
    int input_h_ = 0;             // input height   (shape element 2)
    int input_w_ = 0;             // input width    (shape element 3)
    std::string input_format_;    // "format" entry of the input configuration
    int type1_ = 0;               // OpenCV 1-channel type matching input_datatype_
    int type3_ = 0;               // OpenCV 3-channel type matching input_datatype_
    int max_batch_size_ = 0;      // max_batch_size from the model configuration
};

// A single detection: bounding box, confidence, class id/name and an
// "exist" flag. A default-constructed instance has confidence 0, class id 0
// and exist == true.
struct _ResultOfDetection
{
    cv::Rect boundingBox;
    float confidence = 0.0f;
    int classID = 0;
    std::string className;
    bool exist = true;

    _ResultOfDetection() {}
};
using ResultOfDetection = _ResultOfDetection;

// Greedy non-maximum suppression.
//
// Candidates are the boxes whose score is strictly above score_threshold.
// They are visited from the highest score to the lowest; every visited box
// that has not been suppressed is kept and suppresses all lower-scored
// candidates whose IoU with it exceeds nms_threshold.
//
// boxes           candidate rectangles
// scores          one score per box; the score of every suppressed box is
//                 reset to 0 (side effect kept for existing callers)
// score_threshold minimum score (exclusive) for a box to be considered
// nms_threshold   IoU above which a lower-scored box is suppressed
//
// Returns the indices of the kept boxes in ascending index order. Only the
// first min(boxes.size(), scores.size()) entries are examined.
std::vector<int> NMSBoxes(const std::vector<cv::Rect>& boxes, std::vector<float>& scores, float score_threshold, float nms_threshold)
{
    const size_t count = std::min(boxes.size(), scores.size());

    // Collect the candidates and precompute their areas.
    std::vector<float> areas(count, 0.0f);
    std::vector<size_t> order;
    order.reserve(count);
    for (size_t i = 0; i < count; ++i)
    {
        areas[i] = static_cast<float>(boxes[i].width) * static_cast<float>(boxes[i].height);
        if (scores[i] > score_threshold)
        {
            order.push_back(i);
        }
    }

    // Highest score first; stable so equal scores keep their index order.
    std::stable_sort(order.begin(), order.end(),
                     [&scores](size_t lhs, size_t rhs) { return scores[lhs] > scores[rhs]; });

    std::vector<char> suppressed(count, 0);
    std::vector<int> indices;
    for (size_t a = 0; a < order.size(); ++a)
    {
        const size_t i = order[a];
        if (suppressed[i])
        {
            continue;
        }
        indices.push_back(static_cast<int>(i));

        for (size_t b = a + 1; b < order.size(); ++b)
        {
            const size_t j = order[b];
            if (suppressed[j])
            {
                continue;
            }
            const float intersection = static_cast<float>((boxes[i] & boxes[j]).area());
            const float unionArea = areas[i] + areas[j] - intersection;
            // A non-positive union means both boxes are empty: nothing to suppress.
            if (unionArea > 0.0f && (intersection / unionArea) > nms_threshold)
            {
                suppressed[j] = 1;
                scores[j] = 0.0f;
            }
        }
    }

    std::sort(indices.begin(), indices.end());
    return indices;
}

// Loads an image and converts it into the planar buffer sent to the server.
//
// Steps: decode as a 3-channel image, convert BGR -> RGB, resize to img_size,
// convert to the requested OpenCV type, scale by 1/255 and finally split the
// interleaved pixels into consecutive channel planes stored in *input_data.
//
// filename      path of the image to load
// img_type1     OpenCV type of one channel plane
// img_type3     OpenCV type of the 3-channel image
// img_channels  channel count expected by the model (only 3 is accepted)
// img_size      target width/height
// input_data    receives the raw bytes; resized by this function
//
// Any failure is reported on stderr and terminates the process with exit(1).
void Preprocess(
    const std::string& filename, int img_type1, int img_type3, size_t img_channels, 
    const cv::Size& img_size, std::vector<uint8_t>* input_data)
{
    const cv::Mat decoded = cv::imread(filename, 1);
    if (decoded.empty()) {
        std::cerr << "error: unable to decode image " << filename << std::endl;
        exit(1);
    }

    // Only a 3-channel image feeding a 3-channel model is supported.
    const bool channels_match = (decoded.channels() == 3) && (img_channels == 3);
    if (!channels_match) {
        std::cerr << "unexpected number of channels " << decoded.channels()
                  << " in input image, model expects " << img_channels << "."
                  << std::endl;
        exit(1);
    }

    cv::Mat rgb;
    cv::cvtColor(decoded, rgb, GET_TRANSFORMATION_CODE(BGR2RGB));

    cv::Mat resized;
    cv::resize(rgb, resized, img_size);

    const int target_type = (img_channels == 3) ? img_type3 : img_type1;
    cv::Mat typed;
    resized.convertTo(typed, target_type);

    // Scale every channel by 1/255.
    const double scale = 1.0 / 255.0;
    const cv::Mat normalized = typed.mul(cv::Scalar(scale, scale, scale));

    const size_t total_bytes = normalized.total() * normalized.elemSize();
    input_data->resize(total_bytes);

    // Each plane is a Mat header over a slice of input_data, so cv::split
    // below writes the planar layout straight into the output buffer.
    std::vector<cv::Mat> planes;
    size_t offset = 0;
    for (size_t channel = 0; channel < img_channels; ++channel) {
        planes.emplace_back(img_size.height, img_size.width, img_type1, input_data->data() + offset);
        const cv::Mat& plane = planes.back();
        offset += plane.total() * plane.elemSize();
    }

    cv::split(normalized, planes);

    if (offset != total_bytes) {
        std::cerr << "unexpected total size of channels " << offset << ", expecting "
                  << total_bytes << std::endl;
        exit(1);
    }
}


// Decodes the detection tensor of one inference result, runs NMS, prints the
// detections and writes the annotated image to "result.jpg".
//
// The output tensor must be FP32 with shape [batch, proposals, values] where
// each proposal is laid out as [cx, cy, w, h, objectness, class scores...].
// Only the first image of the batch (filenames[0]) is decoded and drawn.
//
// result        inference result (ownership is taken)
// filenames     image files that made up the request; size must equal batch_size
// batch_size    number of images in the request
// output_name   name of the output tensor to read
// batching      accepted for interface compatibility; not used
// input_width   width the image was resized to before inference (default 640)
// input_height  height the image was resized to before inference (default 640)
//
// Any failure is reported on stderr and terminates the process with exit(1).
void Postprocess(
    const std::unique_ptr<tc::InferResult> result,
    const std::vector<std::string>& filenames, const size_t batch_size,
    const std::string& output_name, const bool batching,
    const int input_width = 640, const int input_height = 640)
{
    (void)batching;

    // Thresholds: objectness, objectness * class score, and NMS IoU.
    const float kObjectnessThreshold = 0.5f;
    const float kConfidenceThreshold = 0.25f;
    const float kNmsThreshold = 0.5f;
    // Number of leading values per proposal before the class scores start.
    const size_t kBoxFields = 5;

    if (!result->RequestStatus().IsOk()) {
        std::cerr << "inference  failed with error: " << result->RequestStatus()
                  << std::endl;
        exit(1);
    }
    if (filenames.size() != batch_size) {
        std::cerr << "expected " << batch_size << " filenames, got "
                  << filenames.size() << std::endl;
        exit(1);
    }
    if (filenames.empty()) {
        std::cerr << "no input filename available for post-processing" << std::endl;
        exit(1);
    }
    if (input_width <= 0 || input_height <= 0) {
        std::cerr << "invalid network input size " << input_width << "x"
                  << input_height << std::endl;
        exit(1);
    }

    std::vector<int64_t> shape;
    tc::Error err = result->Shape(output_name, &shape);
    if (!err.IsOk()) {
        std::cerr << "unable to get shape for " << output_name << std::endl;
        exit(1);
    }
    if (shape.size() != 3 || shape[0] < 1 || shape[1] < 0 ||
        shape[2] <= static_cast<int64_t>(kBoxFields)) {
        std::cerr << "unexpected shape for " << output_name
                  << ", expecting [batch, proposals, values] with more than "
                  << kBoxFields << " values per proposal" << std::endl;
        exit(1);
    }

    std::string datatype;
    err = result->Datatype(output_name, &datatype);
    if (!err.IsOk()) {
        std::cerr << "unable to get datatype for " << output_name << std::endl;
        exit(1);
    }
    // The buffer is interpreted as 32-bit floats below.
    if (datatype != "FP32") {
        std::cerr << "expecting datatype FP32 for " << output_name << ", got '"
                  << datatype << "'" << std::endl;
        exit(1);
    }

    const uint8_t* result_data = nullptr;
    size_t outputCount = 0;
    err = result->RawData(output_name, &result_data, &outputCount);
    if (!err.IsOk()) {
         std::cerr << "unable to get data for " << output_name << std::endl;
         exit(1);
    }

    const size_t numProposal = static_cast<size_t>(shape[1]);
    const size_t numOut = static_cast<size_t>(shape[2]);

    // Make sure the raw buffer really holds one full image worth of
    // proposals (written as a division to avoid overflow).
    const size_t availableFloats = outputCount / sizeof(float);
    if (numProposal != 0 && numOut > availableFloats / numProposal) {
        std::cerr << "output " << output_name << " holds " << outputCount
                  << " bytes, too small for its reported shape" << std::endl;
        exit(1);
    }

    // Copy into properly aligned float storage before reading.
    std::vector<float> predictions(numProposal * numOut);
    if (!predictions.empty()) {
        std::memcpy(predictions.data(), result_data, predictions.size() * sizeof(float));
    }

    cv::Mat srcImage = cv::imread(filenames[0], 1);
    if (srcImage.empty()) {
        std::cerr << "error: unable to decode image " << filenames[0] << std::endl;
        exit(1);
    }

    // Factors mapping network coordinates back to the source image.
    const float ratioh = static_cast<float>(srcImage.rows) / static_cast<float>(input_height);
    const float ratiow = static_cast<float>(srcImage.cols) / static_cast<float>(input_width);

    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;
    std::vector<int> classIds;

    for (size_t n = 0; n < numProposal; ++n)
    {
        const float* row = predictions.data() + n * numOut;
        const float boxScore = row[4];
        if (!(boxScore > kObjectnessThreshold))
        {
            continue;
        }

        const float* classBegin = row + kBoxFields;
        const float* classEnd = row + numOut;
        const float* best = std::max_element(classBegin, classEnd);
        const double maxClassScore = static_cast<double>(*best) * boxScore;
        if (!(maxClassScore > kConfidenceThreshold))
        {
            continue;
        }

        // Convert centre/size to a top-left anchored rectangle.
        const float cx = row[0] * ratiow;
        const float cy = row[1] * ratioh;
        const float w = row[2] * ratiow;
        const float h = row[3] * ratioh;
        const int left = static_cast<int>(cx - 0.5 * w);
        const int top = static_cast<int>(cy - 0.5 * h);

        confidences.push_back(static_cast<float>(maxClassScore));
        boxes.push_back(cv::Rect(left, top, static_cast<int>(w), static_cast<int>(h)));
        classIds.push_back(static_cast<int>(best - classBegin));
    }

    const std::vector<int> indices = NMSBoxes(boxes, confidences, kConfidenceThreshold, kNmsThreshold);

    fprintf(stdout,"//////////////Detection Results//////////////\n");
    for (size_t i = 0; i < indices.size(); ++i)
    {
        const int idx = indices[i];

        ResultOfDetection detection;
        detection.boundingBox = boxes[idx];
        detection.confidence = confidences[idx];
        detection.classID = classIds[idx];

        cv::rectangle(srcImage, detection.boundingBox, cv::Scalar(0,255,255),2);

        fprintf(stdout,"box:%d %d %d %d,label:%d,confidence:%.3f\n",detection.boundingBox.x,
        detection.boundingBox.y,detection.boundingBox.width,detection.boundingBox.height,detection.classID,detection.confidence);
    }

    if (!cv::imwrite("result.jpg", srcImage)) {
        std::cerr << "error: unable to write result.jpg" << std::endl;
    }
}

// Translates a datatype string into the matching OpenCV matrix types.
//
// dtype  datatype string ("UINT8", "INT8", "UINT16", "INT16", "INT32",
//        "FP32" or "FP64")
// type1  receives the 1-channel OpenCV type
// type3  receives the 3-channel OpenCV type
//
// Returns true when dtype is recognised; otherwise returns false and leaves
// both outputs untouched.
bool ParseType(const std::string& dtype, int* type1, int* type3)
{
    struct TypeMapping {
        const char* name;
        int single_channel;
        int triple_channel;
    };

    static const TypeMapping kMappings[] = {
        {"UINT8", CV_8UC1, CV_8UC3},
        {"INT8", CV_8SC1, CV_8SC3},
        {"UINT16", CV_16UC1, CV_16UC3},
        {"INT16", CV_16SC1, CV_16SC3},
        {"INT32", CV_32SC1, CV_32SC3},
        {"FP32", CV_32FC1, CV_32FC3},
        {"FP64", CV_64FC1, CV_64FC3},
    };

    for (const TypeMapping& mapping : kMappings) {
        if (dtype == mapping.name) {
            *type1 = mapping.single_channel;
            *type3 = mapping.triple_channel;
            return true;
        }
    }

    return false;
}

// Validates the model metadata/configuration returned by the server and
// fills *model_info from them.
//
// Requirements checked here: exactly one input and one output, an FP32
// output, a batch size compatible with max_batch_size, and an input shape
// with at least four entries whose elements 1..3 (read as channels, height
// and width) are positive integers.
//
// model_metadata  parsed JSON model metadata
// model_config    parsed JSON model configuration
// batch_size      batch size the caller intends to send
// model_info      receives the extracted model properties
//
// Any violation is reported on stderr and terminates the process with exit(1).
void ParseModelHttp(
    const rapidjson::Document& model_metadata,
    const rapidjson::Document& model_config, const size_t batch_size,
    ModelInfo* model_info)
{
    // FindMember() is only valid on JSON objects.
    if (!model_metadata.IsObject() || !model_config.IsObject()) {
        std::cerr << "model metadata and model configuration must be JSON objects"
                  << std::endl;
        exit(1);
    }

    // Model name used in diagnostics; tolerate its absence.
    std::string model_name("<unknown>");
    const auto name_itr = model_metadata.FindMember("name");
    if (name_itr != model_metadata.MemberEnd() && name_itr->value.IsString()) {
        model_name.assign(name_itr->value.GetString(), name_itr->value.GetStringLength());
    }

    // Returns the string member `key` of `object`, or exits with a message.
    const auto require_string = [&model_name](
        const rapidjson::Value& object, const char* key, const char* what) -> std::string {
        if (object.IsObject()) {
            const auto itr = object.FindMember(key);
            if (itr != object.MemberEnd() && itr->value.IsString()) {
                return std::string(itr->value.GetString(), itr->value.GetStringLength());
            }
        }
        std::cerr << what << " missing '" << key << "' for model '" << model_name
                  << "'" << std::endl;
        exit(1);
    };

    const auto input_itr = model_metadata.FindMember("inputs");
    size_t input_count = 0;
    if (input_itr != model_metadata.MemberEnd() && input_itr->value.IsArray()) {
        input_count = input_itr->value.Size();
    }
    if (input_count != 1) {
        std::cerr << "expecting 1 input, got " << input_count << std::endl;
        exit(1);
    }

    const auto output_itr = model_metadata.FindMember("outputs");
    size_t output_count = 0;
    if (output_itr != model_metadata.MemberEnd() && output_itr->value.IsArray()) {
        output_count = output_itr->value.Size();
    }
    if (output_count != 1) {
        std::cerr << "expecting 1 output, got " << output_count << std::endl;
        exit(1);
    }

    const auto input_config_itr = model_config.FindMember("input");
    input_count = 0;
    if (input_config_itr != model_config.MemberEnd() && input_config_itr->value.IsArray()) {
        input_count = input_config_itr->value.Size();
    }
    if (input_count != 1) {
        std::cerr << "expecting 1 input in model configuration, got " << input_count
                  << std::endl;
        exit(1);
    }

    const rapidjson::Value& input_metadata = *input_itr->value.Begin();
    const rapidjson::Value& input_config = *input_config_itr->value.Begin();
    const rapidjson::Value& output_metadata = *output_itr->value.Begin();
    if (!input_metadata.IsObject() || !input_config.IsObject() || !output_metadata.IsObject()) {
        std::cerr << "malformed input/output description for model '" << model_name
                  << "'" << std::endl;
        exit(1);
    }

    const auto output_dtype_itr = output_metadata.FindMember("datatype");
    if (output_dtype_itr == output_metadata.MemberEnd() || !output_dtype_itr->value.IsString()) {
        std::cerr << "output missing datatype in the metadata for model'"
                  << model_name << "'" << std::endl;
        exit(1);
    }
    const std::string datatype(output_dtype_itr->value.GetString(),
                               output_dtype_itr->value.GetStringLength());
    if (datatype.compare("FP32") != 0) {
        std::cerr << "expecting output datatype to be FP32, model '"
                  << model_name << "' output type is '"
                  << datatype << "'" << std::endl;
        exit(1);
    }

    int max_batch_size = 0;
    const auto bs_itr = model_config.FindMember("max_batch_size");
    if (bs_itr != model_config.MemberEnd() && bs_itr->value.IsUint()) {
        max_batch_size = bs_itr->value.GetUint();
    }
    model_info->max_batch_size_ = max_batch_size;

    if (max_batch_size == 0) {
        if (batch_size != 1) {
            std::cerr << "batching not supported for model '"
                      << model_name << "'" << std::endl;
            exit(1);
        }
    } else {
        if (batch_size > (size_t)max_batch_size) {
            std::cerr << "expecting batch size <= " << max_batch_size
                      << " for model '" << model_name << "'"
                      << std::endl;
            exit(1);
        }
    }

    const bool input_batch_dim = (max_batch_size == 0);
    const size_t expected_input_dims = 3 + (input_batch_dim ? 1 : 0);
    const auto input_shape_itr = input_metadata.FindMember("shape");
    if (input_shape_itr == input_metadata.MemberEnd() || !input_shape_itr->value.IsArray()) {
        std::cerr << "input missing shape in the metadata for model'"
                  << model_name << "'" << std::endl;
        exit(1);
    }
    const size_t input_dims = input_shape_itr->value.Size();
    if (input_dims != expected_input_dims) {
        // NOTE(review): this mismatch has always been reported without
        // aborting; it is kept non-fatal so models that currently work keep
        // working. Confirm whether it should be an error.
        std::cerr << " expecting input to have " << expected_input_dims
                  << " dimensions, model '" << model_name
                  << "' input has " << input_dims << std::endl;
    }
    // Shape elements 1..3 are read below, so four entries are mandatory.
    if (input_dims < 4) {
        std::cerr << "input shape of model '" << model_name << "' has " << input_dims
                  << " dimensions, at least 4 are required" << std::endl;
        exit(1);
    }

    // Returns shape element `index` as a positive int, or exits.
    const auto read_dim = [&](rapidjson::SizeType index, const char* label) -> int {
        const rapidjson::Value& dim = input_shape_itr->value[index];
        if (!dim.IsInt() || dim.GetInt() <= 0) {
            std::cerr << "input " << label << " of model '" << model_name
                      << "' must be a positive integer" << std::endl;
            exit(1);
        }
        return dim.GetInt();
    };

    model_info->input_format_ = require_string(input_config, "format", "input configuration");
    model_info->output_name_ = require_string(output_metadata, "name", "output metadata");
    model_info->input_name_ = require_string(input_metadata, "name", "input metadata");
    model_info->input_datatype_ = require_string(input_metadata, "datatype", "input metadata");

    model_info->input_c_ = read_dim(1, "channels");
    model_info->input_h_ = read_dim(2, "height");
    model_info->input_w_ = read_dim(3, "width");

    if (!ParseType(model_info->input_datatype_, &(model_info->type1_), &(model_info->type3_))) {
        std::cerr << "unexpected input datatype '" << model_info->input_datatype_
                  << "' for model \"" << model_name << "\""
                  << std::endl;
        exit(1);
    }
}

// Holds either an HTTP or a gRPC inference client in the same storage.
// The union itself does not record which member is active.
union TritonClient {
    // Starts the lifetime of http_client_ as an empty unique_ptr via
    // placement new, making it the active member after construction.
    TritonClient()
    {
        new (&http_client_) std::unique_ptr<tc::InferenceServerHttpClient>{};
    }
    // Empty on purpose: neither member's destructor is invoked here, so a
    // client still owned by the active member is not released by this union.
    ~TritonClient() {}

    std::unique_ptr<tc::InferenceServerHttpClient> http_client_;
    std::unique_ptr<tc::InferenceServerGrpcClient> grpc_client_;
};

}

int
main(int argc, char** argv)
{
    bool verbose = false;
    bool async = false;
    int batch_size = 1;

    if (argc < 3 || argc > 3)
    {
        fprintf(stdout, "Two args are required: ./a yolov7-tiny image_path\n");
        return -1;
    }

    std::string model_name = argv[1];
    std::string fileName = argv[2];
    std::string preprocess_output_filename;
    std::string model_version = "";
    std::string url("localhost:8000");
    ProtocolType protocol = ProtocolType::HTTP;
    tc::Headers http_headers;

    TritonClient triton_client;
    tc::Error err;
    err = tc::InferenceServerHttpClient::Create(
          &triton_client.http_client_, url, verbose);
    if (!err.IsOk()) {
        std::cerr << "error: unable to create client for inference: " << err << std::endl;
        exit(1);
    }

    ModelInfo model_info;
    std::string model_metadata;
    err = triton_client.http_client_->ModelMetadata(&model_metadata, model_name, model_version, http_headers);
    if (!err.IsOk()) {
        std::cerr << "error: failed to get model metadata: " << err << std::endl;
    }
    rapidjson::Document model_metadata_json;
    err = tc::ParseJson(&model_metadata_json, model_metadata);
    if (!err.IsOk()) {
        std::cerr << "error: failed to parse model metadata: " << err
                  << std::endl;
    }
    std::string model_config;
    err = triton_client.http_client_->ModelConfig(&model_config, model_name, model_version, http_headers);
    if (!err.IsOk()) {
        std::cerr << "error: failed to get model config: " << err << std::endl;
    }
    rapidjson::Document model_config_json;
    err = tc::ParseJson(&model_config_json, model_config);
    if (!err.IsOk()) {
        std::cerr << "error: failed to parse model config: " << err << std::endl;
    }
    ParseModelHttp( model_metadata_json, model_config_json, batch_size, &model_info);

    std::vector<std::string> image_filenames;
    struct stat name_stat;
    if (stat(fileName.c_str(), &name_stat) != 0) {
        std::cerr << "Failed to find '" << fileName << "': " << strerror(errno) << std::endl;
        exit(1);
    }

    if (name_stat.st_mode & S_IFDIR) {
        const std::string dirname = fileName;
        DIR* dir_ptr = opendir(dirname.c_str());
        struct dirent* d_ptr;
        while ((d_ptr = readdir(dir_ptr)) != NULL) {
            const std::string filename = d_ptr->d_name;
            if ((filename != ".") && (filename != "..")) {
                image_filenames.push_back(dirname + "/" + filename);
            }
        }
        closedir(dir_ptr);
    } else {
        image_filenames.push_back(fileName);
    }

    std::sort(image_filenames.begin(), image_filenames.end());

    std::vector<std::vector<uint8_t>> image_data;
    for (const auto& fn : image_filenames) {
        image_data.emplace_back();
        Preprocess(fn, model_info.type1_, model_info.type3_, model_info.input_c_, 
                  cv::Size(model_info.input_w_, model_info.input_h_), &(image_data.back()));

        if ((image_data.size() == 1) && !preprocess_output_filename.empty()) {
            std::ofstream output_file(preprocess_output_filename);
            std::ostream_iterator<uint8_t> output_iterator(output_file);
            std::copy(image_data[0].begin(), image_data[0].end(), output_iterator);
        }
    }

    std::vector<int64_t> shape;
    shape.push_back(batch_size);
    shape.push_back(model_info.input_c_);
    shape.push_back(model_info.input_h_);
    shape.push_back(model_info.input_w_);

    tc::InferInput* input;
    err = tc::InferInput::Create(&input, model_info.input_name_, shape, model_info.input_datatype_);
    if (!err.IsOk()) {
        std::cerr << "unable to get input: " << err << std::endl;
        exit(1);
    }
    std::shared_ptr<tc::InferInput> input_ptr(input);

    tc::InferRequestedOutput* output;
    err = tc::InferRequestedOutput::Create(&output, model_info.output_name_);
    if (!err.IsOk()) {
        std::cerr << "unable to get output: " << err << std::endl;
        exit(1);
    }
    std::shared_ptr<tc::InferRequestedOutput> output_ptr(output);

    std::vector<tc::InferInput*> inputs = {input_ptr.get()};
    std::vector<const tc::InferRequestedOutput*> outputs = {output_ptr.get()};

    tc::InferOptions options(model_name);
    options.model_version_ = model_version;

    std::vector<std::unique_ptr<tc::InferResult>> results;
    std::vector<std::vector<std::string>> result_filenames;
    size_t image_idx = 0;
    size_t done_cnt = 0;
    size_t sent_count = 0;
    bool last_request = false;
    std::mutex mtx;
    std::condition_variable cv;

    auto callback_func = [&](tc::InferResult* result) 
    {
        {
            std::lock_guard<std::mutex> lk(mtx);
            results.emplace_back(result);
            done_cnt++;
        }
        cv.notify_all();
    };

    while (!last_request) {
        err = input_ptr->Reset();
        if (!err.IsOk()) {
            std::cerr << "failed resetting input: " << err << std::endl;
            exit(1);
        }

        std::vector<std::string> input_filenames;
        for (int idx = 0; idx < batch_size; ++idx) {
            input_filenames.push_back(image_filenames[image_idx]);
            err = input_ptr->AppendRaw(image_data[image_idx]);
            if (!err.IsOk()) {
                std::cerr << "failed setting input: " << err << std::endl;
                exit(1);
            }

            image_idx = (image_idx + 1) % image_data.size();
            if (image_idx == 0) {
                last_request = true;
            }
        }

        result_filenames.emplace_back(std::move(input_filenames));
        options.request_id_ = std::to_string(sent_count);

        double time1 = getTickCount();  
        tc::InferResult* result;
        if (protocol == ProtocolType::HTTP) {
            err = triton_client.http_client_->Infer(
                  &result, options, inputs, outputs, http_headers);
        } else {
            err = triton_client.grpc_client_->Infer(
                  &result, options, inputs, outputs, http_headers);
        }
        if (!err.IsOk()) {
            std::cerr << "failed sending synchronous infer request: " << err
                      << std::endl;
            exit(1);
        }
        results.emplace_back(result);
        double time2 = getTickCount();
        double elapsedTime = (time2 - time1)*1000 / getTickFrequency();
        fprintf(stdout, "inference time:%f ms\n", elapsedTime);
        sent_count++;
    }

    for (size_t idx = 0; idx < results.size(); idx++) {
        std::cout << "Request " << idx << ", batch size " << batch_size << std::endl;
        Postprocess(
            std::move(results[idx]), result_filenames[idx], batch_size,
            model_info.output_name_, model_info.max_batch_size_ != 0);
    }

    return 0;
}