#include "precomp.h" using namespace std; using namespace paraformer; ModelImp::ModelImp(const char* path, int mode,int nNumThread) { string model_path = pathAppend(path, "model.onnx"); string vocab_path = pathAppend(path, "vocab.txt"); fe = new FeatureExtract(mode); //p_helper = new ModelParamsHelper(wenet_path.c_str(), 500); //encoder = new Encoder(&p_helper->params.encoder); //predictor = new Predictor(&p_helper->params.predictor); //decoder = new Decoder(&p_helper->params.decoder); sessionOptions.SetInterOpNumThreads(nNumThread); sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED); #ifdef _WIN32 wstring wstrPath = strToWstr(model_path); m_session = new Ort::Session(env, wstrPath.c_str(), sessionOptions); #else m_session = new Ort::Session(env, model_path.c_str(), sessionOptions); #endif string strName; getInputName(m_session, strName); m_strInputNames.push_back(strName.c_str()); getInputName(m_session, strName,1); m_strInputNames.push_back(strName); getOutputName(m_session, strName); m_strOutputNames.push_back(strName); getOutputName(m_session, strName,1); m_strOutputNames.push_back(strName); for (auto& item : m_strInputNames) m_szInputNames.push_back(item.c_str()); for (auto& item : m_strOutputNames) m_szOutputNames.push_back(item.c_str()); vocab = new Vocab(vocab_path.c_str()); } ModelImp::~ModelImp() { delete fe; //delete p_helper; //delete encoder; //delete predictor; // //delete decoder; if (m_session) { delete m_session; m_session = nullptr; } delete vocab; } void ModelImp::reset() { fe->reset(); } void ModelImp::apply_lfr(Tensor*& din) { int mm = din->size[2]; int ll = ceil(mm / 6.0); Tensor* tmp = new Tensor(ll, 560); int i, j; int out_offset = 0; for (i = 0; i < ll; i++) { for (j = 0; j < 7; j++) { int idx = i * 6 + j - 3; if (idx < 0) { idx = 0; } if (idx >= mm) { idx = mm - 1; } memcpy(tmp->buff + out_offset, din->buff + idx * 80, sizeof(float) * 80); out_offset += 80; } } delete din; din = tmp; } void ModelImp::apply_cmvn(Tensor* din) { const float* var; const float* mean; float scale = 22.6274169979695; int m = din->size[2]; int n = din->size[3]; var = (const float*)paraformer_cmvn_var_hex; mean = (const float*)paraformer_cmvn_mean_hex; int i, j; for (i = 0; i < m; i++) { for (j = 0; j < n; j++) { int idx = i * n + j; din->buff[idx] = (din->buff[idx] + mean[j]) * var[j]; } } } string ModelImp::greedy_search(float * in, int nLen ) { vector hyps; int Tmax = nLen; int i; for (i = 0; i < Tmax; i++) { int max_idx; float max_val; findmax(in + i * 8404, 8404, max_val, max_idx); hyps.push_back(max_idx); } return vocab->vector2stringV2(hyps); } string ModelImp::forward(float* din, int len, int flag) { Tensor* in; fe->insert(din, len, flag); fe->fetch(in); apply_lfr(in); apply_cmvn(in); //encoder->forward(in); //Tensor enc_out(in); //predictor->forward(in); //decoder->forward(in, &enc_out); //int64_t speech_len = in->size[2]; //Ort::Value inputTensor = Ort::Value::CreateTensor(m_memoryInfo, in->buff, in->buff_size, input_shape_.data(), input_shape_.size()); Ort::RunOptions run_option; std::array input_shape_{ in->size[0],in->size[2],in->size[3] }; Ort::Value onnx_feats = Ort::Value::CreateTensor(m_memoryInfo, in->buff, in->buff_size, input_shape_.data(), input_shape_.size()); std::vector feats_len{ in->size[2] }; std::vector feats_len_dim{ 1 }; Ort::Value onnx_feats_len = Ort::Value::CreateTensor( m_memoryInfo, feats_len.data(), feats_len.size() * sizeof(int32_t), feats_len_dim.data(), feats_len_dim.size(), ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32); std::vector input_onnx; input_onnx.emplace_back(std::move(onnx_feats)); input_onnx.emplace_back(std::move(onnx_feats_len)); //auto output = m_session_encoder->Run(run_option, // m_strEncInputName.data(), // input_onnx.data(), // m_strEncInputName.size(), // m_strEncOutputName.data(), // m_strEncOutputName.size() //); auto outputTensor = m_session->Run(run_option, m_szInputNames.data(), input_onnx.data(), m_szInputNames.size(), m_szOutputNames.data(), m_szOutputNames.size()); //assert(outputTensor.size() == 1 && outputTensor[0].IsTensor()); std::vector outputShape = outputTensor[0].GetTensorTypeAndShapeInfo().GetShape(); int64_t outputCount = std::accumulate(outputShape.begin(), outputShape.end(), 1, std::multiplies()); float* floatData = outputTensor[0].GetTensorMutableData(); auto encoder_out_lens = outputTensor[1].GetTensorMutableData(); //float* floatSize = outputTensor[1].GetTensorMutableData(); //std::vector out_data(floatArray, floatArray + outputCount); string result = greedy_search(floatData, *encoder_out_lens); if(in) delete in; return result; } string ModelImp::forward_chunk(float* din, int len, int flag) { printf("Not Imp!!!!!!\n"); return "Hello"; } string ModelImp::rescoring() { printf("Not Imp!!!!!!\n"); return "Hello"; }