"torch_scatter/std.py" did not exist on "d305ecc0d973fb73cdaff4ac2eba742017751c9a"
Commit 06924f5d authored by mayong's avatar mayong
Browse files

Add the src of libs.

parent 83ff3a7f
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <webrtc_vad.h>
#include "Audio.h"
using namespace std;
// Fixed-size sliding window over per-step VAD votes: put() pushes a new
// value and returns the sum of the most recent `window_size` values.
class AudioWindow {
  private:
    int *window;         // ring buffer of window_size + 1 slots
    int in_idx;          // next slot to write
    int out_idx;         // oldest slot; its value leaves the sum on put()
    int sum;             // running sum of the last window_size values
    int window_size = 0;

  public:
    AudioWindow(int window_size) : window_size(window_size)
    {
        // fix: calloc takes (count, element-size) — one spare slot keeps
        // in_idx and out_idx from colliding
        window = (int *)calloc(window_size + 1, sizeof(int));
        in_idx = 0;
        out_idx = 1;
        sum = 0;
    };
    ~AudioWindow()
    {
        free(window);
    };
    // fix: the class owns a raw buffer — forbid copies so the buffer is
    // never double-freed (rule of three)
    AudioWindow(const AudioWindow &) = delete;
    AudioWindow &operator=(const AudioWindow &) = delete;
    // Push val, drop the value that fell out of the window, return the sum.
    int put(int val)
    {
        sum = sum + val - window[out_idx];
        window[in_idx] = val;
        in_idx = in_idx == window_size ? 0 : in_idx + 1;
        out_idx = out_idx == window_size ? 0 : out_idx + 1;
        return sum;
    };
};
// Default frame: start/len are filled in later via set_start()/set_end().
AudioFrame::AudioFrame(){};
// Frame of `len` samples beginning at sample 0.
AudioFrame::AudioFrame(int len) : len(len)
{
    start = 0;
};
// No owned resources to release.
AudioFrame::~AudioFrame(){};
// Store the frame's start sample, clamped to be non-negative; returns it.
int AudioFrame::set_start(int val)
{
    start = (val >= 0) ? val : 0;
    return start;
}
// Sets the frame end so the covered span is a whole number of fbank frames:
// 400-sample windows advanced by a 160-sample shift, rounding the span up.
// Updates `len`, warns (but does not clamp) when the end exceeds max_len,
// and returns the new end.
int AudioFrame::set_end(int val, int max_len)
{
    float num_samples = val - start;
    float frame_length = 400;
    float frame_shift = 160;
    // fix: use frame_length instead of a duplicated literal 400 so the
    // framing constants stay consistent if ever changed
    float num_new_samples =
        ceil((num_samples - frame_length) / frame_shift) * frame_shift +
        frame_length;
    end = start + num_new_samples;
    len = (int)num_new_samples;
    if (end > max_len)
        printf("frame end > max_len!!!!!!!\n");
    return end;
}
// First sample index of this frame.
int AudioFrame::get_start()
{
    return start;
};
// Number of samples in this frame.
int AudioFrame::get_len()
{
    return len;
};
// Debug printout placeholder — not implemented.
int AudioFrame::disp()
{
    printf("not imp!!!!\n");
    return 0;
};
// data_type == 1 means 16-bit PCM input that loadwav() scales by 1/32768
// when converting to float.
Audio::Audio(int data_type) : data_type(data_type)
{
    speech_buff = NULL;
    speech_data = NULL;
    align_size = 1360; // default chunk alignment, in samples
}
// Same, with a caller-chosen alignment size.
Audio::Audio(int data_type, int size) : data_type(data_type)
{
    speech_buff = NULL;
    speech_data = NULL;
    align_size = (float)size;
}
Audio::~Audio()
{
    // speech_buff and speech_data are always allocated together in loadwav()
    if (speech_buff != NULL) {
        free(speech_buff);
        free(speech_data);
    }
}
// Print the loaded audio duration (assumes a 16 kHz sample rate).
void Audio::disp()
{
    printf("Audio time is %f s. len is %d\n", (float)speech_len / 16000,
           speech_len);
}
// Loads a 16 kHz / 16-bit mono WAV file: skips the canonical 44-byte RIFF
// header, keeps the raw int16 samples in speech_buff and a float copy in
// speech_data (scaled to [-1, 1) when data_type == 1). Both buffers are
// zero-padded up to a multiple of align_size. Queues one AudioFrame that
// covers the whole signal. NOTE(review): assumes a canonical 44-byte
// header; WAVs with extra chunks will be mis-parsed — confirm inputs.
void Audio::loadwav(const char *filename)
{
    if (speech_buff != NULL) {
        free(speech_buff);
        free(speech_data);
        speech_buff = NULL; // fix: no dangling pointers if the load fails
        speech_data = NULL;
    }
    offset = 0;
    FILE *fp = fopen(filename, "rb");
    if (fp == NULL) { // fix: do not crash on a missing/unreadable file
        printf("Failed to open wav file %s\n", filename);
        return;
    }
    fseek(fp, 0, SEEK_END);
    uint32_t nFileLen = ftell(fp);
    fseek(fp, 44, SEEK_SET); // skip the 44-byte WAV header
    speech_len = (nFileLen - 44) / 2; // int16 samples
    speech_align_len = (int)(ceil((float)speech_len / align_size) * align_size);
    speech_buff = (int16_t *)malloc(sizeof(int16_t) * speech_align_len);
    memset(speech_buff, 0, sizeof(int16_t) * speech_align_len);
    size_t nread = fread(speech_buff, sizeof(int16_t), speech_len, fp);
    fclose(fp);
    if (nread != (size_t)speech_len) // fix: surface short reads instead of ignoring them
        printf("short read on %s\n", filename);
    speech_data = (float *)malloc(sizeof(float) * speech_align_len);
    memset(speech_data, 0, sizeof(float) * speech_align_len);
    int i;
    float scale = 1;
    if (data_type == 1) {
        scale = 32768; // int16 -> [-1, 1) float
    }
    for (i = 0; i < speech_len; i++) {
        speech_data[i] = (float)speech_buff[i] / scale;
    }
    // one frame spanning the whole signal; split()/padding() may replace it
    AudioFrame *frame = new AudioFrame(speech_len);
    frame_queue.push(frame);
}
// Hands out a pointer into speech_data of `len` samples and advances the
// read offset. Returns S_ERR when the signal is exhausted, S_END on the
// exact final chunk, S_MIDDLE otherwise.
// NOTE(review): when the remaining samples are fewer than `len` but offset
// != speech_align_len - len, the S_MIDDLE branch still exposes `len`
// samples — callers appear to rely on speech_align_len being a multiple of
// the chunk size; confirm.
int Audio::fetch_chunck(float *&dout, int len)
{
    if (offset >= speech_align_len) {
        dout = NULL;
        return S_ERR;
    } else if (offset == speech_align_len - len) {
        dout = speech_data + offset;
        offset = speech_align_len;
        // temporary workaround: drop the queued whole-signal frame
        AudioFrame *frame = frame_queue.front();
        frame_queue.pop();
        delete frame;
        return S_END;
    } else {
        dout = speech_data + offset;
        offset += len;
        return S_MIDDLE;
    }
}
int Audio::fetch(float *&dout, int &len, int &flag)
{
if (frame_queue.size() > 0) {
AudioFrame *frame = frame_queue.front();
frame_queue.pop();
dout = speech_data + frame->get_start();
len = frame->get_len();
delete frame;
flag = S_END;
return 1;
} else {
return 0;
}
}
// Reflect-pads the signal for fbank framing: (frame_length - frame_shift)/2
// mirrored samples on the left, and enough mirrored samples on the right so
// the total covers num_frames full 400/160 frames. Replaces
// speech_data/speech_len with the padded signal and swaps the queued
// whole-signal frame for one spanning the new length.
void Audio::padding()
{
    float num_samples = speech_len;
    float frame_length = 400;
    float frame_shift = 160;
    float num_frames = floor((num_samples + (frame_shift / 2)) / frame_shift);
    float num_new_samples = (num_frames - 1) * frame_shift + frame_length;
    float num_padding = num_new_samples - num_samples;
    float num_left_padding = (frame_length - frame_shift) / 2;
    float num_right_padding = num_padding - num_left_padding;
    float *new_data = (float *)malloc(num_new_samples * sizeof(float));
    int i;
    int tmp_off = 0;
    // left padding: mirror the first samples (no repeated edge sample)
    for (i = 0; i < num_left_padding; i++) {
        int ii = num_left_padding - i - 1;
        new_data[i] = speech_data[ii];
    }
    tmp_off = num_left_padding;
    // copy the original signal after the left padding
    memcpy(new_data + tmp_off, speech_data, speech_len * sizeof(float));
    tmp_off += speech_len;
    // right padding: mirror the last samples
    for (i = 0; i < num_right_padding; i++) {
        int ii = speech_len - i - 1;
        new_data[tmp_off + i] = speech_data[ii];
    }
    free(speech_data);
    speech_data = new_data;
    speech_len = num_new_samples;
    // push a frame for the padded signal, then pop-and-delete the old
    // front frame that loadwav() queued
    AudioFrame *frame = new AudioFrame(num_new_samples);
    frame_queue.push(frame);
    frame = frame_queue.front();
    frame_queue.pop();
    delete frame;
}
// VAD trigger states and speech-length thresholds (samples at 16 kHz),
// used by Audio::split() to decide when to cut a segment.
#define UNTRIGGERED 0
#define TRIGGERED 1
#define SPEECH_LEN_5S (16000 * 5)
#define SPEECH_LEN_10S (16000 * 10)
#define SPEECH_LEN_20S (16000 * 20)
#define SPEECH_LEN_30S (16000 * 30)
// Voice-activity segmentation of the loaded signal using WebRTC VAD.
// Walks the signal in 480-sample (30 ms @ 16 kHz) steps; a sliding window
// over the per-step VAD votes opens a segment when nearly every recent step
// was voiced, and closes it when the vote count falls below a gap threshold
// that tightens as the segment grows (long segments get cut sooner).
// Closed segments are pushed onto frame_queue.
void Audio::split()
{
    VadInst *handle = WebRtcVad_Create();
    WebRtcVad_Init(handle);
    WebRtcVad_set_mode(handle, 2); // aggressiveness mode 2
    int window_size = 10;          // votes tracked = 300 ms of context
    AudioWindow audiowindow(window_size);
    int status = UNTRIGGERED;
    int offset = 0;
    int fs = 16000;
    int step = 480; // samples per VAD decision (30 ms)
    AudioFrame *frame;
    // discard the whole-signal frame queued by loadwav()/padding()
    frame = frame_queue.front();
    frame_queue.pop();
    delete frame;
    frame = NULL;
    while (offset < speech_len - step) {
        int n = WebRtcVad_Process(handle, fs, speech_buff + offset, step);
        if (status == UNTRIGGERED && audiowindow.put(n) >= window_size - 1) {
            // nearly all recent steps voiced: open a segment starting
            // where the voiced run began
            frame = new AudioFrame();
            int start = offset - step * (window_size - 1);
            frame->set_start(start);
            status = TRIGGERED;
        } else if (status == TRIGGERED) {
            int win_weight = audiowindow.put(n);
            int voice_len = (offset - frame->get_start());
            int gap = 0;
            if (voice_len < SPEECH_LEN_5S) {
                // too short to cut: keep collecting
                offset += step;
                continue;
            } else if (voice_len < SPEECH_LEN_10S) {
                gap = 1;
            } else if (voice_len < SPEECH_LEN_20S) {
                gap = window_size / 5;
            } else {
                gap = window_size / 2;
            }
            if (win_weight < gap) {
                // voiced votes dropped below the threshold: close the segment
                status = UNTRIGGERED;
                offset = frame->set_end(offset, speech_align_len);
                frame_queue.push(frame);
                frame = NULL;
            }
        }
        offset += step;
    }
    if (frame != NULL) {
        // signal ended while still triggered: close the trailing segment
        frame->set_end(speech_len, speech_align_len);
        frame_queue.push(frame);
        frame = NULL;
    }
    WebRtcVad_Free(handle);
}
# Collect library sources (top level + the paraformer backend).
file(GLOB files1 "*.cpp")
file(GLOB files4 "paraformer/*.cpp")
set(files ${files1} ${files2} ${files3} ${files4})
# message("${files}")
add_library(rapidasr ${files})
if(WIN32)
    set(EXTRA_LIBS libfftw3f-3 libopenblas webrtcvad)
    if(CMAKE_CL_64)
        target_link_directories(rapidasr PUBLIC ${CMAKE_SOURCE_DIR}/win/lib/x64)
    else()
        target_link_directories(rapidasr PUBLIC ${CMAKE_SOURCE_DIR}/win/lib/x86)
    endif()
    target_include_directories(rapidasr PUBLIC ${CMAKE_SOURCE_DIR}/win/include ${CMAKE_SOURCE_DIR}/win/include/openblas)
else()
    set(EXTRA_LIBS fftw3f openblas webrtcvad pthread)
    target_include_directories(rapidasr PUBLIC "/usr/local/opt/fftw/include")
    target_link_directories(rapidasr PUBLIC "/usr/local/opt/fftw/lib")
    target_include_directories(rapidasr PUBLIC "/usr/local/opt/openblas/include")
    target_link_directories(rapidasr PUBLIC "/usr/local/opt/openblas/lib")
    target_include_directories(rapidasr PUBLIC "/usr/include")
    target_link_directories(rapidasr PUBLIC "/usr/lib64")
    target_include_directories(rapidasr PUBLIC ${OPENBLAS_INCLUDE_DIR} ${FFTW3F_INCLUDE_DIR})
    target_link_directories(rapidasr PUBLIC ${OPENBLAS_LIBRARY_DIR} ${FFTW3F_LIBRARY_DIR})
endif()
# fix: use forward slashes — backslashes act as escape characters in CMake
# strings and break non-Windows generators (CMake accepts "/" on Windows too)
include_directories(${ONNXRUNTIME_DIR}/include)
message(${ONNXRUNTIME_DIR}/lib)
include_directories(${CMAKE_SOURCE_DIR}/include)
target_link_libraries(rapidasr PUBLIC onnxruntime ${EXTRA_LIBS})
// Placeholder header: include guard only, no declarations yet.
#ifndef COMMONSTRUCT_H
#define COMMONSTRUCT_H
#endif
This diff is collapsed.
#ifndef FEATUREEXTRACT_H
#define FEATUREEXTRACT_H
#include <fftw3.h>
#include <stdint.h>
#include "FeatureQueue.h"
#include "SpeechWrap.h"
#include "Tensor.h"
// Streaming filterbank feature extractor: raw samples go in via insert(),
// windows of 80-dim features come out via fetch() as Tensor<float>.
class FeatureExtract {
  private:
    SpeechWrap speech;   // bridges samples across successive insert() calls
    FeatureQueue fqueue; // accumulates per-frame features into windows
    int mode;            // NOTE(review): semantics set by the caller — confirm
    float *fft_input;
    fftwf_complex *fft_out;
    fftwf_plan p;        // FFTW plan, reused across frames
    void fftw_init();
    void melspect(float *din, float *dout); // spectrum -> mel filterbank
    void global_cmvn(float *din);           // global mean/variance normalization
  public:
    FeatureExtract(int mode);
    ~FeatureExtract();
    int size();   // number of ready feature windows
    int status();
    void reset();
    void insert(float *din, int len, int flag);
    bool fetch(Tensor<float> *&dout); // pops one window; caller owns the tensor
};
#endif
#include "FeatureQueue.h"
#include "CommonStruct.h"
#include <string>
#include <ComDefine.h>
FeatureQueue::FeatureQueue()
{
    // default window: 67 frames of 80-dim filterbank features
    buff = new Tensor<float>(67, 80);
    window_size = 67;
    buff_idx = 0;
}
FeatureQueue::~FeatureQueue()
{
    delete buff;
}
// Re-create the staging buffer with a new window size (frames per window).
void FeatureQueue::reinit(int size)
{
    delete buff;
    buff = new Tensor<float>(size, 80);
    buff_idx = 0;
    window_size = size;
}
// Drop any partially accumulated frames.
void FeatureQueue::reset()
{
    buff_idx = 0;
}
// Appends one 80-dim feature frame to the staging buffer.
// On S_END the partial window is flushed to feature_queue as-is; when the
// buffer fills, the complete window is queued and its last 3 frames are
// carried into a fresh buffer as left-context for the next window.
void FeatureQueue::push(float *din, int flag)
{
    int offset = buff_idx * 80;
    memcpy(buff->buff + offset, din, 80 * sizeof(float));
    buff_idx++;
    if (flag == S_END) {
        // flush exactly the frames accumulated so far
        Tensor<float> *tmp = new Tensor<float>(buff_idx, 80);
        memcpy(tmp->buff, buff->buff, buff_idx * 80 * sizeof(float));
        feature_queue.push(tmp);
        buff_idx = 0;
    } else if (buff_idx == window_size) {
        // hand ownership of the full window to the queue, then seed a new
        // staging buffer with the last 3 frames (overlap/context)
        feature_queue.push(buff);
        Tensor<float> *tmp = new Tensor<float>(window_size, 80);
        memcpy(tmp->buff, buff->buff + (window_size - 3) * 80,
               3 * 80 * sizeof(float));
        buff_idx = 3;
        buff = tmp;
    }
}
// Removes and returns the oldest queued feature window (caller owns it).
// Precondition: size() > 0.
Tensor<float> *FeatureQueue::pop()
{
    Tensor<float> *front_feat = feature_queue.front();
    feature_queue.pop();
    return front_feat;
}
// Number of complete feature windows waiting to be consumed.
int FeatureQueue::size()
{
    return (int)feature_queue.size();
}
#ifndef FEATUREQUEUE_H
#define FEATUREQUEUE_H
#include "Tensor.h"
#include <queue>
#include <stdint.h>
using namespace std;
// Accumulates 80-dim feature frames into fixed-size windows and queues the
// completed windows as Tensor<float> for the model to consume.
class FeatureQueue {
  private:
    queue<Tensor<float> *> feature_queue; // completed windows (owned)
    Tensor<float> *buff;                  // staging buffer being filled
    int buff_idx;                         // frames currently staged
    int window_size;                      // frames per completed window
  public:
    FeatureQueue();
    ~FeatureQueue();
    void reinit(int size);
    void reset();
    void push(float *din, int flag);
    Tensor<float> *pop(); // caller takes ownership
    int size();
};
#endif
#include "precomp.h"
// Factory: builds the paraformer ONNX-backed model implementation.
// The caller owns the returned object.
Model *create_model(const char *path, int mode)
{
    return new paraformer::ModelImp(path, mode);
}
#include "precomp.h"
SpeechWrap::SpeechWrap()
{
    cache_size = 0;
}
SpeechWrap::~SpeechWrap()
{
}
// Forget any cached tail from a previous signal.
void SpeechWrap::reset()
{
    cache_size = 0;
}
// Attach a new input chunk (borrowed, not copied); the addressable view
// becomes: cached tail followed by this chunk.
void SpeechWrap::load(float *din, int len)
{
    in = din;
    in_size = len;
    total_size = cache_size + in_size;
}
// Samples currently addressable through operator[].
int SpeechWrap::size()
{
    return total_size;
}
// Keeps the unconsumed tail (samples from `offset` to the end of the view)
// as the cache for the next load().
// NOTE(review): assumes the remainder fits the fixed 400-float cache and
// that offset >= cache_size (so the tail lies entirely in `in`) — confirm
// against FeatureExtract's consumption pattern.
void SpeechWrap::update(int offset)
{
    int in_offset = offset - cache_size;
    cache_size = (total_size - offset);
    memcpy(cache, in + in_offset, cache_size * sizeof(float));
}
// Virtual concatenation: indexes the cached tail first, then the freshly
// loaded input chunk.
float &SpeechWrap::operator[](int i)
{
    if (i < cache_size) {
        return cache[i];
    }
    return in[i - cache_size];
}
#ifndef SPEECHWRAP_H
#define SPEECHWRAP_H
#include <stdint.h>
// Presents a cached tail from the previous chunk plus the current chunk as
// one contiguous-looking sample stream (see operator[]).
class SpeechWrap {
  private:
    float cache[400];    // tail carried over between load() calls
    int cache_size;      // valid floats in cache
    float *in;           // current chunk (borrowed from the caller)
    int in_size;
    int total_size;      // cache_size + in_size
    int next_cache_size; // NOTE(review): appears unused in this file — confirm
  public:
    SpeechWrap();
    ~SpeechWrap();
    void load(float *din, int len);
    void update(int offset);
    void reset();
    int size();
    float &operator[](int i);
};
#endif
#ifndef TENSOR_H
#define TENSOR_H
#include "alignedmem.h"
using namespace std;
// Simple dense 4-D tensor over 32-byte-aligned storage. The shape lives in
// size[4] (unused leading dims are 1); data is one flat row-major buffer.
template <typename T> class Tensor {
  private:
    void alloc_buff();
    void free_buff();
    int mem_size; // allocated capacity in elements (may exceed buff_size)
  public:
    T *buff;       // flat element storage, 32-byte aligned
    int size[4];   // logical shape
    int buff_size; // valid elements = size[0]*size[1]*size[2]*size[3]
    Tensor(Tensor<T> *in); // deep copy
    Tensor(int a);
    Tensor(int a, int b);
    Tensor(int a, int b, int c);
    Tensor(int a, int b, int c, int d);
    ~Tensor();
    void zeros();
    void shape();                // print the shape
    void disp();                 // print every element
    void dump(const char *mode); // write raw bytes to "tmp.bin"
    void concat(Tensor<T> *din, int dim); // append din along dim
    void resize(int a, int b, int c, int d);
    void add(float coe, Tensor<T> *in);       // this += coe * in
    void add(Tensor<T> *in);                  // this += in
    void add(Tensor<T> *in1, Tensor<T> *in2); // this += in1 + in2
    void reload(Tensor<T> *in); // overwrite data from in
};
// 1-D tensor of length a (leading dims padded with 1).
template <typename T> Tensor<T>::Tensor(int a) : size{1, 1, 1, a}
{
    alloc_buff();
}
// 2-D tensor (a rows, b cols).
template <typename T> Tensor<T>::Tensor(int a, int b) : size{1, 1, a, b}
{
    alloc_buff();
}
// 3-D tensor.
template <typename T> Tensor<T>::Tensor(int a, int b, int c) : size{1, a, b, c}
{
    alloc_buff();
}
// 4-D tensor.
template <typename T>
Tensor<T>::Tensor(int a, int b, int c, int d) : size{a, b, c, d}
{
    alloc_buff();
}
// Deep copy: same shape, own freshly allocated buffer.
template <typename T> Tensor<T>::Tensor(Tensor<T> *in)
{
    memcpy(size, in->size, 4 * sizeof(int));
    alloc_buff();
    memcpy(buff, in->buff, in->buff_size * sizeof(T));
}
template <typename T> Tensor<T>::~Tensor()
{
    free_buff();
}
// Allocates the flat buffer sized by the current shape (32-byte aligned).
template <typename T> void Tensor<T>::alloc_buff()
{
    buff_size = size[0] * size[1] * size[2] * size[3];
    mem_size = buff_size; // capacity tracks the initial allocation
    buff = (T *)aligned_malloc(32, buff_size * sizeof(T));
}
template <typename T> void Tensor<T>::free_buff()
{
    aligned_free(buff);
}
// Zero-fill all valid elements.
template <typename T> void Tensor<T>::zeros()
{
    memset(buff, 0, buff_size * sizeof(T));
}
// Debug print of the logical shape.
template <typename T> void Tensor<T>::shape()
{
    printf("(%d,%d,%d,%d)\n", size[0], size[1], size[2], size[3]);
}
// Appends din's elements after this tensor's data and grows size[dim].
// Fixes the old TODO: the buffer is reallocated when capacity would be
// exceeded, instead of writing past the end of the allocation.
// NOTE(review): only meaningful when `dim` is the slowest-varying non-unit
// dimension (simple byte append) — confirm callers.
template <typename T> void Tensor<T>::concat(Tensor<T> *din, int dim)
{
    int new_size = buff_size + din->buff_size;
    if (new_size > mem_size) {
        // grow: copy existing elements into a larger aligned block
        T *new_buff = (T *)aligned_malloc(32, new_size * sizeof(T));
        memcpy(new_buff, buff, buff_size * sizeof(T));
        aligned_free(buff);
        buff = new_buff;
        mem_size = new_size;
    }
    memcpy(buff + buff_size, din->buff, din->buff_size * sizeof(T));
    buff_size = new_size;
    size[dim] += din->size[dim];
}
// Reshape to (a, b, c, d). Fixes the old TODO: when the new element count
// exceeds the allocated capacity, the buffer is grown (existing elements
// preserved) instead of silently leaving buff_size larger than the
// allocation and inviting out-of-bounds access.
template <typename T> void Tensor<T>::resize(int a, int b, int c, int d)
{
    int new_size = a * b * c * d;
    if (new_size > mem_size) {
        T *new_buff = (T *)aligned_malloc(32, new_size * sizeof(T));
        memcpy(new_buff, buff, buff_size * sizeof(T)); // keep old contents
        aligned_free(buff);
        buff = new_buff;
        mem_size = new_size;
    }
    size[0] = a;
    size[1] = b;
    size[2] = c;
    size[3] = d;
    buff_size = new_size;
}
// this += coe * in, element-wise over the valid elements.
template <typename T> void Tensor<T>::add(float coe, Tensor<T> *in)
{
    const T *src = in->buff;
    for (int idx = 0; idx < buff_size; idx++) {
        buff[idx] = buff[idx] + coe * src[idx];
    }
}
// this += in, element-wise.
template <typename T> void Tensor<T>::add(Tensor<T> *in)
{
    const T *src = in->buff;
    for (int idx = 0; idx < buff_size; idx++) {
        buff[idx] = buff[idx] + src[idx];
    }
}
// this += in1 + in2, element-wise.
template <typename T> void Tensor<T>::add(Tensor<T> *in1, Tensor<T> *in2)
{
    const T *lhs = in1->buff;
    const T *rhs = in2->buff;
    for (int idx = 0; idx < buff_size; idx++) {
        buff[idx] = buff[idx] + lhs[idx] + rhs[idx];
    }
}
// Overwrite this tensor's data with in's (shapes assumed compatible).
template <typename T> void Tensor<T>::reload(Tensor<T> *in)
{
    memcpy(buff, in->buff, in->buff_size * sizeof(T));
}
// Debug print: every element on one line, space-separated.
template <typename T> void Tensor<T>::disp()
{
    int i;
    for (i = 0; i < buff_size; i++) {
        cout << buff[i] << " ";
    }
    cout << endl;
}
// Debug helper: writes the raw buffer bytes to "tmp.bin".
// `mode` is passed straight to fopen, e.g. "wb" to truncate, "ab" to append.
template <typename T> void Tensor<T>::dump(const char *mode)
{
    FILE *fp = fopen("tmp.bin", mode);
    if (fp == NULL) // fix: avoid crashing when the file cannot be opened
        return;
    fwrite(buff, 1, buff_size * sizeof(T), fp);
    fclose(fp);
}
#endif
#include "Vocab.h"
#include <fstream>
#include <iostream>
#include <list>
#include <sstream>
#include <string>
using namespace std;
// Loads the vocabulary: one token per line, token id = line number (0-based).
Vocab::Vocab(const char *filename)
{
    ifstream in(filename);
    string line;
    if (in) // file exists and opened
    {
        while (getline(in, line)) // line excludes the trailing newline
        {
            vocab.push_back(line);
        }
        // cout << vocab[1719] << endl;
    }
    // else // file missing: vocab is silently left empty
    //{
    //    cout << "no such file" << endl;
    // }
}
Vocab::~Vocab()
{
}
// Concatenates the tokens for the given ids with no separator.
// Ids are assumed to be valid indices into the loaded vocabulary.
string Vocab::vector2string(vector<int> in)
{
    // fix: removed unused local `int i;`
    stringstream ss;
    for (auto it = in.begin(); it != in.end(); it++) {
        ss << vocab[*it];
    }
    return ss.str();
}
// Decodes a 3-byte UTF-8 sequence into its Unicode code point.
// Returns 0 when the bytes do not form a valid 3-byte sequence
// (lead byte 1110xxxx followed by two 10xxxxxx continuation bytes).
int str2int(std::string str)
{
    const unsigned char b0 = str.c_str()[0];
    const unsigned char b1 = str.c_str()[1];
    const unsigned char b2 = str.c_str()[2];
    const bool well_formed =
        ((b0 & 0xf0) == 0xe0) && ((b1 & 0xc0) == 0x80) && ((b2 & 0xc0) == 0x80);
    if (!well_formed)
        return 0;
    return ((b0 & 0x0f) << 12) | ((b1 & 0x3f) << 6) | (b2 & 0x3f);
}
// True when `ch` is a single 3-byte UTF-8 character inside the CJK Unified
// Ideographs block (U+4E00..U+9FFF, i.e. 19968..40959).
bool Vocab::isChinese(string ch)
{
    if (ch.size() != 3) {
        return false;
    }
    const int code_point = str2int(ch);
    return code_point >= 19968 && code_point <= 40959;
}
// Converts token ids to display text with three post-processing steps:
//  1. drop structural tokens (<s>, </s>, <unk>);
//  2. merge BPE pieces — tokens ending in "@@" are joined with the
//     following tokens until a piece without "@@" closes the word;
//  3. space handling for English: capitalize a word that follows Chinese
//     text, uppercase single letters, and insert spaces between English
//     words. NOTE(review): `word[0] - 32` assumes lowercase ASCII — confirm
//     vocab entries are ASCII-lowercase.
string Vocab::vector2stringV2(vector<int> in)
{
    int i;
    list<string> words;
    int is_pre_english = false;  // previous emitted word was English
    int pre_english_len = 0;     // its length (1 => treated as abbreviation)
    int is_combining = false;    // currently merging BPE pieces
    string combine = "";
    for (auto it = in.begin(); it != in.end(); it++) {
        string word = vocab[*it];
        // step1: skip structural tokens
        if (word == "<s>" || word == "</s>" || word == "<unk>")
            continue;
        // step2: combine BPE pieces into a full word
        {
            int sub_word = !(word.find("@@") == string::npos);
            // piece with trailing "@@": strip the marker and keep merging
            if (sub_word) {
                combine += word.erase(word.length() - 2);
                is_combining = true;
                continue;
            }
            // final piece of the word being merged
            else if (is_combining) {
                combine += word;
                is_combining = false;
                word = combine;
                combine = "";
            }
        }
        // step3: spacing/capitalization for English words
        {
            // Chinese characters are emitted as-is, no spaces
            if (isChinese(word)) {
                words.push_back(word);
                is_pre_english = false;
            }
            // English word
            else {
                // previous text was Chinese: capitalize the first letter
                if (!is_pre_english) {
                    word[0] = word[0] - 32;
                    words.push_back(word);
                    pre_english_len = word.size();
                }
                // previous word was English
                else {
                    // single letter: uppercase (abbreviation style)
                    if (word.size() == 1) {
                        word[0] = word[0] - 32;
                    }
                    if (pre_english_len > 1) {
                        words.push_back(" ");
                        words.push_back(word);
                        pre_english_len = word.size();
                    }
                    else {
                        // after a single letter, only add a space before a
                        // multi-letter word (keeps abbreviations glued)
                        if (word.size() > 1) {
                            words.push_back(" ");
                        }
                        words.push_back(word);
                        pre_english_len = word.size();
                    }
                }
                is_pre_english = true;
            }
        }
    }
    // for (auto it = words.begin(); it != words.end(); it++) {
    //     cout << *it << endl;
    // }
    stringstream ss;
    for (auto it = words.begin(); it != words.end(); it++) {
        ss << *it;
    }
    return ss.str();
}
// Number of tokens loaded from the vocabulary file.
int Vocab::size()
{
    return static_cast<int>(vocab.size());
}
#ifndef VOCAB_H
#define VOCAB_H
#include <stdint.h>
#include <string>
#include <vector>
using namespace std;
// Token-id -> string mapping loaded from a one-token-per-line text file,
// plus id-sequence -> display-text conversion.
class Vocab {
  private:
    vector<string> vocab; // index = token id
    bool isChinese(string ch);
    bool isEnglish(string ch); // NOTE(review): declared but no definition in this file — confirm
  public:
    Vocab(const char *filename);
    ~Vocab();
    int size();
    string vector2string(vector<int> in);   // plain concatenation
    string vector2stringV2(vector<int> in); // BPE merge + spacing/casing
};
#endif
#include "precomp.h"
/**
 * Allocates required_bytes aligned to `alignment` (must be a power of two).
 * Over-allocates with malloc, rounds the pointer up, and stashes the
 * original malloc pointer in the slot just before the aligned block so
 * aligned_free() can recover it. Returns NULL on allocation failure.
 */
void *aligned_malloc(size_t alignment, size_t required_bytes)
{
    void *raw;     // original malloc block
    void **aligned; // aligned pointer handed to the caller
    // fix: offset was a signed int — use size_t to match the pointer math
    size_t offset = alignment - 1 + sizeof(void *);
    if ((raw = (void *)malloc(required_bytes + offset)) == NULL) {
        return NULL;
    }
    aligned = (void **)(((size_t)(raw) + offset) & ~(alignment - 1));
    aligned[-1] = raw; // remember the original block for aligned_free()
    return aligned;
}
/** Frees a block previously returned by aligned_malloc. */
void aligned_free(void *p)
{
    free(((void **)p)[-1]);
}
#ifndef ALIGNEDMEM_H
#define ALIGNEDMEM_H
#include <stddef.h> // fix: size_t was used below without being declared
// Allocate required_bytes aligned to `alignment` (power of two); NULL on failure.
extern void *aligned_malloc(size_t alignment, size_t required_bytes);
// Release a block obtained from aligned_malloc.
extern void aligned_free(void *p);
#endif
#pragma once
#ifdef _WIN32
#include <codecvt>
// Converts a narrow (locale-encoded) string to a wide string using the
// named locale. NOTE(review): std::codecvt_byname / std::wstring_convert
// are deprecated since C++17 — consider MultiByteToWideChar on Windows.
inline std::wstring string2wstring(const std::string& str, const std::string& locale)
{
    typedef std::codecvt_byname<wchar_t, char, std::mbstate_t> F;
    std::wstring_convert<F> strCnv(new F(locale));
    return strCnv.from_bytes(str);
}
// Convenience wrapper used for model file paths: narrow -> wide via the
// "zh-CN" locale.
inline std::wstring strToWstr(std::string str) {
    if (str.length() == 0)
        return L"";
    return string2wstring(str, "zh-CN");
}
#endif
// Copies the name of input node `nIndex` into inputName; leaves it
// untouched when the session reports no inputs.
inline void getInputName(Ort::Session* session, string& inputName, int nIndex = 0) {
    if (session->GetInputCount() == 0)
        return;
    Ort::AllocatorWithDefaultOptions allocator;
    auto namePtr = session->GetInputNameAllocated(nIndex, allocator);
    inputName = namePtr.get(); // copy before the allocated name is released
}
// Copies the name of output node `nIndex` into outputName; leaves it
// untouched when the session reports no outputs.
inline void getOutputName(Ort::Session* session, string& outputName, int nIndex = 0) {
    if (session->GetOutputCount() == 0)
        return;
    Ort::AllocatorWithDefaultOptions allocator;
    auto namePtr = session->GetOutputNameAllocated(nIndex, allocator);
    outputName = namePtr.get(); // copy before the allocated name is released
}
\ No newline at end of file
#include "precomp.h"
using namespace std;
using namespace paraformer;
// Builds the ONNX Runtime session for a paraformer model directory
// (expects model.onnx and vocab.txt inside `path`) and caches the model's
// two input and two output tensor names for Run().
ModelImp::ModelImp(const char* path, int mode, int nNumThread)
{
    string model_path = pathAppend(path, "model.onnx");
    string vocab_path = pathAppend(path, "vocab.txt");
    fe = new FeatureExtract(mode);
    sessionOptions.SetInterOpNumThreads(nNumThread);
    sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
#ifdef _WIN32
    // ORT on Windows takes a wide-character path
    wstring wstrPath = strToWstr(model_path);
    m_session = new Ort::Session(env, wstrPath.c_str(), sessionOptions);
#else
    m_session = new Ort::Session(env, model_path.c_str(), sessionOptions);
#endif
    string strName;
    getInputName(m_session, strName);
    // fix: push the string directly — the old .c_str() round-trip forced a
    // needless copy and was inconsistent with the sibling push_back calls
    m_strInputNames.push_back(strName);
    getInputName(m_session, strName, 1);
    m_strInputNames.push_back(strName);
    getOutputName(m_session, strName);
    m_strOutputNames.push_back(strName);
    getOutputName(m_session, strName, 1);
    m_strOutputNames.push_back(strName);
    // raw pointers for the C-style Run() API; they stay valid because the
    // backing strings are members that are never resized afterwards
    for (auto& item : m_strInputNames)
        m_szInputNames.push_back(item.c_str());
    for (auto& item : m_strOutputNames)
        m_szOutputNames.push_back(item.c_str());
    vocab = new Vocab(vocab_path.c_str());
}
// Releases the feature extractor, the ORT session and the vocabulary.
ModelImp::~ModelImp()
{
    delete fe;
    //delete p_helper;
    //delete encoder;
    //delete predictor;
    //
    //delete decoder;
    if (m_session)
    {
        delete m_session;
        m_session = nullptr;
    }
    delete vocab;
}
// Clears the feature extractor's buffered state between utterances.
void ModelImp::reset()
{
    fe->reset();
}
// Low Frame Rate (LFR) stacking: every 6th 80-dim fbank frame is stacked
// with its 3 left and 3 right neighbours (7 frames -> one 560-dim row),
// clamping indices at the signal edges. Replaces din with the stacked
// tensor (the old tensor is freed).
void ModelImp::apply_lfr(Tensor<float>*& din)
{
    int mm = din->size[2];   // number of input frames
    int ll = ceil(mm / 6.0); // output rows, stride 6 (lfr_n)
    Tensor<float>* tmp = new Tensor<float>(ll, 560); // 7 * 80 stacked dims
    int i, j;
    int out_offset = 0;
    for (i = 0; i < ll; i++) {
        for (j = 0; j < 7; j++) { // lfr_m = 7 frames per output row
            int idx = i * 6 + j - 3; // centered window: 3 left, 3 right
            if (idx < 0) {
                idx = 0; // clamp at the beginning
            }
            if (idx >= mm) {
                idx = mm - 1; // clamp at the end
            }
            memcpy(tmp->buff + out_offset, din->buff + idx * 80,
                   sizeof(float) * 80);
            out_offset += 80;
        }
    }
    delete din;
    din = tmp;
}
// Applies global CMVN in place: x = (x + mean) * var, with per-dimension
// statistics baked in as hex dumps from predefine_coe.h.
// NOTE(review): the "+ mean" / "* var" form suggests mean is stored negated
// and var as inverse stddev — confirm against the coefficient generator.
void ModelImp::apply_cmvn(Tensor<float>* din)
{
    const float* var;
    const float* mean;
    // fix: removed unused local `float scale = 22.6274169979695;`
    int m = din->size[2]; // rows (LFR frames)
    int n = din->size[3]; // feature dimension
    var = (const float*)paraformer_cmvn_var_hex;
    mean = (const float*)paraformer_cmvn_mean_hex;
    int i, j;
    for (i = 0; i < m; i++) {
        for (j = 0; j < n; j++) {
            int idx = i * n + j;
            din->buff[idx] = (din->buff[idx] + mean[j]) * var[j];
        }
    }
}
// Greedy (argmax) decoding: pick the most likely token id at each of the
// nLen time steps, then map the ids to text.
// NOTE(review): 8404 is the model's output vocabulary width — it must match
// the ONNX model and vocab.txt; confirm before swapping models.
string ModelImp::greedy_search(float * in, int nLen )
{
    vector<int> hyps;
    int Tmax = nLen;
    int i;
    for (i = 0; i < Tmax; i++) {
        int max_idx;
        float max_val;
        findmax(in + i * 8404, 8404, max_val, max_idx);
        hyps.push_back(max_idx);
    }
    return vocab->vector2stringV2(hyps);
}
// Full recognition pass for one chunk of samples:
// fbank features -> LFR stacking -> CMVN -> ONNX model -> greedy argmax -> text.
// Returns "" when the extractor has not yet accumulated a full window.
string ModelImp::forward(float* din, int len, int flag)
{
    // fix: `in` was left uninitialized; if fetch() produced nothing the
    // old code dereferenced/deleted a garbage pointer
    Tensor<float>* in = nullptr;
    fe->insert(din, len, flag);
    if (!fe->fetch(in) || in == nullptr) {
        return ""; // no feature window ready yet
    }
    apply_lfr(in);
    apply_cmvn(in);
    Ort::RunOptions run_option;
    // model inputs: float feats [1, T, 560] and an int32 length tensor [1]
    std::array<int64_t, 3> input_shape_{ in->size[0], in->size[2], in->size[3] };
    Ort::Value onnx_feats = Ort::Value::CreateTensor<float>(m_memoryInfo,
        in->buff,
        in->buff_size,
        input_shape_.data(),
        input_shape_.size());
    std::vector<int32_t> feats_len{ in->size[2] };
    std::vector<int64_t> feats_len_dim{ 1 };
    Ort::Value onnx_feats_len = Ort::Value::CreateTensor(
        m_memoryInfo,
        feats_len.data(),
        feats_len.size() * sizeof(int32_t),
        feats_len_dim.data(),
        feats_len_dim.size(), ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32);
    std::vector<Ort::Value> input_onnx;
    input_onnx.emplace_back(std::move(onnx_feats));
    input_onnx.emplace_back(std::move(onnx_feats_len));
    auto outputTensor = m_session->Run(run_option, m_szInputNames.data(), input_onnx.data(), m_szInputNames.size(), m_szOutputNames.data(), m_szOutputNames.size());
    // outputs: [0] posteriors, [1] valid output length (int64)
    float* floatData = outputTensor[0].GetTensorMutableData<float>();
    auto encoder_out_lens = outputTensor[1].GetTensorMutableData<int64_t>();
    string result = greedy_search(floatData, *encoder_out_lens);
    delete in;
    return result;
}
// Streaming (chunk-wise) decoding — not implemented yet.
string ModelImp::forward_chunk(float* din, int len, int flag)
{
    printf("Not Imp!!!!!!\n");
    return "Hello";
}
// N-best rescoring — not implemented yet.
string ModelImp::rescoring()
{
    printf("Not Imp!!!!!!\n");
    return "Hello";
}
#pragma once
#ifndef PARAFORMER_MODELIMP_H
#define PARAFORMER_MODELIMP_H
namespace paraformer {
// ONNX Runtime-backed implementation of the Model interface: feature
// extraction + paraformer inference + greedy decoding to text.
class ModelImp : public Model {
  private:
    FeatureExtract* fe; // owned
    Vocab* vocab;       // owned
    void apply_lfr(Tensor<float>*& din);
    void apply_cmvn(Tensor<float>* din);
    string greedy_search( float* in, int nLen);
#ifdef _WIN_X86
    Ort::MemoryInfo m_memoryInfo = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
#else
    Ort::MemoryInfo m_memoryInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
#endif
    Ort::Session* m_session = nullptr; // owned, created in the ctor
    Ort::Env env = Ort::Env(ORT_LOGGING_LEVEL_ERROR, "paraformer");
    Ort::SessionOptions sessionOptions = Ort::SessionOptions();
    // name storage; m_sz* hold c_str() pointers into m_str* for the C API
    vector<string> m_strInputNames, m_strOutputNames;
    vector<const char*> m_szInputNames;
    vector<const char*> m_szOutputNames;
    //string m_strInputName, m_strInputNameLen;
    //string m_strOutputName, m_strOutputNameLen;
  public:
    ModelImp(const char* path, int mode, int nNumThread=4);
    ~ModelImp();
    void reset();
    string forward_chunk(float* din, int len, int flag);
    string forward(float* din, int len, int flag);
    string rescoring();
};
} // namespace paraformer
#endif
#pragma once
// system
#include "alignedmem.h"
#include <iostream>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <deque>
#include <iostream>
#include <list>
#include <locale.h>
#include <vector>
#include <string>
#include <math.h>
#include <numeric>
using namespace std;
// third part
#include <fftw3.h>
#include "onnxruntime_run_options_config_keys.h"
#include "onnxruntime_cxx_api.h"
// mine
#include "commonfunc.h"
#include <ComDefine.h>
#include "predefine_coe.h"
#include "Vocab.h"
#include "util.h"
#include "CommonStruct.h"
#include "FeatureExtract.h"
#include "SpeechWrap.h"
#include "Model.h"
#include "paraformer_onnx.h"
using namespace paraformer;
\ No newline at end of file
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment