"torch_scatter/std.py" did not exist on "d305ecc0d973fb73cdaff4ac2eba742017751c9a"
Commit 06924f5d authored by mayong's avatar mayong
Browse files

Add the src of libs.

parent 83ff3a7f
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <webrtc_vad.h>
#include "Audio.h"
using namespace std;
// Fixed-size sliding window over per-step VAD votes: put() pushes a new
// value and returns the sum of the most recent `window_size` values.
class AudioWindow {
  private:
    int *window;         // ring buffer of window_size + 1 slots
    int in_idx;          // next slot to write
    int out_idx;         // oldest slot; its value leaves the sum on put()
    int sum;             // running sum of the last window_size values
    int window_size = 0;

  public:
    AudioWindow(int window_size) : window_size(window_size)
    {
        // fix: calloc takes (count, element-size) — one spare slot keeps
        // in_idx and out_idx from colliding
        window = (int *)calloc(window_size + 1, sizeof(int));
        in_idx = 0;
        out_idx = 1;
        sum = 0;
    };
    ~AudioWindow()
    {
        free(window);
    };
    // fix: the class owns a raw buffer — forbid copies so the buffer is
    // never double-freed (rule of three)
    AudioWindow(const AudioWindow &) = delete;
    AudioWindow &operator=(const AudioWindow &) = delete;
    // Push val, drop the value that fell out of the window, return the sum.
    int put(int val)
    {
        sum = sum + val - window[out_idx];
        window[in_idx] = val;
        in_idx = in_idx == window_size ? 0 : in_idx + 1;
        out_idx = out_idx == window_size ? 0 : out_idx + 1;
        return sum;
    };
};
// Default frame: start/len are filled in later via set_start()/set_end().
AudioFrame::AudioFrame(){};
// Frame of `len` samples beginning at sample 0.
AudioFrame::AudioFrame(int len) : len(len)
{
    start = 0;
};
// No owned resources to release.
AudioFrame::~AudioFrame(){};
// Store the frame's start sample, clamped to be non-negative; returns it.
int AudioFrame::set_start(int val)
{
    start = (val >= 0) ? val : 0;
    return start;
}
// Sets the frame end so the covered span is a whole number of fbank frames:
// 400-sample windows advanced by a 160-sample shift, rounding the span up.
// Updates `len`, warns (but does not clamp) when the end exceeds max_len,
// and returns the new end.
int AudioFrame::set_end(int val, int max_len)
{
    float num_samples = val - start;
    float frame_length = 400;
    float frame_shift = 160;
    // fix: use frame_length instead of a duplicated literal 400 so the
    // framing constants stay consistent if ever changed
    float num_new_samples =
        ceil((num_samples - frame_length) / frame_shift) * frame_shift +
        frame_length;
    end = start + num_new_samples;
    len = (int)num_new_samples;
    if (end > max_len)
        printf("frame end > max_len!!!!!!!\n");
    return end;
}
// First sample index of this frame.
int AudioFrame::get_start()
{
    return start;
};
// Number of samples in this frame.
int AudioFrame::get_len()
{
    return len;
};
// Debug printout placeholder — not implemented.
int AudioFrame::disp()
{
    printf("not imp!!!!\n");
    return 0;
};
// data_type == 1 means 16-bit PCM input that loadwav() scales by 1/32768
// when converting to float.
Audio::Audio(int data_type) : data_type(data_type)
{
    speech_buff = NULL;
    speech_data = NULL;
    align_size = 1360; // default chunk alignment, in samples
}
// Same, with a caller-chosen alignment size.
Audio::Audio(int data_type, int size) : data_type(data_type)
{
    speech_buff = NULL;
    speech_data = NULL;
    align_size = (float)size;
}
Audio::~Audio()
{
    // speech_buff and speech_data are always allocated together in loadwav()
    if (speech_buff != NULL) {
        free(speech_buff);
        free(speech_data);
    }
}
// Print the loaded audio duration (assumes a 16 kHz sample rate).
void Audio::disp()
{
    printf("Audio time is %f s. len is %d\n", (float)speech_len / 16000,
           speech_len);
}
// Loads a 16 kHz / 16-bit mono WAV file: skips the canonical 44-byte RIFF
// header, keeps the raw int16 samples in speech_buff and a float copy in
// speech_data (scaled to [-1, 1) when data_type == 1). Both buffers are
// zero-padded up to a multiple of align_size. Queues one AudioFrame that
// covers the whole signal. NOTE(review): assumes a canonical 44-byte
// header; WAVs with extra chunks will be mis-parsed — confirm inputs.
void Audio::loadwav(const char *filename)
{
    if (speech_buff != NULL) {
        free(speech_buff);
        free(speech_data);
        speech_buff = NULL; // fix: no dangling pointers if the load fails
        speech_data = NULL;
    }
    offset = 0;
    FILE *fp = fopen(filename, "rb");
    if (fp == NULL) { // fix: do not crash on a missing/unreadable file
        printf("Failed to open wav file %s\n", filename);
        return;
    }
    fseek(fp, 0, SEEK_END);
    uint32_t nFileLen = ftell(fp);
    fseek(fp, 44, SEEK_SET); // skip the 44-byte WAV header
    speech_len = (nFileLen - 44) / 2; // int16 samples
    speech_align_len = (int)(ceil((float)speech_len / align_size) * align_size);
    speech_buff = (int16_t *)malloc(sizeof(int16_t) * speech_align_len);
    memset(speech_buff, 0, sizeof(int16_t) * speech_align_len);
    size_t nread = fread(speech_buff, sizeof(int16_t), speech_len, fp);
    fclose(fp);
    if (nread != (size_t)speech_len) // fix: surface short reads instead of ignoring them
        printf("short read on %s\n", filename);
    speech_data = (float *)malloc(sizeof(float) * speech_align_len);
    memset(speech_data, 0, sizeof(float) * speech_align_len);
    int i;
    float scale = 1;
    if (data_type == 1) {
        scale = 32768; // int16 -> [-1, 1) float
    }
    for (i = 0; i < speech_len; i++) {
        speech_data[i] = (float)speech_buff[i] / scale;
    }
    // one frame spanning the whole signal; split()/padding() may replace it
    AudioFrame *frame = new AudioFrame(speech_len);
    frame_queue.push(frame);
}
// Hands out a pointer into speech_data of `len` samples and advances the
// read offset. Returns S_ERR when the signal is exhausted, S_END on the
// exact final chunk, S_MIDDLE otherwise.
// NOTE(review): when the remaining samples are fewer than `len` but offset
// != speech_align_len - len, the S_MIDDLE branch still exposes `len`
// samples — callers appear to rely on speech_align_len being a multiple of
// the chunk size; confirm.
int Audio::fetch_chunck(float *&dout, int len)
{
    if (offset >= speech_align_len) {
        dout = NULL;
        return S_ERR;
    } else if (offset == speech_align_len - len) {
        dout = speech_data + offset;
        offset = speech_align_len;
        // temporary workaround: drop the queued whole-signal frame
        AudioFrame *frame = frame_queue.front();
        frame_queue.pop();
        delete frame;
        return S_END;
    } else {
        dout = speech_data + offset;
        offset += len;
        return S_MIDDLE;
    }
}
int Audio::fetch(float *&dout, int &len, int &flag)
{
if (frame_queue.size() > 0) {
AudioFrame *frame = frame_queue.front();
frame_queue.pop();
dout = speech_data + frame->get_start();
len = frame->get_len();
delete frame;
flag = S_END;
return 1;
} else {
return 0;
}
}
// Reflect-pads the signal for fbank framing: (frame_length - frame_shift)/2
// mirrored samples on the left, and enough mirrored samples on the right so
// the total covers num_frames full 400/160 frames. Replaces
// speech_data/speech_len with the padded signal and swaps the queued
// whole-signal frame for one spanning the new length.
void Audio::padding()
{
    float num_samples = speech_len;
    float frame_length = 400;
    float frame_shift = 160;
    float num_frames = floor((num_samples + (frame_shift / 2)) / frame_shift);
    float num_new_samples = (num_frames - 1) * frame_shift + frame_length;
    float num_padding = num_new_samples - num_samples;
    float num_left_padding = (frame_length - frame_shift) / 2;
    float num_right_padding = num_padding - num_left_padding;
    float *new_data = (float *)malloc(num_new_samples * sizeof(float));
    int i;
    int tmp_off = 0;
    // left padding: mirror the first samples (no repeated edge sample)
    for (i = 0; i < num_left_padding; i++) {
        int ii = num_left_padding - i - 1;
        new_data[i] = speech_data[ii];
    }
    tmp_off = num_left_padding;
    // copy the original signal after the left padding
    memcpy(new_data + tmp_off, speech_data, speech_len * sizeof(float));
    tmp_off += speech_len;
    // right padding: mirror the last samples
    for (i = 0; i < num_right_padding; i++) {
        int ii = speech_len - i - 1;
        new_data[tmp_off + i] = speech_data[ii];
    }
    free(speech_data);
    speech_data = new_data;
    speech_len = num_new_samples;
    // push a frame for the padded signal, then pop-and-delete the old
    // front frame that loadwav() queued
    AudioFrame *frame = new AudioFrame(num_new_samples);
    frame_queue.push(frame);
    frame = frame_queue.front();
    frame_queue.pop();
    delete frame;
}
// VAD trigger states and speech-length thresholds (samples at 16 kHz),
// used by Audio::split() to decide when to cut a segment.
#define UNTRIGGERED 0
#define TRIGGERED 1
#define SPEECH_LEN_5S (16000 * 5)
#define SPEECH_LEN_10S (16000 * 10)
#define SPEECH_LEN_20S (16000 * 20)
#define SPEECH_LEN_30S (16000 * 30)
// Voice-activity segmentation of the loaded signal using WebRTC VAD.
// Walks the signal in 480-sample (30 ms @ 16 kHz) steps; a sliding window
// over the per-step VAD votes opens a segment when nearly every recent step
// was voiced, and closes it when the vote count falls below a gap threshold
// that tightens as the segment grows (long segments get cut sooner).
// Closed segments are pushed onto frame_queue.
void Audio::split()
{
    VadInst *handle = WebRtcVad_Create();
    WebRtcVad_Init(handle);
    WebRtcVad_set_mode(handle, 2); // aggressiveness mode 2
    int window_size = 10;          // votes tracked = 300 ms of context
    AudioWindow audiowindow(window_size);
    int status = UNTRIGGERED;
    int offset = 0;
    int fs = 16000;
    int step = 480; // samples per VAD decision (30 ms)
    AudioFrame *frame;
    // discard the whole-signal frame queued by loadwav()/padding()
    frame = frame_queue.front();
    frame_queue.pop();
    delete frame;
    frame = NULL;
    while (offset < speech_len - step) {
        int n = WebRtcVad_Process(handle, fs, speech_buff + offset, step);
        if (status == UNTRIGGERED && audiowindow.put(n) >= window_size - 1) {
            // nearly all recent steps voiced: open a segment starting
            // where the voiced run began
            frame = new AudioFrame();
            int start = offset - step * (window_size - 1);
            frame->set_start(start);
            status = TRIGGERED;
        } else if (status == TRIGGERED) {
            int win_weight = audiowindow.put(n);
            int voice_len = (offset - frame->get_start());
            int gap = 0;
            if (voice_len < SPEECH_LEN_5S) {
                // too short to cut: keep collecting
                offset += step;
                continue;
            } else if (voice_len < SPEECH_LEN_10S) {
                gap = 1;
            } else if (voice_len < SPEECH_LEN_20S) {
                gap = window_size / 5;
            } else {
                gap = window_size / 2;
            }
            if (win_weight < gap) {
                // voiced votes dropped below the threshold: close the segment
                status = UNTRIGGERED;
                offset = frame->set_end(offset, speech_align_len);
                frame_queue.push(frame);
                frame = NULL;
            }
        }
        offset += step;
    }
    if (frame != NULL) {
        // signal ended while still triggered: close the trailing segment
        frame->set_end(speech_len, speech_align_len);
        frame_queue.push(frame);
        frame = NULL;
    }
    WebRtcVad_Free(handle);
}
# Collect library sources (top level + the paraformer backend).
file(GLOB files1 "*.cpp")
file(GLOB files4 "paraformer/*.cpp")
set(files ${files1} ${files2} ${files3} ${files4})
# message("${files}")
add_library(rapidasr ${files})
if(WIN32)
    set(EXTRA_LIBS libfftw3f-3 libopenblas webrtcvad)
    if(CMAKE_CL_64)
        target_link_directories(rapidasr PUBLIC ${CMAKE_SOURCE_DIR}/win/lib/x64)
    else()
        target_link_directories(rapidasr PUBLIC ${CMAKE_SOURCE_DIR}/win/lib/x86)
    endif()
    target_include_directories(rapidasr PUBLIC ${CMAKE_SOURCE_DIR}/win/include ${CMAKE_SOURCE_DIR}/win/include/openblas)
else()
    set(EXTRA_LIBS fftw3f openblas webrtcvad pthread)
    target_include_directories(rapidasr PUBLIC "/usr/local/opt/fftw/include")
    target_link_directories(rapidasr PUBLIC "/usr/local/opt/fftw/lib")
    target_include_directories(rapidasr PUBLIC "/usr/local/opt/openblas/include")
    target_link_directories(rapidasr PUBLIC "/usr/local/opt/openblas/lib")
    target_include_directories(rapidasr PUBLIC "/usr/include")
    target_link_directories(rapidasr PUBLIC "/usr/lib64")
    target_include_directories(rapidasr PUBLIC ${OPENBLAS_INCLUDE_DIR} ${FFTW3F_INCLUDE_DIR})
    target_link_directories(rapidasr PUBLIC ${OPENBLAS_LIBRARY_DIR} ${FFTW3F_LIBRARY_DIR})
endif()
# fix: use forward slashes — backslashes act as escape characters in CMake
# strings and break non-Windows generators (CMake accepts "/" on Windows too)
include_directories(${ONNXRUNTIME_DIR}/include)
message(${ONNXRUNTIME_DIR}/lib)
include_directories(${CMAKE_SOURCE_DIR}/include)
target_link_libraries(rapidasr PUBLIC onnxruntime ${EXTRA_LIBS})
// Placeholder header: include guard only, no declarations yet.
#ifndef COMMONSTRUCT_H
#define COMMONSTRUCT_H
#endif
This diff is collapsed.
#ifndef FEATUREEXTRACT_H
#define FEATUREEXTRACT_H
#include <fftw3.h>
#include <stdint.h>
#include "FeatureQueue.h"
#include "SpeechWrap.h"
#include "Tensor.h"
// Streaming filterbank feature extractor: raw samples go in via insert(),
// windows of 80-dim features come out via fetch() as Tensor<float>.
class FeatureExtract {
  private:
    SpeechWrap speech;   // bridges samples across successive insert() calls
    FeatureQueue fqueue; // accumulates per-frame features into windows
    int mode;            // NOTE(review): semantics set by the caller — confirm
    float *fft_input;
    fftwf_complex *fft_out;
    fftwf_plan p;        // FFTW plan, reused across frames
    void fftw_init();
    void melspect(float *din, float *dout); // spectrum -> mel filterbank
    void global_cmvn(float *din);           // global mean/variance normalization
  public:
    FeatureExtract(int mode);
    ~FeatureExtract();
    int size();   // number of ready feature windows
    int status();
    void reset();
    void insert(float *din, int len, int flag);
    bool fetch(Tensor<float> *&dout); // pops one window; caller owns the tensor
};
#endif
#include "FeatureQueue.h"
#include "CommonStruct.h"
#include <string>
#include <ComDefine.h>
FeatureQueue::FeatureQueue()
{
    // default window: 67 frames of 80-dim filterbank features
    buff = new Tensor<float>(67, 80);
    window_size = 67;
    buff_idx = 0;
}
FeatureQueue::~FeatureQueue()
{
    delete buff;
}
// Re-create the staging buffer with a new window size (frames per window).
void FeatureQueue::reinit(int size)
{
    delete buff;
    buff = new Tensor<float>(size, 80);
    buff_idx = 0;
    window_size = size;
}
// Drop any partially accumulated frames.
void FeatureQueue::reset()
{
    buff_idx = 0;
}
// Appends one 80-dim feature frame to the staging buffer.
// On S_END the partial window is flushed to feature_queue as-is; when the
// buffer fills, the complete window is queued and its last 3 frames are
// carried into a fresh buffer as left-context for the next window.
void FeatureQueue::push(float *din, int flag)
{
    int offset = buff_idx * 80;
    memcpy(buff->buff + offset, din, 80 * sizeof(float));
    buff_idx++;
    if (flag == S_END) {
        // flush exactly the frames accumulated so far
        Tensor<float> *tmp = new Tensor<float>(buff_idx, 80);
        memcpy(tmp->buff, buff->buff, buff_idx * 80 * sizeof(float));
        feature_queue.push(tmp);
        buff_idx = 0;
    } else if (buff_idx == window_size) {
        // hand ownership of the full window to the queue, then seed a new
        // staging buffer with the last 3 frames (overlap/context)
        feature_queue.push(buff);
        Tensor<float> *tmp = new Tensor<float>(window_size, 80);
        memcpy(tmp->buff, buff->buff + (window_size - 3) * 80,
               3 * 80 * sizeof(float));
        buff_idx = 3;
        buff = tmp;
    }
}
// Removes and returns the oldest queued feature window (caller owns it).
// Precondition: size() > 0.
Tensor<float> *FeatureQueue::pop()
{
    Tensor<float> *front_feat = feature_queue.front();
    feature_queue.pop();
    return front_feat;
}
// Number of complete feature windows waiting to be consumed.
int FeatureQueue::size()
{
    return (int)feature_queue.size();
}
#ifndef FEATUREQUEUE_H
#define FEATUREQUEUE_H
#include "Tensor.h"
#include <queue>
#include <stdint.h>
using namespace std;
// Accumulates 80-dim feature frames into fixed-size windows and queues the
// completed windows as Tensor<float> for the model to consume.
class FeatureQueue {
  private:
    queue<Tensor<float> *> feature_queue; // completed windows (owned)
    Tensor<float> *buff;                  // staging buffer being filled
    int buff_idx;                         // frames currently staged
    int window_size;                      // frames per completed window
  public:
    FeatureQueue();
    ~FeatureQueue();
    void reinit(int size);
    void reset();
    void push(float *din, int flag);
    Tensor<float> *pop(); // caller takes ownership
    int size();
};
#endif
#include "precomp.h"
// Factory: builds the paraformer ONNX-backed model implementation.
// The caller owns the returned object.
Model *create_model(const char *path, int mode)
{
    return new paraformer::ModelImp(path, mode);
}
#include "precomp.h"
SpeechWrap::SpeechWrap()
{
    cache_size = 0;
}
SpeechWrap::~SpeechWrap()
{
}
// Forget any cached tail from a previous signal.
void SpeechWrap::reset()
{
    cache_size = 0;
}
// Attach a new input chunk (borrowed, not copied); the addressable view
// becomes: cached tail followed by this chunk.
void SpeechWrap::load(float *din, int len)
{
    in = din;
    in_size = len;
    total_size = cache_size + in_size;
}
// Samples currently addressable through operator[].
int SpeechWrap::size()
{
    return total_size;
}
// Keeps the unconsumed tail (samples from `offset` to the end of the view)
// as the cache for the next load().
// NOTE(review): assumes the remainder fits the fixed 400-float cache and
// that offset >= cache_size (so the tail lies entirely in `in`) — confirm
// against FeatureExtract's consumption pattern.
void SpeechWrap::update(int offset)
{
    int in_offset = offset - cache_size;
    cache_size = (total_size - offset);
    memcpy(cache, in + in_offset, cache_size * sizeof(float));
}
// Virtual concatenation: indexes the cached tail first, then the freshly
// loaded input chunk.
float &SpeechWrap::operator[](int i)
{
    if (i < cache_size) {
        return cache[i];
    }
    return in[i - cache_size];
}
#ifndef SPEECHWRAP_H
#define SPEECHWRAP_H
#include <stdint.h>
// Presents a cached tail from the previous chunk plus the current chunk as
// one contiguous-looking sample stream (see operator[]).
class SpeechWrap {
  private:
    float cache[400];    // tail carried over between load() calls
    int cache_size;      // valid floats in cache
    float *in;           // current chunk (borrowed from the caller)
    int in_size;
    int total_size;      // cache_size + in_size
    int next_cache_size; // NOTE(review): appears unused in this file — confirm
  public:
    SpeechWrap();
    ~SpeechWrap();
    void load(float *din, int len);
    void update(int offset);
    void reset();
    int size();
    float &operator[](int i);
};
#endif
#ifndef TENSOR_H
#define TENSOR_H
#include "alignedmem.h"
using namespace std;
// Simple dense 4-D tensor over 32-byte-aligned storage. The shape lives in
// size[4] (unused leading dims are 1); data is one flat row-major buffer.
template <typename T> class Tensor {
  private:
    void alloc_buff();
    void free_buff();
    int mem_size; // allocated capacity in elements (may exceed buff_size)
  public:
    T *buff;       // flat element storage, 32-byte aligned
    int size[4];   // logical shape
    int buff_size; // valid elements = size[0]*size[1]*size[2]*size[3]
    Tensor(Tensor<T> *in); // deep copy
    Tensor(int a);
    Tensor(int a, int b);
    Tensor(int a, int b, int c);
    Tensor(int a, int b, int c, int d);
    ~Tensor();
    void zeros();
    void shape();                // print the shape
    void disp();                 // print every element
    void dump(const char *mode); // write raw bytes to "tmp.bin"
    void concat(Tensor<T> *din, int dim); // append din along dim
    void resize(int a, int b, int c, int d);
    void add(float coe, Tensor<T> *in);       // this += coe * in
    void add(Tensor<T> *in);                  // this += in
    void add(Tensor<T> *in1, Tensor<T> *in2); // this += in1 + in2
    void reload(Tensor<T> *in); // overwrite data from in
};
// 1-D tensor of length a (leading dims padded with 1).
template <typename T> Tensor<T>::Tensor(int a) : size{1, 1, 1, a}
{
    alloc_buff();
}
// 2-D tensor (a rows, b cols).
template <typename T> Tensor<T>::Tensor(int a, int b) : size{1, 1, a, b}
{
    alloc_buff();
}
// 3-D tensor.
template <typename T> Tensor<T>::Tensor(int a, int b, int c) : size{1, a, b, c}
{
    alloc_buff();
}
// 4-D tensor.
template <typename T>
Tensor<T>::Tensor(int a, int b, int c, int d) : size{a, b, c, d}
{
    alloc_buff();
}
// Deep copy: same shape, own freshly allocated buffer.
template <typename T> Tensor<T>::Tensor(Tensor<T> *in)
{
    memcpy(size, in->size, 4 * sizeof(int));
    alloc_buff();
    memcpy(buff, in->buff, in->buff_size * sizeof(T));
}
template <typename T> Tensor<T>::~Tensor()
{
    free_buff();
}
// Allocates the flat buffer sized by the current shape (32-byte aligned).
template <typename T> void Tensor<T>::alloc_buff()
{
    buff_size = size[0] * size[1] * size[2] * size[3];
    mem_size = buff_size; // capacity tracks the initial allocation
    buff = (T *)aligned_malloc(32, buff_size * sizeof(T));
}
template <typename T> void Tensor<T>::free_buff()
{
    aligned_free(buff);
}
// Zero-fill all valid elements.
template <typename T> void Tensor<T>::zeros()
{
    memset(buff, 0, buff_size * sizeof(T));
}
// Debug print of the logical shape.
template <typename T> void Tensor<T>::shape()
{
    printf("(%d,%d,%d,%d)\n", size[0], size[1], size[2], size[3]);
}
// Appends din's elements after this tensor's data and grows size[dim].
// Fixes the old TODO: the buffer is reallocated when capacity would be
// exceeded, instead of writing past the end of the allocation.
// NOTE(review): only meaningful when `dim` is the slowest-varying non-unit
// dimension (simple byte append) — confirm callers.
template <typename T> void Tensor<T>::concat(Tensor<T> *din, int dim)
{
    int new_size = buff_size + din->buff_size;
    if (new_size > mem_size) {
        // grow: copy existing elements into a larger aligned block
        T *new_buff = (T *)aligned_malloc(32, new_size * sizeof(T));
        memcpy(new_buff, buff, buff_size * sizeof(T));
        aligned_free(buff);
        buff = new_buff;
        mem_size = new_size;
    }
    memcpy(buff + buff_size, din->buff, din->buff_size * sizeof(T));
    buff_size = new_size;
    size[dim] += din->size[dim];
}
// Reshape to (a, b, c, d). Fixes the old TODO: when the new element count
// exceeds the allocated capacity, the buffer is grown (existing elements
// preserved) instead of silently leaving buff_size larger than the
// allocation and inviting out-of-bounds access.
template <typename T> void Tensor<T>::resize(int a, int b, int c, int d)
{
    int new_size = a * b * c * d;
    if (new_size > mem_size) {
        T *new_buff = (T *)aligned_malloc(32, new_size * sizeof(T));
        memcpy(new_buff, buff, buff_size * sizeof(T)); // keep old contents
        aligned_free(buff);
        buff = new_buff;
        mem_size = new_size;
    }
    size[0] = a;
    size[1] = b;
    size[2] = c;
    size[3] = d;
    buff_size = new_size;
}
// this += coe * in, element-wise over the valid elements.
template <typename T> void Tensor<T>::add(float coe, Tensor<T> *in)
{
    const T *src = in->buff;
    for (int idx = 0; idx < buff_size; idx++) {
        buff[idx] = buff[idx] + coe * src[idx];
    }
}
// this += in, element-wise.
template <typename T> void Tensor<T>::add(Tensor<T> *in)
{
    const T *src = in->buff;
    for (int idx = 0; idx < buff_size; idx++) {
        buff[idx] = buff[idx] + src[idx];
    }
}
// this += in1 + in2, element-wise.
template <typename T> void Tensor<T>::add(Tensor<T> *in1, Tensor<T> *in2)
{
    const T *lhs = in1->buff;
    const T *rhs = in2->buff;
    for (int idx = 0; idx < buff_size; idx++) {
        buff[idx] = buff[idx] + lhs[idx] + rhs[idx];
    }
}
// Overwrite this tensor's data with in's (shapes assumed compatible).
template <typename T> void Tensor<T>::reload(Tensor<T> *in)
{
    memcpy(buff, in->buff, in->buff_size * sizeof(T));
}
// Debug print: every element on one line, space-separated.
template <typename T> void Tensor<T>::disp()
{
    int i;
    for (i = 0; i < buff_size; i++) {
        cout << buff[i] << " ";
    }
    cout << endl;
}
// Debug helper: writes the raw buffer bytes to "tmp.bin".
// `mode` is passed straight to fopen, e.g. "wb" to truncate, "ab" to append.
template <typename T> void Tensor<T>::dump(const char *mode)
{
    FILE *fp = fopen("tmp.bin", mode);
    if (fp == NULL) // fix: avoid crashing when the file cannot be opened
        return;
    fwrite(buff, 1, buff_size * sizeof(T), fp);
    fclose(fp);
}
#endif
#include "Vocab.h"
#include <fstream>
#include <iostream>
#include <list>
#include <sstream>
#include <string>
using namespace std;
// Loads the vocabulary: one token per line, token id = line number (0-based).
Vocab::Vocab(const char *filename)
{
    ifstream in(filename);
    string line;
    if (in) // file exists and opened
    {
        while (getline(in, line)) // line excludes the trailing newline
        {
            vocab.push_back(line);
        }
        // cout << vocab[1719] << endl;
    }
    // else // file missing: vocab is silently left empty
    //{
    //    cout << "no such file" << endl;
    // }
}
Vocab::~Vocab()
{
}
// Concatenates the tokens for the given ids with no separator.
// Ids are assumed to be valid indices into the loaded vocabulary.
string Vocab::vector2string(vector<int> in)
{
    // fix: removed unused local `int i;`
    stringstream ss;
    for (auto it = in.begin(); it != in.end(); it++) {
        ss << vocab[*it];
    }
    return ss.str();
}
// Decodes a 3-byte UTF-8 sequence into its Unicode code point.
// Returns 0 when the bytes do not form a valid 3-byte sequence
// (lead byte 1110xxxx followed by two 10xxxxxx continuation bytes).
int str2int(std::string str)
{
    const unsigned char b0 = str.c_str()[0];
    const unsigned char b1 = str.c_str()[1];
    const unsigned char b2 = str.c_str()[2];
    const bool well_formed =
        ((b0 & 0xf0) == 0xe0) && ((b1 & 0xc0) == 0x80) && ((b2 & 0xc0) == 0x80);
    if (!well_formed)
        return 0;
    return ((b0 & 0x0f) << 12) | ((b1 & 0x3f) << 6) | (b2 & 0x3f);
}
// True when `ch` is a single 3-byte UTF-8 character inside the CJK Unified
// Ideographs block (U+4E00..U+9FFF, i.e. 19968..40959).
bool Vocab::isChinese(string ch)
{
    if (ch.size() != 3) {
        return false;
    }
    const int code_point = str2int(ch);
    return code_point >= 19968 && code_point <= 40959;
}
// Converts token ids to display text with three post-processing steps:
//  1. drop structural tokens (<s>, </s>, <unk>);
//  2. merge BPE pieces — tokens ending in "@@" are joined with the
//     following tokens until a piece without "@@" closes the word;
//  3. space handling for English: capitalize a word that follows Chinese
//     text, uppercase single letters, and insert spaces between English
//     words. NOTE(review): `word[0] - 32` assumes lowercase ASCII — confirm
//     vocab entries are ASCII-lowercase.
string Vocab::vector2stringV2(vector<int> in)
{
    int i;
    list<string> words;
    int is_pre_english = false;  // previous emitted word was English
    int pre_english_len = 0;     // its length (1 => treated as abbreviation)
    int is_combining = false;    // currently merging BPE pieces
    string combine = "";
    for (auto it = in.begin(); it != in.end(); it++) {
        string word = vocab[*it];
        // step1: skip structural tokens
        if (word == "<s>" || word == "</s>" || word == "<unk>")
            continue;
        // step2: combine BPE pieces into a full word
        {
            int sub_word = !(word.find("@@") == string::npos);
            // piece with trailing "@@": strip the marker and keep merging
            if (sub_word) {
                combine += word.erase(word.length() - 2);
                is_combining = true;
                continue;
            }
            // final piece of the word being merged
            else if (is_combining) {
                combine += word;
                is_combining = false;
                word = combine;
                combine = "";
            }
        }
        // step3: spacing/capitalization for English words
        {
            // Chinese characters are emitted as-is, no spaces
            if (isChinese(word)) {
                words.push_back(word);
                is_pre_english = false;
            }
            // English word
            else {
                // previous text was Chinese: capitalize the first letter
                if (!is_pre_english) {
                    word[0] = word[0] - 32;
                    words.push_back(word);
                    pre_english_len = word.size();
                }
                // previous word was English
                else {
                    // single letter: uppercase (abbreviation style)
                    if (word.size() == 1) {
                        word[0] = word[0] - 32;
                    }
                    if (pre_english_len > 1) {
                        words.push_back(" ");
                        words.push_back(word);
                        pre_english_len = word.size();
                    }
                    else {
                        // after a single letter, only add a space before a
                        // multi-letter word (keeps abbreviations glued)
                        if (word.size() > 1) {
                            words.push_back(" ");
                        }
                        words.push_back(word);
                        pre_english_len = word.size();
                    }
                }
                is_pre_english = true;
            }
        }
    }
    // for (auto it = words.begin(); it != words.end(); it++) {
    //     cout << *it << endl;
    // }
    stringstream ss;
    for (auto it = words.begin(); it != words.end(); it++) {
        ss << *it;
    }
    return ss.str();
}
// Number of tokens loaded from the vocabulary file.
int Vocab::size()
{
    return static_cast<int>(vocab.size());
}
#ifndef VOCAB_H
#define VOCAB_H
#include <stdint.h>
#include <string>
#include <vector>
using namespace std;
// Token-id -> string mapping loaded from a one-token-per-line text file,
// plus id-sequence -> display-text conversion.
class Vocab {
  private:
    vector<string> vocab; // index = token id
    bool isChinese(string ch);
    bool isEnglish(string ch); // NOTE(review): declared but no definition in this file — confirm
  public:
    Vocab(const char *filename);
    ~Vocab();
    int size();
    string vector2string(vector<int> in);   // plain concatenation
    string vector2stringV2(vector<int> in); // BPE merge + spacing/casing
};
#endif
#include "precomp.h"
/**
 * Allocates required_bytes aligned to `alignment` (must be a power of two).
 * Over-allocates with malloc, rounds the pointer up, and stashes the
 * original malloc pointer in the slot just before the aligned block so
 * aligned_free() can recover it. Returns NULL on allocation failure.
 */
void *aligned_malloc(size_t alignment, size_t required_bytes)
{
    void *raw;     // original malloc block
    void **aligned; // aligned pointer handed to the caller
    // fix: offset was a signed int — use size_t to match the pointer math
    size_t offset = alignment - 1 + sizeof(void *);
    if ((raw = (void *)malloc(required_bytes + offset)) == NULL) {
        return NULL;
    }
    aligned = (void **)(((size_t)(raw) + offset) & ~(alignment - 1));
    aligned[-1] = raw; // remember the original block for aligned_free()
    return aligned;
}
/** Frees a block previously returned by aligned_malloc. */
void aligned_free(void *p)
{
    free(((void **)p)[-1]);
}
#ifndef ALIGNEDMEM_H
#define ALIGNEDMEM_H
#include <stddef.h> // fix: size_t was used below without being declared
// Allocate required_bytes aligned to `alignment` (power of two); NULL on failure.
extern void *aligned_malloc(size_t alignment, size_t required_bytes);
// Release a block obtained from aligned_malloc.
extern void aligned_free(void *p);
#endif
#pragma once
#ifdef _WIN32
#include <codecvt>
// Converts a narrow (locale-encoded) string to a wide string using the
// named locale. NOTE(review): std::codecvt_byname / std::wstring_convert
// are deprecated since C++17 — consider MultiByteToWideChar on Windows.
inline std::wstring string2wstring(const std::string& str, const std::string& locale)
{
    typedef std::codecvt_byname<wchar_t, char, std::mbstate_t> F;
    std::wstring_convert<F> strCnv(new F(locale));
    return strCnv.from_bytes(str);
}
// Convenience wrapper used for model file paths: narrow -> wide via the
// "zh-CN" locale.
inline std::wstring strToWstr(std::string str) {
    if (str.length() == 0)
        return L"";
    return string2wstring(str, "zh-CN");
}
#endif
// Copies the name of input node `nIndex` into inputName; leaves it
// untouched when the session reports no inputs.
inline void getInputName(Ort::Session* session, string& inputName, int nIndex = 0) {
    if (session->GetInputCount() == 0)
        return;
    Ort::AllocatorWithDefaultOptions allocator;
    auto namePtr = session->GetInputNameAllocated(nIndex, allocator);
    inputName = namePtr.get(); // copy before the allocated name is released
}
// Copies the name of output node `nIndex` into outputName; leaves it
// untouched when the session reports no outputs.
inline void getOutputName(Ort::Session* session, string& outputName, int nIndex = 0) {
    if (session->GetOutputCount() == 0)
        return;
    Ort::AllocatorWithDefaultOptions allocator;
    auto namePtr = session->GetOutputNameAllocated(nIndex, allocator);
    outputName = namePtr.get(); // copy before the allocated name is released
}
\ No newline at end of file
#include "precomp.h"
using namespace std;
using namespace paraformer;
// Builds the ONNX Runtime session for a paraformer model directory
// (expects model.onnx and vocab.txt inside `path`) and caches the model's
// two input and two output tensor names for Run().
ModelImp::ModelImp(const char* path, int mode, int nNumThread)
{
    string model_path = pathAppend(path, "model.onnx");
    string vocab_path = pathAppend(path, "vocab.txt");
    fe = new FeatureExtract(mode);
    sessionOptions.SetInterOpNumThreads(nNumThread);
    sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
#ifdef _WIN32
    // ORT on Windows takes a wide-character path
    wstring wstrPath = strToWstr(model_path);
    m_session = new Ort::Session(env, wstrPath.c_str(), sessionOptions);
#else
    m_session = new Ort::Session(env, model_path.c_str(), sessionOptions);
#endif
    string strName;
    getInputName(m_session, strName);
    // fix: push the string directly — the old .c_str() round-trip forced a
    // needless copy and was inconsistent with the sibling push_back calls
    m_strInputNames.push_back(strName);
    getInputName(m_session, strName, 1);
    m_strInputNames.push_back(strName);
    getOutputName(m_session, strName);
    m_strOutputNames.push_back(strName);
    getOutputName(m_session, strName, 1);
    m_strOutputNames.push_back(strName);
    // raw pointers for the C-style Run() API; they stay valid because the
    // backing strings are members that are never resized afterwards
    for (auto& item : m_strInputNames)
        m_szInputNames.push_back(item.c_str());
    for (auto& item : m_strOutputNames)
        m_szOutputNames.push_back(item.c_str());
    vocab = new Vocab(vocab_path.c_str());
}
// Releases the feature extractor, the ORT session and the vocabulary.
ModelImp::~ModelImp()
{
    delete fe;
    //delete p_helper;
    //delete encoder;
    //delete predictor;
    //
    //delete decoder;
    if (m_session)
    {
        delete m_session;
        m_session = nullptr;
    }
    delete vocab;
}
// Clears the feature extractor's buffered state between utterances.
void ModelImp::reset()
{
    fe->reset();
}
// Low Frame Rate (LFR) stacking: every 6th 80-dim fbank frame is stacked
// with its 3 left and 3 right neighbours (7 frames -> one 560-dim row),
// clamping indices at the signal edges. Replaces din with the stacked
// tensor (the old tensor is freed).
void ModelImp::apply_lfr(Tensor<float>*& din)
{
    int mm = din->size[2];   // number of input frames
    int ll = ceil(mm / 6.0); // output rows, stride 6 (lfr_n)
    Tensor<float>* tmp = new Tensor<float>(ll, 560); // 7 * 80 stacked dims
    int i, j;
    int out_offset = 0;
    for (i = 0; i < ll; i++) {
        for (j = 0; j < 7; j++) { // lfr_m = 7 frames per output row
            int idx = i * 6 + j - 3; // centered window: 3 left, 3 right
            if (idx < 0) {
                idx = 0; // clamp at the beginning
            }
            if (idx >= mm) {
                idx = mm - 1; // clamp at the end
            }
            memcpy(tmp->buff + out_offset, din->buff + idx * 80,
                   sizeof(float) * 80);
            out_offset += 80;
        }
    }
    delete din;
    din = tmp;
}
// Applies global CMVN in place: x = (x + mean) * var, with per-dimension
// statistics baked in as hex dumps from predefine_coe.h.
// NOTE(review): the "+ mean" / "* var" form suggests mean is stored negated
// and var as inverse stddev — confirm against the coefficient generator.
void ModelImp::apply_cmvn(Tensor<float>* din)
{
    const float* var;
    const float* mean;
    // fix: removed unused local `float scale = 22.6274169979695;`
    int m = din->size[2]; // rows (LFR frames)
    int n = din->size[3]; // feature dimension
    var = (const float*)paraformer_cmvn_var_hex;
    mean = (const float*)paraformer_cmvn_mean_hex;
    int i, j;
    for (i = 0; i < m; i++) {
        for (j = 0; j < n; j++) {
            int idx = i * n + j;
            din->buff[idx] = (din->buff[idx] + mean[j]) * var[j];
        }
    }
}
// Greedy (argmax) decoding: pick the most likely token id at each of the
// nLen time steps, then map the ids to text.
// NOTE(review): 8404 is the model's output vocabulary width — it must match
// the ONNX model and vocab.txt; confirm before swapping models.
string ModelImp::greedy_search(float * in, int nLen )
{
    vector<int> hyps;
    int Tmax = nLen;
    int i;
    for (i = 0; i < Tmax; i++) {
        int max_idx;
        float max_val;
        findmax(in + i * 8404, 8404, max_val, max_idx);
        hyps.push_back(max_idx);
    }
    return vocab->vector2stringV2(hyps);
}
// Full recognition pass for one chunk of samples:
// fbank features -> LFR stacking -> CMVN -> ONNX model -> greedy argmax -> text.
// Returns "" when the extractor has not yet accumulated a full window.
string ModelImp::forward(float* din, int len, int flag)
{
    // fix: `in` was left uninitialized; if fetch() produced nothing the
    // old code dereferenced/deleted a garbage pointer
    Tensor<float>* in = nullptr;
    fe->insert(din, len, flag);
    if (!fe->fetch(in) || in == nullptr) {
        return ""; // no feature window ready yet
    }
    apply_lfr(in);
    apply_cmvn(in);
    Ort::RunOptions run_option;
    // model inputs: float feats [1, T, 560] and an int32 length tensor [1]
    std::array<int64_t, 3> input_shape_{ in->size[0], in->size[2], in->size[3] };
    Ort::Value onnx_feats = Ort::Value::CreateTensor<float>(m_memoryInfo,
        in->buff,
        in->buff_size,
        input_shape_.data(),
        input_shape_.size());
    std::vector<int32_t> feats_len{ in->size[2] };
    std::vector<int64_t> feats_len_dim{ 1 };
    Ort::Value onnx_feats_len = Ort::Value::CreateTensor(
        m_memoryInfo,
        feats_len.data(),
        feats_len.size() * sizeof(int32_t),
        feats_len_dim.data(),
        feats_len_dim.size(), ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32);
    std::vector<Ort::Value> input_onnx;
    input_onnx.emplace_back(std::move(onnx_feats));
    input_onnx.emplace_back(std::move(onnx_feats_len));
    auto outputTensor = m_session->Run(run_option, m_szInputNames.data(), input_onnx.data(), m_szInputNames.size(), m_szOutputNames.data(), m_szOutputNames.size());
    // outputs: [0] posteriors, [1] valid output length (int64)
    float* floatData = outputTensor[0].GetTensorMutableData<float>();
    auto encoder_out_lens = outputTensor[1].GetTensorMutableData<int64_t>();
    string result = greedy_search(floatData, *encoder_out_lens);
    delete in;
    return result;
}
// Streaming (chunk-wise) decoding — not implemented yet.
string ModelImp::forward_chunk(float* din, int len, int flag)
{
    printf("Not Imp!!!!!!\n");
    return "Hello";
}
// N-best rescoring — not implemented yet.
string ModelImp::rescoring()
{
    printf("Not Imp!!!!!!\n");
    return "Hello";
}
#pragma once
#ifndef PARAFORMER_MODELIMP_H
#define PARAFORMER_MODELIMP_H
namespace paraformer {
// ONNX Runtime-backed implementation of the Model interface: feature
// extraction + paraformer inference + greedy decoding to text.
class ModelImp : public Model {
  private:
    FeatureExtract* fe; // owned
    Vocab* vocab;       // owned
    void apply_lfr(Tensor<float>*& din);
    void apply_cmvn(Tensor<float>* din);
    string greedy_search( float* in, int nLen);
#ifdef _WIN_X86
    Ort::MemoryInfo m_memoryInfo = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
#else
    Ort::MemoryInfo m_memoryInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
#endif
    Ort::Session* m_session = nullptr; // owned, created in the ctor
    Ort::Env env = Ort::Env(ORT_LOGGING_LEVEL_ERROR, "paraformer");
    Ort::SessionOptions sessionOptions = Ort::SessionOptions();
    // name storage; m_sz* hold c_str() pointers into m_str* for the C API
    vector<string> m_strInputNames, m_strOutputNames;
    vector<const char*> m_szInputNames;
    vector<const char*> m_szOutputNames;
    //string m_strInputName, m_strInputNameLen;
    //string m_strOutputName, m_strOutputNameLen;
  public:
    ModelImp(const char* path, int mode, int nNumThread=4);
    ~ModelImp();
    void reset();
    string forward_chunk(float* din, int len, int flag);
    string forward(float* din, int len, int flag);
    string rescoring();
};
} // namespace paraformer
#endif
#pragma once
// system
#include "alignedmem.h"
#include <iostream>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <deque>
#include <iostream>
#include <list>
#include <locale.h>
#include <vector>
#include <string>
#include <math.h>
#include <numeric>
using namespace std;
// third part
#include <fftw3.h>
#include "onnxruntime_run_options_config_keys.h"
#include "onnxruntime_cxx_api.h"
// mine
#include "commonfunc.h"
#include <ComDefine.h>
#include "predefine_coe.h"
#include "Vocab.h"
#include "util.h"
#include "CommonStruct.h"
#include "FeatureExtract.h"
#include "SpeechWrap.h"
#include "Model.h"
#include "paraformer_onnx.h"
using namespace paraformer;
\ No newline at end of file
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment