update files.

c357b7e2 · mayong · 1b3197db · c357b7e2 · c357b7e2 · c357b7e2
Commit c357b7e2 authored Mar 08, 2023 by mayong
8 changed files
--- a/cpp_onnx/CMakeSettings.json
+++ b/cpp_onnx/CMakeSettings.json
@@ -19,8 +19,26 @@
      "cmakeCommandArgs": "",
      "buildCommandArgs": "",
      "ctestCommandArgs": "",
-      "inheritEnvironments": [ "msvc_x64_x64" ],
-      "variables": []
+      "inheritEnvironments": [ "msvc_x64_x64" ]
+    },
+    {
+      "name": "Linux-GCC-Debug",
+      "generator": "Unix Makefiles",
+      "configurationType": "Debug",
+      "cmakeExecutable": "cmake",
+      "remoteCopySourcesExclusionList": [ ".vs", ".git", "out" ],
+      "cmakeCommandArgs": "-DONNXRUNTIME_DIR=/data/linux/thirdpart/onnxruntime-linux-x64-1.14.1",
+      "buildCommandArgs": "",
+      "ctestCommandArgs": "",
+      "inheritEnvironments": [ "linux_x64" ],
+      "remoteMachineName": "${defaultRemoteMachineName}",
+      "remoteCMakeListsRoot": "$HOME/.vs/${projectDirName}/${workspaceHash}/src",
+      "remoteBuildRoot": "$HOME/.vs/${projectDirName}/${workspaceHash}/out/build/${name}",
+      "remoteInstallRoot": "$HOME/.vs/${projectDirName}/${workspaceHash}/out/install/${name}",
+      "remoteCopySources": true,
+      "rsyncCommandArgs": "-t --delete",
+      "remoteCopyBuildOutput": false,
+      "remoteCopySourcesMethod": "rsync"
    }
  ]
 }
\ No newline at end of file
--- a/cpp_onnx/include/Audio.h
+++ b/cpp_onnx/include/Audio.h
@@ -43,11 +43,16 @@ class Audio {
    Audio(int data_type, int size);
    ~Audio();
    void disp();
-    bool loadwav(const char *filename);
+    bool loadwav(const char* filename);
+    bool loadwav(const char* buf, int nLen);
+    bool loadpcmwav(const char* buf, int nFileLen);
+    bool loadpcmwav(const char* filename);
    int fetch_chunck(float *&dout, int len);
    int fetch(float *&dout, int &len, int &flag);
    void padding();
    void split();
+
+    /// Returns the number of entries currently stored in frame_queue, as an int.
+    int get_queue_size() { return static_cast<int>(frame_queue.size()); }
 };

 #endif
--- a/cpp_onnx/src/Audio.cpp
+++ b/cpp_onnx/src/Audio.cpp
@@ -25,8 +25,7 @@ class AudioWindow {
        out_idx = 1;
        sum = 0;
    };
-    ~AudioWindow()
-    {
+    ~AudioWindow(){
        free(window);
    };
    int put(int val)
@@ -102,6 +101,11 @@ Audio::~Audio()
 {
    if (speech_buff != NULL) {
        free(speech_buff);
+        
+    }
+
+    if (speech_data != NULL) {
+        
        free(speech_data);
    }
 }
@@ -115,9 +119,11 @@ void Audio::disp()
 bool Audio::loadwav(const char *filename)
 {

+    if (speech_data != NULL) {
+        free(speech_data);
+    }
    if (speech_buff != NULL) {
        free(speech_buff);
-        free(speech_data);
    }

    offset = 0;
@@ -133,28 +139,191 @@ bool Audio::loadwav(const char *filename)
    speech_len = (nFileLen - 44) / 2;
    speech_align_len = (int)(ceil((float)speech_len / align_size) * align_size);
    speech_buff = (int16_t *)malloc(sizeof(int16_t) * speech_align_len);
-    memset(speech_buff, 0, sizeof(int16_t) * speech_align_len);
-    int ret = fread(speech_buff, sizeof(int16_t), speech_len, fp);
-    fclose(fp);

-    speech_data = (float *)malloc(sizeof(float) * speech_align_len);
-    memset(speech_data, 0, sizeof(float) * speech_align_len);
-    int i;
-    float scale = 1;
+    if (speech_buff)
+    {
+        memset(speech_buff, 0, sizeof(int16_t) * speech_align_len);
+        int ret = fread(speech_buff, sizeof(int16_t), speech_len, fp);
+        fclose(fp);
+
+        speech_data = (float*)malloc(sizeof(float) * speech_align_len);
+        memset(speech_data, 0, sizeof(float) * speech_align_len);
+        int i;
+        float scale = 1;
+
+        if (data_type == 1) {
+            scale = 32768;
+        }

-    if (data_type == 1) {
-        scale = 32768;
+        for (i = 0; i < speech_len; i++) {
+            speech_data[i] = (float)speech_buff[i] / scale;
+        }
+
+        AudioFrame* frame = new AudioFrame(speech_len);
+        frame_queue.push(frame);
+
+
+        return true;
    }
+    else
+        return false;
+}
+
+
+/**
+ * Load audio from an in-memory WAV image.
+ *
+ * The first WAV_HEADER_SIZE (44) bytes of buf are skipped without being
+ * parsed; the remaining bytes are copied as int16_t samples into speech_buff
+ * (zero padded up to a multiple of align_size) and converted to float in
+ * speech_data, divided by 32768 when data_type == 1.
+ *
+ * @param buf       Start of the WAV data, header included.
+ * @param nFileLen  Total size of buf in bytes.
+ * @return true when speech_buff could be allocated, false otherwise.
+ *
+ * NOTE(review): an nFileLen smaller than the header can make speech_len
+ * negative before it is used as a memcpy count; the freed pointers are not
+ * reset to NULL; the speech_data allocation is not checked; and, unlike
+ * loadwav(const char *filename), no AudioFrame is pushed to frame_queue --
+ * confirm whether these are intended.
+ */
+bool Audio::loadwav(const char* buf, int nFileLen)
+{

-    for (i = 0; i < speech_len; i++) {
-        speech_data[i] = (float)speech_buff[i] / scale;
+    
+
+    // Release the buffers of any previous load.
+    if (speech_data != NULL) {
+        free(speech_data);
+    }
+    if (speech_buff != NULL) {
+        free(speech_buff);
    }

-    AudioFrame *frame = new AudioFrame(speech_len);
-    frame_queue.push(frame);
-    return true;
+    offset = 0;
+
+    // NOTE(review): nOffset is never read in this function.
+    size_t nOffset = 0;
+
+#define WAV_HEADER_SIZE 44
+
+    // Sample count = payload bytes / 2 (one int16_t per sample).
+    speech_len = (nFileLen - WAV_HEADER_SIZE) / 2;
+    // Round the sample count up to a whole multiple of align_size.
+    speech_align_len = (int)(ceil((float)speech_len / align_size) * align_size);
+    speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_align_len);
+    if (speech_buff)
+    {
+        // Zero first so the alignment padding after the samples stays 0.
+        memset(speech_buff, 0, sizeof(int16_t) * speech_align_len);
+        memcpy((void*)speech_buff, (const void*)(buf + WAV_HEADER_SIZE), speech_len * sizeof(int16_t));
+
+
+        speech_data = (float*)malloc(sizeof(float) * speech_align_len);
+        memset(speech_data, 0, sizeof(float) * speech_align_len);
+        int i;
+        float scale = 1;
+
+        // data_type == 1 selects division by 32768; otherwise values keep the int16 scale.
+        if (data_type == 1) {
+            scale = 32768;
+        }
+
+        for (i = 0; i < speech_len; i++) {
+            speech_data[i] = (float)speech_buff[i] / scale;
+        }
+
+
+        return true;
+    }
+    else
+        return false;
+
 }

+
+/**
+ * Load raw 16-bit PCM samples (no WAV header) from a memory buffer.
+ *
+ * Buffers from a previous load are released first. The samples are copied
+ * into speech_buff (zero padded up to a multiple of align_size) and converted
+ * to float in speech_data; when data_type == 1 each sample is divided by
+ * 32768, otherwise the int16 value is stored unchanged.
+ *
+ * @param buf      Buffer holding the samples; copied as int16_t values.
+ * @param nBufLen  Size of buf in bytes; a trailing odd byte is ignored.
+ * @return true on success; false on a NULL buffer, a negative length or an
+ *         allocation failure. On failure no sample data is kept.
+ *
+ * NOTE(review): unlike loadpcmwav(const char* filename), this overload does
+ * not push an AudioFrame onto frame_queue -- confirm that this is intended.
+ */
+bool Audio::loadpcmwav(const char* buf, int nBufLen)
+{
+    // Release the previous buffers and clear the pointers, so that a failure
+    // below cannot leave them dangling (the destructor frees any non-NULL one).
+    if (speech_data != NULL) {
+        free(speech_data);
+        speech_data = NULL;
+    }
+    if (speech_buff != NULL) {
+        free(speech_buff);
+        speech_buff = NULL;
+    }
+    offset = 0;
+    speech_len = 0;
+    speech_align_len = 0;
+
+    // A negative length would become a huge size_t in the memcpy below.
+    if (buf == NULL || nBufLen < 0)
+        return false;
+
+    // One int16_t sample per two bytes, rounded up to a multiple of align_size.
+    speech_len = nBufLen / 2;
+    speech_align_len = (int)(ceil((float)speech_len / align_size) * align_size);
+
+    speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_align_len);
+    speech_data = (float*)malloc(sizeof(float) * speech_align_len);
+    if (speech_buff == NULL || speech_data == NULL) {
+        // free(NULL) is a no-op, so this handles either allocation failing.
+        free(speech_buff);
+        free(speech_data);
+        speech_buff = NULL;
+        speech_data = NULL;
+        speech_len = 0;
+        speech_align_len = 0;
+        return false;
+    }
+
+    // Zero first so the alignment padding after the samples stays 0.
+    memset(speech_buff, 0, sizeof(int16_t) * speech_align_len);
+    memcpy((void*)speech_buff, (const void*)buf, speech_len * sizeof(int16_t));
+    memset(speech_data, 0, sizeof(float) * speech_align_len);
+
+    const float scale = (data_type == 1) ? 32768.0f : 1.0f;
+    for (int i = 0; i < speech_len; i++) {
+        speech_data[i] = (float)speech_buff[i] / scale;
+    }
+
+    return true;
+}
+
+/**
+ * Load raw 16-bit PCM samples (no WAV header) from a file.
+ *
+ * Buffers from a previous load are released first. The whole file is read
+ * as int16_t samples into speech_buff (zero padded up to a multiple of
+ * align_size) and converted to float in speech_data; when data_type == 1
+ * each sample is divided by 32768. On success one AudioFrame covering
+ * speech_len samples is pushed onto frame_queue.
+ *
+ * @param filename  Path of the PCM file to read.
+ * @return true on success; false when the file cannot be opened, sized or
+ *         fully read, or when an allocation fails. On failure no sample data
+ *         is kept and nothing is pushed onto frame_queue.
+ */
+bool Audio::loadpcmwav(const char* filename)
+{
+    // Release the previous buffers and clear the pointers, so that a failure
+    // below cannot leave them dangling (the destructor frees any non-NULL one).
+    if (speech_data != NULL) {
+        free(speech_data);
+        speech_data = NULL;
+    }
+    if (speech_buff != NULL) {
+        free(speech_buff);
+        speech_buff = NULL;
+    }
+    offset = 0;
+    speech_len = 0;
+    speech_align_len = 0;
+
+    FILE* fp = (filename != NULL) ? fopen(filename, "rb") : NULL;
+    if (fp == NULL)
+        return false;
+
+    // Determine the file size; ftell reports -1 on error, so keep it signed.
+    fseek(fp, 0, SEEK_END);
+    const long nFileLen = ftell(fp);
+    fseek(fp, 0, SEEK_SET);
+    if (nFileLen < 0) {
+        fclose(fp);
+        return false;
+    }
+
+    // One int16_t sample per two bytes, rounded up to a multiple of align_size.
+    speech_len = (int)(nFileLen / 2);
+    speech_align_len = (int)(ceil((float)speech_len / align_size) * align_size);
+
+    speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_align_len);
+    speech_data = (float*)malloc(sizeof(float) * speech_align_len);
+
+    bool ok = (speech_buff != NULL && speech_data != NULL);
+    if (ok) {
+        // Zero first so the alignment padding after the samples stays 0.
+        memset(speech_buff, 0, sizeof(int16_t) * speech_align_len);
+        memset(speech_data, 0, sizeof(float) * speech_align_len);
+        // The size came from the file itself, so a short read is an I/O error.
+        ok = (fread(speech_buff, sizeof(int16_t), speech_len, fp) == (size_t)speech_len);
+    }
+    // The file is closed on every path, including allocation failure.
+    fclose(fp);
+
+    if (!ok) {
+        // free(NULL) is a no-op, so this handles any of the failures above.
+        free(speech_buff);
+        free(speech_data);
+        speech_buff = NULL;
+        speech_data = NULL;
+        speech_len = 0;
+        speech_align_len = 0;
+        return false;
+    }
+
+    const float scale = (data_type == 1) ? 32768.0f : 1.0f;
+    for (int i = 0; i < speech_len; i++) {
+        speech_data[i] = (float)speech_buff[i] / scale;
+    }
+
+    AudioFrame* frame = new AudioFrame(speech_len);
+    frame_queue.push(frame);
+
+    return true;
+}
+
+
 int Audio::fetch_chunck(float *&dout, int len)
 {
    if (offset >= speech_align_len) {
@@ -163,7 +332,7 @@ int Audio::fetch_chunck(float *&dout, int len)
    } else if (offset == speech_align_len - len) {
        dout = speech_data + offset;
        offset = speech_align_len;
-        // 临时解决
+        // 临时解决 
        AudioFrame *frame = frame_queue.front();
        frame_queue.pop();
        delete frame;

--- a/cpp_onnx/src/CMakeLists.txt
+++ b/cpp_onnx/src/CMakeLists.txt
@@ -18,7 +18,7 @@ if(WIN32)
        endif()
        target_include_directories(rapidasr PUBLIC ${CMAKE_SOURCE_DIR}/win/include )
        
-
+        target_compile_definitions(rapidasr PUBLIC -D_RPASR_API_EXPORT)
 else()

    set(EXTRA_LIBS fftw3f webrtcvad pthread)

--- a/cpp_onnx/src/commonfunc.h
+++ b/cpp_onnx/src/commonfunc.h
 #pragma once 
+
+
+// Result record exposing a single std::string field, msg.
+// NOTE(review): presumably msg carries the recognised text returned to the
+// caller -- confirm against the code that fills this struct.
+// NOTE(review): std::string is used before any #include visible in this
+// header; it appears to rely on the including file providing <string>.
+typedef struct
+{
+    std::string msg;
+
+}RPASR_RECOG_RESULT;
+
+
 #ifdef _WIN32
 #include <codecvt>

+
+
 inline std::wstring string2wstring(const std::string& str, const std::string& locale)
 {
    typedef std::codecvt_byname<wchar_t, char, std::mbstate_t> F;
@@ -42,12 +53,4 @@ inline void getOutputName(Ort::Session* session, string& outputName, int nIndex

        }
    }
-}
-
-
-
-inline bool FileIsExist(const string & name) 
-{
-    struct stat buffer;
-    return (stat(name.c_str(), &buffer) == 0);
 }
\ No newline at end of file
--- a/cpp_onnx/src/precomp.h
+++ b/cpp_onnx/src/precomp.h
@@ -15,9 +15,8 @@
 #include <math.h>
 #include <numeric>

+
 #include <cstring>
-#include <sys/types.h>
-#include <sys/stat.h>

 using namespace std;
 // third part
@@ -42,9 +41,10 @@ using namespace std;
 #include "FeatureExtract.h"
 #include "FeatureQueue.h"
 #include "SpeechWrap.h"
+#include <Audio.h>
 #include "Model.h"
 #include "paraformer_onnx.h"
-
+#include "librapidasrapi.h"


 using namespace paraformer;
--- a/cpp_onnx/tester/CMakeLists.txt
+++ b/cpp_onnx/tester/CMakeLists.txt
@@ -15,6 +15,6 @@ include_directories(${CMAKE_SOURCE_DIR}/include)
 set(EXECNAME "tester")

 add_executable(${EXECNAME} "tester.cpp")
-target_link_libraries(${EXECNAME} PUBLIC onnxruntime ${EXTRA_LIBS})
+target_link_libraries(${EXECNAME} PUBLIC ${EXTRA_LIBS})


--- a/cpp_onnx/tester/tester.cpp
+++ b/cpp_onnx/tester/tester.cpp
-#include <iostream>
+
 #ifndef _WIN32
 #include <sys/time.h>
 #else
 #include <win_func.h>
 #endif

-#include <Audio.h>
-#include <Model.h>
+#include "librapidasrapi.h"
+
+#include <iostream>

 using namespace std;

@@ -21,52 +22,49 @@ int main(int argc, char *argv[])
    struct timeval start, end;
    gettimeofday(&start, NULL);
    int nThreadNum = 4;
-    Model* mm = create_model(argv[1], nThreadNum);
-    if (!mm)
+    RPASR_HANDLE AsrHanlde=RapidAsrInit(argv[1], nThreadNum);
+
+    if (!AsrHanlde)
    {
        printf("Cannot load ASR Model from: %s, there must be files model.onnx and vocab.txt", argv[1]);
        exit(-1);
    }
    
-  
-    Audio audio(0);
-    if (!audio.loadwav(argv[2]))
-    {
-        printf("cannot load %s\n", argv[2]);
-        return -1;
-    }
-    audio.disp();
-  
+ 

    gettimeofday(&end, NULL);
    long seconds = (end.tv_sec - start.tv_sec);
    long micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
    printf("Model initialization takes %lfs.\n", (double)micros / 1000000);
-    audio.split();

    setbuf(stdout, NULL);
    cout << "Result: \"";
    gettimeofday(&start, NULL);
-    float *buff;
-    int len;
-    int flag;
-    while (audio.fetch(buff, len, flag) > 0) {
-        mm->reset();
-        string msg = mm->forward(buff, len, flag);
-        cout << msg;
-    }

+    RPASR_RESULT Result=RapidAsrRecogPCMFile(AsrHanlde, argv[2], RASR_NONE, NULL);
    gettimeofday(&end, NULL);

-    cout << "\"." << endl;
-
+    if (Result)
+    {
+        string msg = RapidAsrGetResult(Result, 0);
+        cout << msg << endl;
+        cout << "\"." << endl;
+        RapidAsrFreeResult(Result);
+    }
+    else
+    {
+        cout <<("no return data!");
+    }
+  
    seconds = (end.tv_sec - start.tv_sec);
    long taking_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
    printf("Model inference takes %lfs.\n", (double)micros / 1000000);

    printf("Model inference RTF: %04lf.\n", (double)taking_micros/micros );

-    delete mm;
+    RapidAsrUninit(AsrHanlde);

    return 0;
 }
+
+    
\ No newline at end of file