Commit c357b7e2 authored by mayong
Browse files

update files.

parent 1b3197db
......@@ -19,8 +19,26 @@
"cmakeCommandArgs": "",
"buildCommandArgs": "",
"ctestCommandArgs": "",
"inheritEnvironments": [ "msvc_x64_x64" ],
"variables": []
"inheritEnvironments": [ "msvc_x64_x64" ]
},
{
"name": "Linux-GCC-Debug",
"generator": "Unix Makefiles",
"configurationType": "Debug",
"cmakeExecutable": "cmake",
"remoteCopySourcesExclusionList": [ ".vs", ".git", "out" ],
"cmakeCommandArgs": "-DONNXRUNTIME_DIR=/data/linux/thirdpart/onnxruntime-linux-x64-1.14.1",
"buildCommandArgs": "",
"ctestCommandArgs": "",
"inheritEnvironments": [ "linux_x64" ],
"remoteMachineName": "${defaultRemoteMachineName}",
"remoteCMakeListsRoot": "$HOME/.vs/${projectDirName}/${workspaceHash}/src",
"remoteBuildRoot": "$HOME/.vs/${projectDirName}/${workspaceHash}/out/build/${name}",
"remoteInstallRoot": "$HOME/.vs/${projectDirName}/${workspaceHash}/out/install/${name}",
"remoteCopySources": true,
"rsyncCommandArgs": "-t --delete",
"remoteCopyBuildOutput": false,
"remoteCopySourcesMethod": "rsync"
}
]
}
\ No newline at end of file
......@@ -43,11 +43,16 @@ class Audio {
Audio(int data_type, int size);
~Audio();
void disp();
bool loadwav(const char *filename);
bool loadwav(const char* filename);
bool loadwav(const char* buf, int nLen);
bool loadpcmwav(const char* buf, int nFileLen);
bool loadpcmwav(const char* filename);
int fetch_chunck(float *&dout, int len);
int fetch(float *&dout, int &len, int &flag);
void padding();
void split();
int get_queue_size() { return (int)frame_queue.size(); }
};
#endif
......@@ -25,8 +25,7 @@ class AudioWindow {
out_idx = 1;
sum = 0;
};
~AudioWindow()
{
~AudioWindow(){
free(window);
};
int put(int val)
......@@ -102,6 +101,11 @@ Audio::~Audio()
{
if (speech_buff != NULL) {
free(speech_buff);
}
if (speech_data != NULL) {
free(speech_data);
}
}
......@@ -115,9 +119,11 @@ void Audio::disp()
bool Audio::loadwav(const char *filename)
{
if (speech_data != NULL) {
free(speech_data);
}
if (speech_buff != NULL) {
free(speech_buff);
free(speech_data);
}
offset = 0;
......@@ -133,28 +139,191 @@ bool Audio::loadwav(const char *filename)
speech_len = (nFileLen - 44) / 2;
speech_align_len = (int)(ceil((float)speech_len / align_size) * align_size);
speech_buff = (int16_t *)malloc(sizeof(int16_t) * speech_align_len);
memset(speech_buff, 0, sizeof(int16_t) * speech_align_len);
int ret = fread(speech_buff, sizeof(int16_t), speech_len, fp);
fclose(fp);
speech_data = (float *)malloc(sizeof(float) * speech_align_len);
memset(speech_data, 0, sizeof(float) * speech_align_len);
int i;
float scale = 1;
if (speech_buff)
{
memset(speech_buff, 0, sizeof(int16_t) * speech_align_len);
int ret = fread(speech_buff, sizeof(int16_t), speech_len, fp);
fclose(fp);
speech_data = (float*)malloc(sizeof(float) * speech_align_len);
memset(speech_data, 0, sizeof(float) * speech_align_len);
int i;
float scale = 1;
if (data_type == 1) {
scale = 32768;
}
if (data_type == 1) {
scale = 32768;
for (i = 0; i < speech_len; i++) {
speech_data[i] = (float)speech_buff[i] / scale;
}
AudioFrame* frame = new AudioFrame(speech_len);
frame_queue.push(frame);
return true;
}
else
return false;
}
bool Audio::loadwav(const char* buf, int nFileLen)
{
for (i = 0; i < speech_len; i++) {
speech_data[i] = (float)speech_buff[i] / scale;
if (speech_data != NULL) {
free(speech_data);
}
if (speech_buff != NULL) {
free(speech_buff);
}
AudioFrame *frame = new AudioFrame(speech_len);
frame_queue.push(frame);
return true;
offset = 0;
size_t nOffset = 0;
#define WAV_HEADER_SIZE 44
speech_len = (nFileLen - WAV_HEADER_SIZE) / 2;
speech_align_len = (int)(ceil((float)speech_len / align_size) * align_size);
speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_align_len);
if (speech_buff)
{
memset(speech_buff, 0, sizeof(int16_t) * speech_align_len);
memcpy((void*)speech_buff, (const void*)(buf + WAV_HEADER_SIZE), speech_len * sizeof(int16_t));
speech_data = (float*)malloc(sizeof(float) * speech_align_len);
memset(speech_data, 0, sizeof(float) * speech_align_len);
int i;
float scale = 1;
if (data_type == 1) {
scale = 32768;
}
for (i = 0; i < speech_len; i++) {
speech_data[i] = (float)speech_buff[i] / scale;
}
return true;
}
else
return false;
}
/**
 * Load raw 16-bit PCM samples (no header) from a memory buffer.
 *
 * Any previously loaded audio is released first. The samples are copied into
 * speech_buff, zero-padded up to a multiple of align_size, and converted to
 * float in speech_data: each sample is divided by 32768 when data_type == 1,
 * otherwise the raw sample value is kept.
 *
 * @param buf      Source bytes, read as nBufLen / 2 int16_t samples.
 * @param nBufLen  Size of buf in bytes; a trailing odd byte is ignored.
 * @return true when both buffers were allocated and filled; false when buf is
 *         null, no complete sample is available, or an allocation fails. On
 *         failure both speech_buff and speech_data are left null.
 *
 * NOTE(review): unlike the file-based loadpcmwav overload, this one does not
 * push an AudioFrame onto frame_queue — confirm callers do not rely on one.
 */
bool Audio::loadpcmwav(const char* buf, int nBufLen)
{
    // free(NULL) is a no-op. Clearing the pointers afterwards keeps a failed
    // load from leaving dangling pointers that would later be freed again.
    free(speech_data);
    speech_data = nullptr;
    free(speech_buff);
    speech_buff = nullptr;
    offset = 0;

    speech_len = nBufLen / 2;
    speech_align_len = (int)(ceil((float)speech_len / align_size) * align_size);

    // Reject a missing buffer and negative or sub-sample lengths before they
    // reach the allocator or memcpy.
    if (buf == nullptr || speech_len <= 0)
        return false;

    // calloc zero-fills, so the alignment padding past speech_len stays silent.
    speech_buff = (int16_t*)calloc(speech_align_len, sizeof(int16_t));
    speech_data = (float*)calloc(speech_align_len, sizeof(float));
    if (speech_buff == nullptr || speech_data == nullptr) {
        free(speech_buff);
        speech_buff = nullptr;
        free(speech_data);
        speech_data = nullptr;
        return false;
    }

    memcpy(speech_buff, buf, sizeof(int16_t) * speech_len);

    const float scale = (data_type == 1) ? 32768.0f : 1.0f;
    for (int i = 0; i < speech_len; ++i) {
        speech_data[i] = (float)speech_buff[i] / scale;
    }
    return true;
}
/**
 * Load raw 16-bit PCM samples (no header) from a file.
 *
 * Any previously loaded audio is released first. The whole file is read as
 * int16_t samples into speech_buff, zero-padded up to a multiple of
 * align_size, and converted to float in speech_data: each sample is divided
 * by 32768 when data_type == 1, otherwise the raw sample value is kept. On
 * success one AudioFrame covering speech_len samples is pushed onto
 * frame_queue.
 *
 * @param filename  Path of the PCM file; opened in binary mode.
 * @return true on success; false when the file cannot be opened or sized,
 *         contains no complete sample, or an allocation fails. On failure
 *         both speech_buff and speech_data are left null and the file handle
 *         is closed.
 */
bool Audio::loadpcmwav(const char* filename)
{
    // free(NULL) is a no-op. Clearing the pointers afterwards keeps a failed
    // load from leaving dangling pointers that would later be freed again.
    free(speech_data);
    speech_data = nullptr;
    free(speech_buff);
    speech_buff = nullptr;
    offset = 0;

    if (filename == nullptr)
        return false;
    FILE* fp = fopen(filename, "rb");
    if (fp == nullptr)
        return false;

    // ftell reports errors as -1, so keep the result signed until checked.
    long nFileLen = -1;
    if (fseek(fp, 0, SEEK_END) == 0)
        nFileLen = ftell(fp);
    if (nFileLen < 0 || fseek(fp, 0, SEEK_SET) != 0) {
        fclose(fp);
        return false;
    }

    speech_len = (int)(nFileLen / 2);
    speech_align_len = (int)(ceil((float)speech_len / align_size) * align_size);
    if (speech_len <= 0) {
        fclose(fp);
        return false;
    }

    // calloc zero-fills, so the alignment padding past speech_len stays silent.
    speech_buff = (int16_t*)calloc(speech_align_len, sizeof(int16_t));
    speech_data = (float*)calloc(speech_align_len, sizeof(float));
    if (speech_buff == nullptr || speech_data == nullptr) {
        fclose(fp);
        free(speech_buff);
        speech_buff = nullptr;
        free(speech_data);
        speech_data = nullptr;
        return false;
    }

    // Best effort, as before: a short read leaves the unread tail zeroed.
    const size_t nRead = fread(speech_buff, sizeof(int16_t), speech_len, fp);
    (void)nRead;
    fclose(fp);

    const float scale = (data_type == 1) ? 32768.0f : 1.0f;
    for (int i = 0; i < speech_len; ++i) {
        speech_data[i] = (float)speech_buff[i] / scale;
    }

    // frame_queue stores raw AudioFrame pointers that are deleted when popped.
    AudioFrame* frame = new AudioFrame(speech_len);
    frame_queue.push(frame);
    return true;
}
int Audio::fetch_chunck(float *&dout, int len)
{
if (offset >= speech_align_len) {
......@@ -163,7 +332,7 @@ int Audio::fetch_chunck(float *&dout, int len)
} else if (offset == speech_align_len - len) {
dout = speech_data + offset;
offset = speech_align_len;
// 临时解决
// 临时解决
AudioFrame *frame = frame_queue.front();
frame_queue.pop();
delete frame;
......
......@@ -18,7 +18,7 @@ if(WIN32)
endif()
target_include_directories(rapidasr PUBLIC ${CMAKE_SOURCE_DIR}/win/include )
target_compile_definitions(rapidasr PUBLIC -D_RPASR_API_EXPORT)
else()
set(EXTRA_LIBS fftw3f webrtcvad pthread)
......
#pragma once
/// Result record carrying a single text field, `msg`.
/// Declared as a named struct (rather than a C-style typedef of an unnamed
/// struct) so it can be forward-declared; the layout is unchanged.
struct RPASR_RECOG_RESULT
{
    std::string msg;
};
#ifdef _WIN32
#include <codecvt>
inline std::wstring string2wstring(const std::string& str, const std::string& locale)
{
typedef std::codecvt_byname<wchar_t, char, std::mbstate_t> F;
......@@ -42,12 +53,4 @@ inline void getOutputName(Ort::Session* session, string& outputName, int nIndex
}
}
}
/// Reports whether `name` refers to an existing filesystem entry.
/// Returns true exactly when stat() on the path succeeds.
inline bool FileIsExist(const string & name)
{
    struct stat info;
    const int rc = stat(name.c_str(), &info);
    return rc == 0;
}
\ No newline at end of file
......@@ -15,9 +15,8 @@
#include <math.h>
#include <numeric>
#include <cstring>
#include <sys/types.h>
#include <sys/stat.h>
using namespace std;
// third part
......@@ -42,9 +41,10 @@ using namespace std;
#include "FeatureExtract.h"
#include "FeatureQueue.h"
#include "SpeechWrap.h"
#include <Audio.h>
#include "Model.h"
#include "paraformer_onnx.h"
#include "librapidasrapi.h"
using namespace paraformer;
......@@ -15,6 +15,6 @@ include_directories(${CMAKE_SOURCE_DIR}/include)
set(EXECNAME "tester")
add_executable(${EXECNAME} "tester.cpp")
target_link_libraries(${EXECNAME} PUBLIC onnxruntime ${EXTRA_LIBS})
target_link_libraries(${EXECNAME} PUBLIC ${EXTRA_LIBS})
#include <iostream>
#ifndef _WIN32
#include <sys/time.h>
#else
#include <win_func.h>
#endif
#include <Audio.h>
#include <Model.h>
#include "librapidasrapi.h"
#include <iostream>
using namespace std;
......@@ -21,52 +22,49 @@ int main(int argc, char *argv[])
struct timeval start, end;
gettimeofday(&start, NULL);
int nThreadNum = 4;
Model* mm = create_model(argv[1], nThreadNum);
if (!mm)
RPASR_HANDLE AsrHanlde=RapidAsrInit(argv[1], nThreadNum);
if (!AsrHanlde)
{
printf("Cannot load ASR Model from: %s, there must be files model.onnx and vocab.txt", argv[1]);
exit(-1);
}
Audio audio(0);
if (!audio.loadwav(argv[2]))
{
printf("cannot load %s\n", argv[2]);
return -1;
}
audio.disp();
gettimeofday(&end, NULL);
long seconds = (end.tv_sec - start.tv_sec);
long micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
printf("Model initialization takes %lfs.\n", (double)micros / 1000000);
audio.split();
setbuf(stdout, NULL);
cout << "Result: \"";
gettimeofday(&start, NULL);
float *buff;
int len;
int flag;
while (audio.fetch(buff, len, flag) > 0) {
mm->reset();
string msg = mm->forward(buff, len, flag);
cout << msg;
}
RPASR_RESULT Result=RapidAsrRecogPCMFile(AsrHanlde, argv[2], RASR_NONE, NULL);
gettimeofday(&end, NULL);
cout << "\"." << endl;
if (Result)
{
string msg = RapidAsrGetResult(Result, 0);
cout << msg << endl;
cout << "\"." << endl;
RapidAsrFreeResult(Result);
}
else
{
cout <<("no return data!");
}
seconds = (end.tv_sec - start.tv_sec);
long taking_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
printf("Model inference takes %lfs.\n", (double)micros / 1000000);
printf("Model inference RTF: %04lf.\n", (double)taking_micros/micros );
delete mm;
RapidAsrUninit(AsrHanlde);
return 0;
}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment