Commit 93697015 authored by zhouxiang
Browse files

增加多进程测试简易方法

parent f182f72d
......@@ -5,7 +5,7 @@
#include "fstream"
#include <chrono>
#include "chatglm.h"
#include <unistd.h>
//static factoryllm fllm;
//static int modeltype = 0;
//static char* modelpath = NULL;
......@@ -19,6 +19,7 @@ struct BenchmarkConfig {
int batch = -1; // batch数, -1时使用文件中的行数作为batch
std::string file; // 输入文件
std::string output; // 输出文件,如果不设定则输出到屏幕
int runloop = 0;
};
void Usage() {
......@@ -51,6 +52,8 @@ void ParseArgs(int argc, char **argv, BenchmarkConfig &config) {
config.file = sargv[++i];
} else if (sargv[i] == "-o" || sargv[i] == "--output") {
config.output = sargv[++i];
} else if (sargv[i] == "--loop") {
config.runloop = 1;
} else {
Usage();
exit(-1);
......@@ -97,31 +100,59 @@ int main(int argc, char **argv) {
inputs.resize(config.batch);
}
std::vector <std::string> outputs;
static int tokens = 0;
auto st = std::chrono::system_clock::now();
chatGlm.ResponseBatch(inputs, outputs, [](int index, std::vector <std::string> &contents) {
if (index != -1) {
for (int i = 0; i < contents.size(); i++) {
tokens += (contents[i].size() > 0);
if(config.runloop == 1){
while(true){
std::vector <std::string> outputs;
static int tokens = 0;
auto st = std::chrono::system_clock::now();
chatGlm.ResponseBatch(inputs, outputs, [](int index, std::vector <std::string> &contents) {
if (index != -1) {
for (int i = 0; i < contents.size(); i++) {
tokens += (contents[i].size() > 0);
}
}
});
float spend = GetSpan(st, std::chrono::system_clock::now());
if (config.output != "") {
FILE *fo = fopen(config.output.c_str(), "w");
for (int i = 0; i < outputs.size(); i++) {
fprintf(fo, "[ user: \"%s\", model: \"%s\"]\n", inputs[i].c_str(), outputs[i].c_str());
}
fclose(fo);
}
pid_t pid = getpid();
// printf("batch: %d\n", (int)inputs.size());
printf("pid %d : output %d tokens\nuse %f s\nspeed = %f tokens / s\n", pid, tokens, spend, tokens / spend);
}
});
float spend = GetSpan(st, std::chrono::system_clock::now());
}
else{
std::vector <std::string> outputs;
static int tokens = 0;
auto st = std::chrono::system_clock::now();
chatGlm.ResponseBatch(inputs, outputs, [](int index, std::vector <std::string> &contents) {
if (index != -1) {
for (int i = 0; i < contents.size(); i++) {
tokens += (contents[i].size() > 0);
}
}
});
float spend = GetSpan(st, std::chrono::system_clock::now());
if (config.output != "") {
FILE *fo = fopen(config.output.c_str(), "w");
for (int i = 0; i < outputs.size(); i++) {
fprintf(fo, "[ user: \"%s\", model: \"%s\"]\n", inputs[i].c_str(), outputs[i].c_str());
}
fclose(fo);
} else {
for (int i = 0; i < outputs.size(); i++) {
printf("[ user: \"%s\", model: \"%s\"]\n", inputs[i].c_str(), outputs[i].c_str());
if (config.output != "") {
FILE *fo = fopen(config.output.c_str(), "w");
for (int i = 0; i < outputs.size(); i++) {
fprintf(fo, "[ user: \"%s\", model: \"%s\"]\n", inputs[i].c_str(), outputs[i].c_str());
}
fclose(fo);
} else {
for (int i = 0; i < outputs.size(); i++) {
printf("[ user: \"%s\", model: \"%s\"]\n", inputs[i].c_str(), outputs[i].c_str());
}
}
}
printf("batch: %d\n", (int)inputs.size());
printf("output %d tokens\nuse %f s\nspeed = %f tokens / s\n", tokens, spend, tokens / spend);
printf("batch: %d\n", (int)inputs.size());
printf("output %d tokens\nuse %f s\nspeed = %f tokens / s\n", tokens, spend, tokens / spend);
}
return 0;
}
\ No newline at end of file
#!/bin/bash
# Simple multi-process stress test: starts several benchmark instances in the
# background, lets them run for a fixed duration, then stops them.

# Command to run. Stored as an array so every argument stays a single word
# even if a path ever contains spaces or glob characters.
program=(./benchmark -p ../../models/chatglm-6b/chatglm_flm/chatglm_6b_test_int8.bin --loop)
# Number of instances to run concurrently
num_instances=2
# Stress-test duration (seconds)
test_duration=120

# PIDs of the instances started by this script
pids=()

# Stop exactly the instances this script started. Killing by recorded PID
# (instead of `pkill -f` with the command line as a regex) cannot hit
# unrelated processes that happen to match the pattern.
cleanup() {
    if ((${#pids[@]} > 0)); then
        kill "${pids[@]}" 2>/dev/null
        # Reap the children so none are left behind as zombies.
        wait "${pids[@]}" 2>/dev/null
    fi
}

# The instances are started with --loop and never exit on their own, and
# background jobs of a non-interactive shell ignore SIGINT. Run the cleanup
# on every exit path so an interrupted test does not leave them running.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

# Start the background processes
for ((i=1; i<=num_instances; i++)); do
    "${program[@]}" &
    pids+=("$!")
done

# Let the stress test run for the configured time; the EXIT trap then
# stops all instances.
sleep "$test_duration"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment