Commit 56215723 authored by zhouxiang's avatar zhouxiang
Browse files

1、同步到最新版本;2、增加batch推理接口;3、解决内存泄漏问题;4、修复llama系列流式输出不流畅的问题

parent 44be91d3
[submodule "third_party/pybind11"] [submodule "third_party/pybind11"]
path = third_party/pybind11 path = third_party/pybind11
url = https://github.com/pybind/pybind11.git url = https://github.com/pybind/pybind11.git
branch = v2.10.5
...@@ -5,12 +5,21 @@ project(fastllm LANGUAGES CXX) ...@@ -5,12 +5,21 @@ project(fastllm LANGUAGES CXX)
option(USE_CUDA "use cuda" ON) option(USE_CUDA "use cuda" ON)
option(PY_API "python api" OFF) option(PY_API "python api" OFF)
option(USE_MMAP "use mmap" OFF) option(USE_MMAP "use mmap" OFF)
option(USE_SENTENCEPIECE "use sentencepiece" OFF)
option(USE_IVCOREX "use iluvatar corex gpu" OFF)
message(STATUS "USE_CUDA: ${USE_CUDA}") message(STATUS "USE_CUDA: ${USE_CUDA}")
message(STATUS "PYTHON_API: ${PY_API}") message(STATUS "PYTHON_API: ${PY_API}")
message(STATUS "USE_SENTENCEPIECE: ${USE_SENTENCEPIECE}")
message(STATUS "USE_IVCOREX: ${USE_IVCOREX}")
set(CMAKE_BUILD_TYPE "Release") set(CMAKE_BUILD_TYPE "Release")
if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
...@@ -25,7 +34,7 @@ endif() ...@@ -25,7 +34,7 @@ endif()
message(STATUS "CMAKE_CXX_FLAGS" ${CMAKE_CXX_FLAGS}) message(STATUS "CMAKE_CXX_FLAGS" ${CMAKE_CXX_FLAGS})
set(FASTLLM_CXX_SOURCES src/fastllm.cpp src/device.cpp src/model.cpp src/executor.cpp set(FASTLLM_CXX_SOURCES src/fastllm.cpp src/device.cpp src/model.cpp src/executor.cpp
src/devices/cpu/cpudevice.cpp src/devices/cpu/cpudevicebatch.cpp src/devices/cpu/cpudevice.cpp src/devices/cpu/cpudevicebatch.cpp
src/models/chatglm.cpp src/models/moss.cpp src/models/llama.cpp src/models/qwen.cpp src/models/basellm.cpp) src/models/chatglm.cpp src/models/moss.cpp src/models/llama.cpp src/models/qwen.cpp src/models/basellm.cpp src/models/glm.cpp)
include_directories(include) include_directories(include)
include_directories(include/utils) include_directories(include/utils)
...@@ -35,6 +44,12 @@ if (USE_MMAP) ...@@ -35,6 +44,12 @@ if (USE_MMAP)
add_compile_definitions(USE_MMAP) add_compile_definitions(USE_MMAP)
endif() endif()
if (USE_SENTENCEPIECE)
set(CMAKE_CXX_STANDARD 17)
add_compile_definitions(USE_SENTENCEPIECE)
set(FASTLLM_LINKED_LIBS ${FASTLLM_LINKED_LIBS} sentencepiece)
endif()
if (USE_CUDA) if (USE_CUDA)
enable_language(CUDA) enable_language(CUDA)
add_compile_definitions(USE_CUDA) add_compile_definitions(USE_CUDA)
...@@ -47,6 +62,11 @@ if (USE_CUDA) ...@@ -47,6 +62,11 @@ if (USE_CUDA)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -g --gpu-max-threads-per-block=1024") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -g --gpu-max-threads-per-block=1024")
endif() endif()
if (USE_IVCOREX)
set(FASTLLM_LINKED_LIBS ${FASTLLM_LINKED_LIBS} cudart)
set(CMAKE_CUDA_ARCHITECTURES ${IVCOREX_ARCH})
endif()
if (PY_API) if (PY_API)
set(PYBIND third_party/pybind11) set(PYBIND third_party/pybind11)
add_subdirectory(${PYBIND}) add_subdirectory(${PYBIND})
...@@ -73,6 +93,9 @@ target_link_libraries(main fastllm) ...@@ -73,6 +93,9 @@ target_link_libraries(main fastllm)
add_executable(quant tools/src/quant.cpp) add_executable(quant tools/src/quant.cpp)
target_link_libraries(quant fastllm) target_link_libraries(quant fastllm)
add_executable(testOps test/ops/cppOps.cpp)
target_link_libraries(testOps fastllm)
add_executable(webui example/webui/webui.cpp) add_executable(webui example/webui/webui.cpp)
target_link_libraries(webui fastllm) target_link_libraries(webui fastllm)
add_custom_command( add_custom_command(
...@@ -84,6 +107,11 @@ add_custom_command( ...@@ -84,6 +107,11 @@ add_custom_command(
add_executable(benchmark example/benchmark/benchmark.cpp) add_executable(benchmark example/benchmark/benchmark.cpp)
target_link_libraries(benchmark fastllm) target_link_libraries(benchmark fastllm)
add_custom_command(
TARGET benchmark
POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/example/benchmark/loop_benchmark.sh ${CMAKE_BINARY_DIR}/
)
add_executable(apiserver example/apiserver/apiserver.cpp example/apiserver/json11.cpp) add_executable(apiserver example/apiserver/apiserver.cpp example/apiserver/json11.cpp)
target_link_libraries(apiserver fastllm) target_link_libraries(apiserver fastllm)
......
...@@ -170,6 +170,18 @@ python3 tools/baichuan2flm.py baichuan-13b-int8.flm int8 #导出int8模型 ...@@ -170,6 +170,18 @@ python3 tools/baichuan2flm.py baichuan-13b-int8.flm int8 #导出int8模型
python3 tools/baichuan2flm.py baichuan-13b-int4.flm int4 #导出int4模型 python3 tools/baichuan2flm.py baichuan-13b-int4.flm int4 #导出int4模型
``` ```
### baichuan2模型导出 (默认脚本导出baichuan2-7b-chat模型)
``` sh
# 需要先安装baichuan2环境
# 如果使用自己finetune的模型需要修改baichuan2_2flm.py文件中创建tokenizer, model的代码
# 根据所需的精度,导出相应的模型
cd build
python3 tools/baichuan2_2flm.py baichuan2-7b-fp16.flm float16 #导出float16模型
python3 tools/baichuan2_2flm.py baichuan2-7b-int8.flm int8 #导出int8模型
python3 tools/baichuan2_2flm.py baichuan2-7b-int4.flm int4 #导出int4模型
```
### MOSS模型导出 ### MOSS模型导出
``` sh ``` sh
......
# 常见问题
## CMAKE
### CMAKE_CUDA_ARCHITECTURES must be non-empty if set.
**现象:**
> CMake Error at cmake/Modules/CMakeDetermineCUDACompiler.cmake:277 (message):
> CMAKE_CUDA_ARCHITECTURES must be non-empty if set.
> Call Stack (most recent call first):
> CMakeLists.txt:39 (enable_language)
**解决办法:**
部分版本cmake存在该问题,需手动指定`CMAKE_CUDA_ARCHITECTURES`。执行:
```shell
cmake .. -DUSE_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=native
```
### Unsupported gpu architecture 'compute_native'
**现象:**
> nvcc fatal : Unsupported gpu architecture 'compute_native'
**解决办法:**
手动修改 CMakeLists.txt,根据GPU型号手动指定GPU的[Compute Capability](https://developer.nvidia.com/cuda-gpus)。如:
``` diff
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -52,7 +52,7 @@
#message(${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
set(FASTLLM_CUDA_SOURCES src/devices/cuda/cudadevice.cpp src/devices/cuda/cudadevicebatch.cpp src/devices/cuda/fastllm-cuda.cu)
set(FASTLLM_LINKED_LIBS ${FASTLLM_LINKED_LIBS} cublas)
- set(CMAKE_CUDA_ARCHITECTURES "native")
+ set(CMAKE_CUDA_ARCHITECTURES 61 75 86 89)
endif()
if (PY_API)
```
## Windows
### fastllm.h error
**现象:**
> include\fastllm.h(50): error : identifier "top_k" is undefined
> include\fastllm.h(172): error : expected a "}"
> include\fastllm.h(234): error : identifier "DataDevice" is undefined
> ....
**解决办法:** 使用cmake构建通常不存在这一问题。参考 [example\README.md](/example/README.md)。签出代码后,**修改 include/fastllm.h**,Visual Studio中点击“文件” -> “高级保存选项”,在编码中选择“Unicode (UTF-8 **带签名**) - 代码页 65001”,或在其他文本编辑器中转为“UTF-8 BOM”编码。(由于linux下gcc不识别BOM头,MSVC依赖BOM判断文件编码,该修改只能手动处理。)
### main.exe 无法识别中文输入
**原因:** Windows下cmd不支持UTF-8编码。
**解决办法:** 编译[Win32Demo](/example/README.md#win32demo-windows平台) 或使用 [WebUI](/example/README.md#web-ui)
### 导入提示 FileNotFoundError
**现象:**
> File "...Python\lib\ctypes\_\_init\_\_.py", line 374, in \_\_init\_\_
> self._handle = _dlopen(self._name, mode)
> FileNotFoundError: Could not find module 'tools\fastllm_pytools\fastllm_tools.dll' (or one of its dependencies). Try using the full path with constructor syntax.
**解决办法:** 非CPU编译时,部分版本的python存在这一问题。
GPU编译时,根据使用的CUDA版本,将cudart cublas的相关dll文件复制到fastllm_tools同一目录下,例如:
* CUDA 9.2
* %CUDA_PATH%\bin\cublas64_92.dll
* %CUDA_PATH%\bin\cudart64_92.dll
* CUDA 11.x
* %CUDA_PATH%\bin\cudart64_110.dll
* %CUDA_PATH%\bin\cublas64_11.dll
* %CUDA_PATH%\bin\cublasLt64_11.dll
* CUDA 12.x
* %CUDA_PATH%\bin\cudart64_12.dll
* %CUDA_PATH%\bin\cublas64_12.dll
* %CUDA_PATH%\bin\cublasLt64_12.dll
## fastllm_pytools
### 释放内存报错: CUDA error when release memory
**现象:**
退出时报错:
> Error: CUDA error when release memory!
> CUDA error = 4, cudaErrorCudartUnloading at fastllm/src/devices/cuda/fastllm-cuda.cu:1493
> 'driver shutting down'
**原因:** python解释器在终止时常常会优先终止自己的进程,而没有先析构调用的第三方库,因此在退出python时CUDA Runtime已关闭,释放显存操作失败。由于大多数时候显存已释放,并不会引起问题。
**解决办法:** python程序退出时,先显式调用 `llm.release_memory()`方法。
# Default ignored files
/shelf/
/workspace.xml
XiaoZhihuiAssistant
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="CompilerConfiguration">
<bytecodeTargetLevel target="11" />
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="DBNavigator.Project.DataEditorManager">
<record-view-column-sorting-type value="BY_INDEX" />
<value-preview-text-wrapping value="true" />
<value-preview-pinned value="false" />
</component>
<component name="DBNavigator.Project.DatabaseEditorStateManager">
<last-used-providers />
</component>
<component name="DBNavigator.Project.DatabaseFileManager">
<open-files />
</component>
<component name="DBNavigator.Project.Settings">
<connections />
<browser-settings>
<general>
<display-mode value="TABBED" />
<navigation-history-size value="100" />
<show-object-details value="false" />
</general>
<filters>
<object-type-filter>
<object-type name="SCHEMA" enabled="true" />
<object-type name="USER" enabled="true" />
<object-type name="ROLE" enabled="true" />
<object-type name="PRIVILEGE" enabled="true" />
<object-type name="CHARSET" enabled="true" />
<object-type name="TABLE" enabled="true" />
<object-type name="VIEW" enabled="true" />
<object-type name="MATERIALIZED_VIEW" enabled="true" />
<object-type name="NESTED_TABLE" enabled="true" />
<object-type name="COLUMN" enabled="true" />
<object-type name="INDEX" enabled="true" />
<object-type name="CONSTRAINT" enabled="true" />
<object-type name="DATASET_TRIGGER" enabled="true" />
<object-type name="DATABASE_TRIGGER" enabled="true" />
<object-type name="SYNONYM" enabled="true" />
<object-type name="SEQUENCE" enabled="true" />
<object-type name="PROCEDURE" enabled="true" />
<object-type name="FUNCTION" enabled="true" />
<object-type name="PACKAGE" enabled="true" />
<object-type name="TYPE" enabled="true" />
<object-type name="TYPE_ATTRIBUTE" enabled="true" />
<object-type name="ARGUMENT" enabled="true" />
<object-type name="DIMENSION" enabled="true" />
<object-type name="CLUSTER" enabled="true" />
<object-type name="DBLINK" enabled="true" />
</object-type-filter>
</filters>
<sorting>
<object-type name="COLUMN" sorting-type="NAME" />
<object-type name="FUNCTION" sorting-type="NAME" />
<object-type name="PROCEDURE" sorting-type="NAME" />
<object-type name="ARGUMENT" sorting-type="POSITION" />
<object-type name="TYPE ATTRIBUTE" sorting-type="POSITION" />
</sorting>
<default-editors>
<object-type name="VIEW" editor-type="SELECTION" />
<object-type name="PACKAGE" editor-type="SELECTION" />
<object-type name="TYPE" editor-type="SELECTION" />
</default-editors>
</browser-settings>
<navigation-settings>
<lookup-filters>
<lookup-objects>
<object-type name="SCHEMA" enabled="true" />
<object-type name="USER" enabled="false" />
<object-type name="ROLE" enabled="false" />
<object-type name="PRIVILEGE" enabled="false" />
<object-type name="CHARSET" enabled="false" />
<object-type name="TABLE" enabled="true" />
<object-type name="VIEW" enabled="true" />
<object-type name="MATERIALIZED VIEW" enabled="true" />
<object-type name="INDEX" enabled="true" />
<object-type name="CONSTRAINT" enabled="true" />
<object-type name="DATASET TRIGGER" enabled="true" />
<object-type name="DATABASE TRIGGER" enabled="true" />
<object-type name="SYNONYM" enabled="false" />
<object-type name="SEQUENCE" enabled="true" />
<object-type name="PROCEDURE" enabled="true" />
<object-type name="FUNCTION" enabled="true" />
<object-type name="PACKAGE" enabled="true" />
<object-type name="TYPE" enabled="true" />
<object-type name="DIMENSION" enabled="false" />
<object-type name="CLUSTER" enabled="false" />
<object-type name="DBLINK" enabled="true" />
</lookup-objects>
<force-database-load value="false" />
<prompt-connection-selection value="true" />
<prompt-schema-selection value="true" />
</lookup-filters>
</navigation-settings>
<dataset-grid-settings>
<general>
<enable-zooming value="true" />
<enable-column-tooltip value="true" />
</general>
<sorting>
<nulls-first value="true" />
<max-sorting-columns value="4" />
</sorting>
<audit-columns>
<column-names value="" />
<visible value="true" />
<editable value="false" />
</audit-columns>
</dataset-grid-settings>
<dataset-editor-settings>
<text-editor-popup>
<active value="false" />
<active-if-empty value="false" />
<data-length-threshold value="100" />
<popup-delay value="1000" />
</text-editor-popup>
<values-actions-popup>
<show-popup-button value="true" />
<element-count-threshold value="1000" />
<data-length-threshold value="250" />
</values-actions-popup>
<general>
<fetch-block-size value="100" />
<fetch-timeout value="30" />
<trim-whitespaces value="true" />
<convert-empty-strings-to-null value="true" />
<select-content-on-cell-edit value="true" />
<large-value-preview-active value="true" />
</general>
<filters>
<prompt-filter-dialog value="true" />
<default-filter-type value="BASIC" />
</filters>
<qualified-text-editor text-length-threshold="300">
<content-types>
<content-type name="Text" enabled="true" />
<content-type name="Properties" enabled="true" />
<content-type name="XML" enabled="true" />
<content-type name="DTD" enabled="true" />
<content-type name="HTML" enabled="true" />
<content-type name="XHTML" enabled="true" />
<content-type name="Java" enabled="true" />
<content-type name="SQL" enabled="true" />
<content-type name="PL/SQL" enabled="true" />
<content-type name="JSON" enabled="true" />
<content-type name="JSON5" enabled="true" />
<content-type name="Groovy" enabled="true" />
<content-type name="AIDL" enabled="true" />
<content-type name="YAML" enabled="true" />
<content-type name="Manifest" enabled="true" />
</content-types>
</qualified-text-editor>
<record-navigation>
<navigation-target value="VIEWER" />
</record-navigation>
</dataset-editor-settings>
<code-editor-settings>
<general>
<show-object-navigation-gutter value="false" />
<show-spec-declaration-navigation-gutter value="true" />
<enable-spellchecking value="true" />
<enable-reference-spellchecking value="false" />
</general>
<confirmations>
<save-changes value="false" />
<revert-changes value="true" />
</confirmations>
</code-editor-settings>
<code-completion-settings>
<filters>
<basic-filter>
<filter-element type="RESERVED_WORD" id="keyword" selected="true" />
<filter-element type="RESERVED_WORD" id="function" selected="true" />
<filter-element type="RESERVED_WORD" id="parameter" selected="true" />
<filter-element type="RESERVED_WORD" id="datatype" selected="true" />
<filter-element type="RESERVED_WORD" id="exception" selected="true" />
<filter-element type="OBJECT" id="schema" selected="true" />
<filter-element type="OBJECT" id="role" selected="true" />
<filter-element type="OBJECT" id="user" selected="true" />
<filter-element type="OBJECT" id="privilege" selected="true" />
<user-schema>
<filter-element type="OBJECT" id="table" selected="true" />
<filter-element type="OBJECT" id="view" selected="true" />
<filter-element type="OBJECT" id="materialized view" selected="true" />
<filter-element type="OBJECT" id="index" selected="true" />
<filter-element type="OBJECT" id="constraint" selected="true" />
<filter-element type="OBJECT" id="trigger" selected="true" />
<filter-element type="OBJECT" id="synonym" selected="false" />
<filter-element type="OBJECT" id="sequence" selected="true" />
<filter-element type="OBJECT" id="procedure" selected="true" />
<filter-element type="OBJECT" id="function" selected="true" />
<filter-element type="OBJECT" id="package" selected="true" />
<filter-element type="OBJECT" id="type" selected="true" />
<filter-element type="OBJECT" id="dimension" selected="true" />
<filter-element type="OBJECT" id="cluster" selected="true" />
<filter-element type="OBJECT" id="dblink" selected="true" />
</user-schema>
<public-schema>
<filter-element type="OBJECT" id="table" selected="false" />
<filter-element type="OBJECT" id="view" selected="false" />
<filter-element type="OBJECT" id="materialized view" selected="false" />
<filter-element type="OBJECT" id="index" selected="false" />
<filter-element type="OBJECT" id="constraint" selected="false" />
<filter-element type="OBJECT" id="trigger" selected="false" />
<filter-element type="OBJECT" id="synonym" selected="false" />
<filter-element type="OBJECT" id="sequence" selected="false" />
<filter-element type="OBJECT" id="procedure" selected="false" />
<filter-element type="OBJECT" id="function" selected="false" />
<filter-element type="OBJECT" id="package" selected="false" />
<filter-element type="OBJECT" id="type" selected="false" />
<filter-element type="OBJECT" id="dimension" selected="false" />
<filter-element type="OBJECT" id="cluster" selected="false" />
<filter-element type="OBJECT" id="dblink" selected="false" />
</public-schema>
<any-schema>
<filter-element type="OBJECT" id="table" selected="true" />
<filter-element type="OBJECT" id="view" selected="true" />
<filter-element type="OBJECT" id="materialized view" selected="true" />
<filter-element type="OBJECT" id="index" selected="true" />
<filter-element type="OBJECT" id="constraint" selected="true" />
<filter-element type="OBJECT" id="trigger" selected="true" />
<filter-element type="OBJECT" id="synonym" selected="true" />
<filter-element type="OBJECT" id="sequence" selected="true" />
<filter-element type="OBJECT" id="procedure" selected="true" />
<filter-element type="OBJECT" id="function" selected="true" />
<filter-element type="OBJECT" id="package" selected="true" />
<filter-element type="OBJECT" id="type" selected="true" />
<filter-element type="OBJECT" id="dimension" selected="true" />
<filter-element type="OBJECT" id="cluster" selected="true" />
<filter-element type="OBJECT" id="dblink" selected="true" />
</any-schema>
</basic-filter>
<extended-filter>
<filter-element type="RESERVED_WORD" id="keyword" selected="true" />
<filter-element type="RESERVED_WORD" id="function" selected="true" />
<filter-element type="RESERVED_WORD" id="parameter" selected="true" />
<filter-element type="RESERVED_WORD" id="datatype" selected="true" />
<filter-element type="RESERVED_WORD" id="exception" selected="true" />
<filter-element type="OBJECT" id="schema" selected="true" />
<filter-element type="OBJECT" id="user" selected="true" />
<filter-element type="OBJECT" id="role" selected="true" />
<filter-element type="OBJECT" id="privilege" selected="true" />
<user-schema>
<filter-element type="OBJECT" id="table" selected="true" />
<filter-element type="OBJECT" id="view" selected="true" />
<filter-element type="OBJECT" id="materialized view" selected="true" />
<filter-element type="OBJECT" id="index" selected="true" />
<filter-element type="OBJECT" id="constraint" selected="true" />
<filter-element type="OBJECT" id="trigger" selected="true" />
<filter-element type="OBJECT" id="synonym" selected="true" />
<filter-element type="OBJECT" id="sequence" selected="true" />
<filter-element type="OBJECT" id="procedure" selected="true" />
<filter-element type="OBJECT" id="function" selected="true" />
<filter-element type="OBJECT" id="package" selected="true" />
<filter-element type="OBJECT" id="type" selected="true" />
<filter-element type="OBJECT" id="dimension" selected="true" />
<filter-element type="OBJECT" id="cluster" selected="true" />
<filter-element type="OBJECT" id="dblink" selected="true" />
</user-schema>
<public-schema>
<filter-element type="OBJECT" id="table" selected="true" />
<filter-element type="OBJECT" id="view" selected="true" />
<filter-element type="OBJECT" id="materialized view" selected="true" />
<filter-element type="OBJECT" id="index" selected="true" />
<filter-element type="OBJECT" id="constraint" selected="true" />
<filter-element type="OBJECT" id="trigger" selected="true" />
<filter-element type="OBJECT" id="synonym" selected="true" />
<filter-element type="OBJECT" id="sequence" selected="true" />
<filter-element type="OBJECT" id="procedure" selected="true" />
<filter-element type="OBJECT" id="function" selected="true" />
<filter-element type="OBJECT" id="package" selected="true" />
<filter-element type="OBJECT" id="type" selected="true" />
<filter-element type="OBJECT" id="dimension" selected="true" />
<filter-element type="OBJECT" id="cluster" selected="true" />
<filter-element type="OBJECT" id="dblink" selected="true" />
</public-schema>
<any-schema>
<filter-element type="OBJECT" id="table" selected="true" />
<filter-element type="OBJECT" id="view" selected="true" />
<filter-element type="OBJECT" id="materialized view" selected="true" />
<filter-element type="OBJECT" id="index" selected="true" />
<filter-element type="OBJECT" id="constraint" selected="true" />
<filter-element type="OBJECT" id="trigger" selected="true" />
<filter-element type="OBJECT" id="synonym" selected="true" />
<filter-element type="OBJECT" id="sequence" selected="true" />
<filter-element type="OBJECT" id="procedure" selected="true" />
<filter-element type="OBJECT" id="function" selected="true" />
<filter-element type="OBJECT" id="package" selected="true" />
<filter-element type="OBJECT" id="type" selected="true" />
<filter-element type="OBJECT" id="dimension" selected="true" />
<filter-element type="OBJECT" id="cluster" selected="true" />
<filter-element type="OBJECT" id="dblink" selected="true" />
</any-schema>
</extended-filter>
</filters>
<sorting enabled="true">
<sorting-element type="RESERVED_WORD" id="keyword" />
<sorting-element type="RESERVED_WORD" id="datatype" />
<sorting-element type="OBJECT" id="column" />
<sorting-element type="OBJECT" id="table" />
<sorting-element type="OBJECT" id="view" />
<sorting-element type="OBJECT" id="materialized view" />
<sorting-element type="OBJECT" id="index" />
<sorting-element type="OBJECT" id="constraint" />
<sorting-element type="OBJECT" id="trigger" />
<sorting-element type="OBJECT" id="synonym" />
<sorting-element type="OBJECT" id="sequence" />
<sorting-element type="OBJECT" id="procedure" />
<sorting-element type="OBJECT" id="function" />
<sorting-element type="OBJECT" id="package" />
<sorting-element type="OBJECT" id="type" />
<sorting-element type="OBJECT" id="dimension" />
<sorting-element type="OBJECT" id="cluster" />
<sorting-element type="OBJECT" id="dblink" />
<sorting-element type="OBJECT" id="schema" />
<sorting-element type="OBJECT" id="role" />
<sorting-element type="OBJECT" id="user" />
<sorting-element type="RESERVED_WORD" id="function" />
<sorting-element type="RESERVED_WORD" id="parameter" />
</sorting>
<format>
<enforce-code-style-case value="true" />
</format>
</code-completion-settings>
<execution-engine-settings>
<statement-execution>
<fetch-block-size value="100" />
<execution-timeout value="20" />
<debug-execution-timeout value="600" />
<focus-result value="false" />
<prompt-execution value="false" />
</statement-execution>
<script-execution>
<command-line-interfaces />
<execution-timeout value="300" />
</script-execution>
<method-execution>
<execution-timeout value="30" />
<debug-execution-timeout value="600" />
<parameter-history-size value="10" />
</method-execution>
</execution-engine-settings>
<operation-settings>
<transactions>
<uncommitted-changes>
<on-project-close value="ASK" />
<on-disconnect value="ASK" />
<on-autocommit-toggle value="ASK" />
</uncommitted-changes>
<multiple-uncommitted-changes>
<on-commit value="ASK" />
<on-rollback value="ASK" />
</multiple-uncommitted-changes>
</transactions>
<session-browser>
<disconnect-session value="ASK" />
<kill-session value="ASK" />
<reload-on-filter-change value="false" />
</session-browser>
<compiler>
<compile-type value="KEEP" />
<compile-dependencies value="ASK" />
<always-show-controls value="false" />
</compiler>
<debugger>
<debugger-type value="ASK" />
<use-generic-runners value="true" />
</debugger>
</operation-settings>
<ddl-file-settings>
<extensions>
<mapping file-type-id="VIEW" extensions="vw" />
<mapping file-type-id="TRIGGER" extensions="trg" />
<mapping file-type-id="PROCEDURE" extensions="prc" />
<mapping file-type-id="FUNCTION" extensions="fnc" />
<mapping file-type-id="PACKAGE" extensions="pkg" />
<mapping file-type-id="PACKAGE_SPEC" extensions="pks" />
<mapping file-type-id="PACKAGE_BODY" extensions="pkb" />
<mapping file-type-id="TYPE" extensions="tpe" />
<mapping file-type-id="TYPE_SPEC" extensions="tps" />
<mapping file-type-id="TYPE_BODY" extensions="tpb" />
</extensions>
<general>
<lookup-ddl-files value="true" />
<create-ddl-files value="false" />
<synchronize-ddl-files value="true" />
<use-qualified-names value="false" />
<make-scripts-rerunnable value="true" />
</general>
</ddl-file-settings>
<general-settings>
<regional-settings>
<date-format value="MEDIUM" />
<number-format value="UNGROUPED" />
<locale value="SYSTEM_DEFAULT" />
<use-custom-formats value="false" />
</regional-settings>
<environment>
<environment-types>
<environment-type id="development" name="Development" description="Development environment" color="-2430209/-12296320" readonly-code="false" readonly-data="false" />
<environment-type id="integration" name="Integration" description="Integration environment" color="-2621494/-12163514" readonly-code="true" readonly-data="false" />
<environment-type id="production" name="Production" description="Productive environment" color="-11574/-10271420" readonly-code="true" readonly-data="true" />
<environment-type id="other" name="Other" description="" color="-1576/-10724543" readonly-code="false" readonly-data="false" />
</environment-types>
<visibility-settings>
<connection-tabs value="true" />
<dialog-headers value="true" />
<object-editor-tabs value="true" />
<script-editor-tabs value="false" />
<execution-result-tabs value="true" />
</visibility-settings>
</environment>
</general-settings>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="deploymentTargetDropDown">
<runningDeviceTargetSelectedWithDropDown>
<Target>
<type value="RUNNING_DEVICE_TARGET" />
<deviceKey>
<Key>
<type value="SERIAL_NUMBER" />
<value value="adb-JRF67HLJ9HX85XCU-GNBKo9._adb-tls-connect._tcp" />
</Key>
</deviceKey>
</Target>
</runningDeviceTargetSelectedWithDropDown>
<timeTargetWasSelectedWithDropDown value="2023-07-28T10:10:24.047223600Z" />
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="GradleMigrationSettings" migrationVersion="1" />
<component name="GradleSettings">
<option name="linkedExternalProjectsSettings">
<GradleProjectSettings>
<option name="testRunner" value="GRADLE" />
<option name="distributionType" value="DEFAULT_WRAPPED" />
<option name="externalProjectPath" value="$PROJECT_DIR$" />
<option name="gradleJvm" value="Embedded JDK" />
<option name="modules">
<set>
<option value="$PROJECT_DIR$" />
<option value="$PROJECT_DIR$/app" />
</set>
</option>
</GradleProjectSettings>
</option>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ExternalStorageConfigurationManager" enabled="true" />
<component name="ProjectRootManager" version="2" languageLevel="JDK_11" default="true" project-jdk-name="11" project-jdk-type="JavaSDK">
<output url="file://$PROJECT_DIR$/build/classes" />
</component>
<component name="ProjectType">
<option name="id" value="Android" />
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$/../../.." vcs="Git" />
</component>
</project>
\ No newline at end of file
...@@ -2,7 +2,9 @@ ...@@ -2,7 +2,9 @@
## Benchmark ## Benchmark
测速示例程序,方便大家测试不同软硬件下的推理性能。作者测速度可以可参考[这里](doc/benchmark.md) 测速示例程序,方便大家测试不同软硬件下的推理性能。作者测试的速度可以参考[这里](doc/benchmark.md)
由于实际使用时很难满足batch的条件,也并非贪婪解码,该速度与真实使用时的速度有一定差异。
### 使用方法: ### 使用方法:
...@@ -38,13 +40,17 @@ fastllm工程目前分为CPU版本和GPU版本,为简单上手,在没有cmak ...@@ -38,13 +40,17 @@ fastllm工程目前分为CPU版本和GPU版本,为简单上手,在没有cmak
签出代码后,**修改 include/fastllm.h**,Visual Studio中点击”文件“ -> "高级保存选项",在编码中选择”Unicode (UTF-8 **带签名**) -代码页 65001“,或在其他文本编辑器中转为”UTF-8 BOM“编码。(由于linux下gcc不识别BOM头,该修改只能手动处理。) 签出代码后,**修改 include/fastllm.h**,Visual Studio中点击”文件“ -> "高级保存选项",在编码中选择”Unicode (UTF-8 **带签名**) -代码页 65001“,或在其他文本编辑器中转为”UTF-8 BOM“编码。(由于linux下gcc不识别BOM头,该修改只能手动处理。)
* **CPU版本**
* 如果本机没有安装CUDA,在Win32Demo项目“属性”中找到"链接器" -> "输入" -> "附加依赖项",点击'从父级或项目设置继承'。
* **GPU版本** * **GPU版本**
- 需要正确安装CUDA; - 需要正确安装CUDA及其中的Visual Studio Integration
- 正确配置CUDA_PATH环境变量,指向要编译的CUDA版本; - 正确配置CUDA_PATH环境变量,指向要编译的CUDA版本;
- 在解决方案中删除fastllm.vcproj,引入fastllm-gpu.vcproj, - 在解决方案资源管理器中移除fastllm.vcproj,引入fastllm-gpu.vcproj,
- 对fastllm-gpu项目,在”生成依赖项“ -> "生成自定义" 中手动添加已安装的CUDA的自定义项文件; - 对fastllm-gpu项目,在”生成依赖项“ -> "生成自定义" 中手动添加已安装的CUDA的自定义项文件;
- 对fastllm-gpu项目,在”属性“中找到"CUDA C/C++" -> "Device" -> "Code Generation" 中配置编译后支持的[GPU计算能力](https://developer.nvidia.com/cuda-gpus#compute)
- 在Win32Demo项目上选择”添加“ -> "引用“,勾选fastllm-gpu项目; - 在Win32Demo项目上选择”添加“ -> "引用“,勾选fastllm-gpu项目;
- 配置预处理器定义”USE_CUDA“。 - 在Win32Demo项目上配置预处理器定义”USE_CUDA“。
### 使用方法: ### 使用方法:
...@@ -61,15 +67,15 @@ Android,使用Android studio工具建立的一個Android平台上运行LLM程 ...@@ -61,15 +67,15 @@ Android,使用Android studio工具建立的一個Android平台上运行LLM程
### 使用方法: ### 使用方法:
1.直接AS打开运行。 1.在Android Studio直接打开工程运行。
2.直接下载release目录里里面的apk体验。 2.直接下载release目录里里面的apk体验。
3.可以通过CMake工具链编译main文件(具体步骤见主页的readme),通过adb shell运行, 3.可以通过CMake工具链编译main文件(具体步骤见主页的readme),通过adb shell运行,
1. adb push main /data/local/tmp 将main文件放到手机的tmp文件夹, 1. `adb push main /data/local/tmp` 将main文件放到手机的tmp文件夹,
2. adb shell , 2. `adb shell` ,
3. cd /data/local/tmp 3. `cd /data/local/tmp`
4. ./main 运行。 4. `./main` 运行。
注意:demo apk 会将模型文件复制到应用 data 目录以方便 native 读取,因此设备需准备至少两倍模型大小的空余空间 注意:demo apk 会将模型文件复制到应用 data 目录以方便 native 读取,因此设备需准备至少两倍模型大小的空余空间。
\ No newline at end of file \ No newline at end of file
...@@ -135,6 +135,7 @@ int chatllm(const char* prompt, int type) { ...@@ -135,6 +135,7 @@ int chatllm(const char* prompt, int type) {
}, *generationConfig); }, *generationConfig);
history = model->MakeHistory(history, sRound, input, ret); history = model->MakeHistory(history, sRound, input, ret);
sRound++;
return ret.length(); return ret.length();
} }
......
...@@ -71,7 +71,6 @@ ...@@ -71,7 +71,6 @@
<LinkIncremental>true</LinkIncremental> <LinkIncremental>true</LinkIncremental>
<LibraryPath>$(CUDA_PATH)\lib\Win32;$(LibraryPath)</LibraryPath> <LibraryPath>$(CUDA_PATH)\lib\Win32;$(LibraryPath)</LibraryPath>
<IncludePath>$(CUDA_PATH)\include;$(IncludePath)</IncludePath> <IncludePath>$(CUDA_PATH)\include;$(IncludePath)</IncludePath>
<TargetExt>.lib</TargetExt>
<OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir> <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
<IntDir>$(Platform)\$(Configuration)\</IntDir> <IntDir>$(Platform)\$(Configuration)\</IntDir>
</PropertyGroup> </PropertyGroup>
...@@ -79,13 +78,11 @@ ...@@ -79,13 +78,11 @@
<LinkIncremental>true</LinkIncremental> <LinkIncremental>true</LinkIncremental>
<IncludePath>$(CUDA_PATH)\include;$(IncludePath)</IncludePath> <IncludePath>$(CUDA_PATH)\include;$(IncludePath)</IncludePath>
<LibraryPath>$(CUDA_PATH)\lib\x64;$(LibraryPath)</LibraryPath> <LibraryPath>$(CUDA_PATH)\lib\x64;$(LibraryPath)</LibraryPath>
<TargetExt>.lib</TargetExt>
</PropertyGroup> </PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<LinkIncremental>false</LinkIncremental> <LinkIncremental>false</LinkIncremental>
<LibraryPath>$(CUDA_PATH)\lib\Win32;$(LibraryPath)</LibraryPath> <LibraryPath>$(CUDA_PATH)\lib\Win32;$(LibraryPath)</LibraryPath>
<IncludePath>$(CUDA_PATH)\include;$(IncludePath)</IncludePath> <IncludePath>$(CUDA_PATH)\include;$(IncludePath)</IncludePath>
<TargetExt>.lib</TargetExt>
<OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir> <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
<IntDir>$(Platform)\$(Configuration)\</IntDir> <IntDir>$(Platform)\$(Configuration)\</IntDir>
</PropertyGroup> </PropertyGroup>
...@@ -93,7 +90,6 @@ ...@@ -93,7 +90,6 @@
<LinkIncremental>false</LinkIncremental> <LinkIncremental>false</LinkIncremental>
<IncludePath>$(CUDA_PATH)\include;$(IncludePath)</IncludePath> <IncludePath>$(CUDA_PATH)\include;$(IncludePath)</IncludePath>
<LibraryPath>$(CUDA_PATH)\lib\x64;$(LibraryPath)</LibraryPath> <LibraryPath>$(CUDA_PATH)\lib\x64;$(LibraryPath)</LibraryPath>
<TargetExt>.lib</TargetExt>
</PropertyGroup> </PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile> <ClCompile>
...@@ -111,7 +107,7 @@ ...@@ -111,7 +107,7 @@
<SubSystem>Windows</SubSystem> <SubSystem>Windows</SubSystem>
</Link> </Link>
<CudaCompile> <CudaCompile>
<CodeGeneration>compute_61,sm_61;%(CodeGeneration)</CodeGeneration> <CodeGeneration>compute_61,sm_61;compute_75,sm_75;compute_86,sm_86;%(CodeGeneration)</CodeGeneration>
</CudaCompile> </CudaCompile>
</ItemDefinitionGroup> </ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
...@@ -130,7 +126,7 @@ ...@@ -130,7 +126,7 @@
<SubSystem>Windows</SubSystem> <SubSystem>Windows</SubSystem>
</Link> </Link>
<CudaCompile> <CudaCompile>
<CodeGeneration>compute_61,sm_61;%(CodeGeneration)</CodeGeneration> <CodeGeneration>compute_61,sm_61;compute_75,sm_75;compute_86,sm_86;%(CodeGeneration)</CodeGeneration>
</CudaCompile> </CudaCompile>
</ItemDefinitionGroup> </ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
...@@ -153,7 +149,7 @@ ...@@ -153,7 +149,7 @@
<OptimizeReferences>true</OptimizeReferences> <OptimizeReferences>true</OptimizeReferences>
</Link> </Link>
<CudaCompile> <CudaCompile>
<CodeGeneration>compute_61,sm_61;%(CodeGeneration)</CodeGeneration> <CodeGeneration>compute_61,sm_61;compute_75,sm_75;compute_86,sm_86;%(CodeGeneration)</CodeGeneration>
</CudaCompile> </CudaCompile>
</ItemDefinitionGroup> </ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
...@@ -176,7 +172,7 @@ ...@@ -176,7 +172,7 @@
<OptimizeReferences>true</OptimizeReferences> <OptimizeReferences>true</OptimizeReferences>
</Link> </Link>
<CudaCompile> <CudaCompile>
<CodeGeneration>compute_61,sm_61;%(CodeGeneration)</CodeGeneration> <CodeGeneration>compute_61,sm_61;compute_75,sm_75;compute_86,sm_86;%(CodeGeneration)</CodeGeneration>
<FastMath>true</FastMath> <FastMath>true</FastMath>
</CudaCompile> </CudaCompile>
</ItemDefinitionGroup> </ItemDefinitionGroup>
...@@ -191,6 +187,7 @@ ...@@ -191,6 +187,7 @@
<ClInclude Include="..\..\include\models\basellm.h" /> <ClInclude Include="..\..\include\models\basellm.h" />
<ClInclude Include="..\..\include\models\chatglm.h" /> <ClInclude Include="..\..\include\models\chatglm.h" />
<ClInclude Include="..\..\include\models\factoryllm.h" /> <ClInclude Include="..\..\include\models\factoryllm.h" />
<ClInclude Include="..\..\include\models\glm.h" />
<ClInclude Include="..\..\include\models\llama.h" /> <ClInclude Include="..\..\include\models\llama.h" />
<ClInclude Include="..\..\include\models\moss.h" /> <ClInclude Include="..\..\include\models\moss.h" />
<ClInclude Include="..\..\include\models\qwen.h" /> <ClInclude Include="..\..\include\models\qwen.h" />
...@@ -208,6 +205,7 @@ ...@@ -208,6 +205,7 @@
<ClCompile Include="..\..\src\model.cpp" /> <ClCompile Include="..\..\src\model.cpp" />
<ClCompile Include="..\..\src\models\basellm.cpp" /> <ClCompile Include="..\..\src\models\basellm.cpp" />
<ClCompile Include="..\..\src\models\chatglm.cpp" /> <ClCompile Include="..\..\src\models\chatglm.cpp" />
<ClCompile Include="..\..\src\models\glm.cpp" />
<ClCompile Include="..\..\src\models\llama.cpp" /> <ClCompile Include="..\..\src\models\llama.cpp" />
<ClCompile Include="..\..\src\models\moss.cpp" /> <ClCompile Include="..\..\src\models\moss.cpp" />
<ClCompile Include="..\..\src\models\qwen.cpp" /> <ClCompile Include="..\..\src\models\qwen.cpp" />
......
...@@ -72,25 +72,21 @@ ...@@ -72,25 +72,21 @@
<LinkIncremental>true</LinkIncremental> <LinkIncremental>true</LinkIncremental>
<LibraryPath>$(CUDA_PATH)\lib\Win32;$(LibraryPath)</LibraryPath> <LibraryPath>$(CUDA_PATH)\lib\Win32;$(LibraryPath)</LibraryPath>
<IncludePath>$(CUDA_PATH)\include;$(IncludePath)</IncludePath> <IncludePath>$(CUDA_PATH)\include;$(IncludePath)</IncludePath>
<TargetExt>.lib</TargetExt>
<OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir> <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
<IntDir>$(Platform)\$(Configuration)\</IntDir> <IntDir>$(Platform)\$(Configuration)\</IntDir>
</PropertyGroup> </PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<LinkIncremental>true</LinkIncremental> <LinkIncremental>true</LinkIncremental>
<TargetExt>.lib</TargetExt>
</PropertyGroup> </PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<LinkIncremental>false</LinkIncremental> <LinkIncremental>false</LinkIncremental>
<LibraryPath>$(CUDA_PATH)\lib\Win32;$(LibraryPath)</LibraryPath> <LibraryPath>$(CUDA_PATH)\lib\Win32;$(LibraryPath)</LibraryPath>
<IncludePath>$(CUDA_PATH)\include;$(IncludePath)</IncludePath> <IncludePath>$(CUDA_PATH)\include;$(IncludePath)</IncludePath>
<TargetExt>.lib</TargetExt>
<OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir> <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
<IntDir>$(Platform)\$(Configuration)\</IntDir> <IntDir>$(Platform)\$(Configuration)\</IntDir>
</PropertyGroup> </PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<LinkIncremental>false</LinkIncremental> <LinkIncremental>false</LinkIncremental>
<TargetExt>.lib</TargetExt>
</PropertyGroup> </PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile> <ClCompile>
...@@ -172,10 +168,6 @@ ...@@ -172,10 +168,6 @@
<EnableCOMDATFolding>true</EnableCOMDATFolding> <EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences> <OptimizeReferences>true</OptimizeReferences>
</Link> </Link>
<CudaCompile>
<CodeGeneration>compute_61,sm_61;%(CodeGeneration)</CodeGeneration>
<FastMath>true</FastMath>
</CudaCompile>
</ItemDefinitionGroup> </ItemDefinitionGroup>
<ItemGroup> <ItemGroup>
<ClInclude Include="..\..\include\device.h" /> <ClInclude Include="..\..\include\device.h" />
...@@ -187,6 +179,7 @@ ...@@ -187,6 +179,7 @@
<ClInclude Include="..\..\include\models\basellm.h" /> <ClInclude Include="..\..\include\models\basellm.h" />
<ClInclude Include="..\..\include\models\chatglm.h" /> <ClInclude Include="..\..\include\models\chatglm.h" />
<ClInclude Include="..\..\include\models\factoryllm.h" /> <ClInclude Include="..\..\include\models\factoryllm.h" />
<ClInclude Include="..\..\include\models\glm.h" />
<ClInclude Include="..\..\include\models\llama.h" /> <ClInclude Include="..\..\include\models\llama.h" />
<ClInclude Include="..\..\include\models\moss.h" /> <ClInclude Include="..\..\include\models\moss.h" />
<ClInclude Include="..\..\include\models\qwen.h" /> <ClInclude Include="..\..\include\models\qwen.h" />
...@@ -202,6 +195,7 @@ ...@@ -202,6 +195,7 @@
<ClCompile Include="..\..\src\model.cpp" /> <ClCompile Include="..\..\src\model.cpp" />
<ClCompile Include="..\..\src\models\basellm.cpp" /> <ClCompile Include="..\..\src\models\basellm.cpp" />
<ClCompile Include="..\..\src\models\chatglm.cpp" /> <ClCompile Include="..\..\src\models\chatglm.cpp" />
<ClCompile Include="..\..\src\models\glm.cpp" />
<ClCompile Include="..\..\src\models\llama.cpp" /> <ClCompile Include="..\..\src\models\llama.cpp" />
<ClCompile Include="..\..\src\models\moss.cpp" /> <ClCompile Include="..\..\src\models\moss.cpp" />
<ClCompile Include="..\..\src\models\qwen.cpp" /> <ClCompile Include="..\..\src\models\qwen.cpp" />
......
...@@ -252,7 +252,11 @@ struct WorkQueue { ...@@ -252,7 +252,11 @@ struct WorkQueue {
while (true) { while (true) {
std::unique_lock <std::mutex> lock(ts->locker); std::unique_lock <std::mutex> lock(ts->locker);
if (ts->activateQueryNumber >= ts->maxActivateQueryNumber) { if (ts->activateQueryNumber >= ts->maxActivateQueryNumber) {
#ifdef WIN32
Sleep(0);
#else
sleep(0); sleep(0);
#endif
continue; continue;
} }
if (ts->q.empty()) { if (ts->q.empty()) {
...@@ -435,7 +439,11 @@ int main(int argc, char** argv) { ...@@ -435,7 +439,11 @@ int main(int argc, char** argv) {
buff[size] = 0; buff[size] = 0;
while (workQueue.q.size() > workQueue.maxActivateQueryNumber) { while (workQueue.q.size() > workQueue.maxActivateQueryNumber) {
#ifdef WIN32
Sleep(0);
#else
sleep(0); sleep(0);
#endif
} }
workQueue.Push(buff, client); workQueue.Push(buff, client);
} }
......
//
// Created by huangyuyang on 6/9/23.
//
#include "model.h" #include "model.h"
#include "utils.h" #include "utils.h"
#include "fstream" #include "fstream"
#include <unistd.h>
#if defined(_WIN32) or defined(_WIN64) #if defined(_WIN32) or defined(_WIN64)
#include <codecvt> #include <codecvt>
...@@ -28,6 +25,7 @@ struct BenchmarkConfig { ...@@ -28,6 +25,7 @@ struct BenchmarkConfig {
int batch = -1; // batch数, -1时使用文件中的行数作为batch int batch = -1; // batch数, -1时使用文件中的行数作为batch
std::string file; // 输入文件 std::string file; // 输入文件
std::string output; // 输出文件,如果不设定则输出到屏幕 std::string output; // 输出文件,如果不设定则输出到屏幕
int runloop = 0;
}; };
void Usage() { void Usage() {
...@@ -38,6 +36,7 @@ void Usage() { ...@@ -38,6 +36,7 @@ void Usage() {
std::cout << "<-l|--limit> <args>: 输出token数限制" << std::endl; std::cout << "<-l|--limit> <args>: 输出token数限制" << std::endl;
std::cout << "<-b|--batch> <args>: batch数" << std::endl; std::cout << "<-b|--batch> <args>: batch数" << std::endl;
std::cout << "<-f|--file> <args>: 输入文件,文件中每行一个prompt,如果行数不足batch则用之前的prompt补充" << std::endl; std::cout << "<-f|--file> <args>: 输入文件,文件中每行一个prompt,如果行数不足batch则用之前的prompt补充" << std::endl;
std::cout << "<-o|--output> <args>: 输出结果写文件,如果不设定则输出到屏幕" << std::endl;
} }
void ParseArgs(int argc, char **argv, BenchmarkConfig &config) { void ParseArgs(int argc, char **argv, BenchmarkConfig &config) {
...@@ -62,6 +61,8 @@ void ParseArgs(int argc, char **argv, BenchmarkConfig &config) { ...@@ -62,6 +61,8 @@ void ParseArgs(int argc, char **argv, BenchmarkConfig &config) {
config.file = sargv[++i]; config.file = sargv[++i];
} else if (sargv[i] == "-o" || sargv[i] == "--output") { } else if (sargv[i] == "-o" || sargv[i] == "--output") {
config.output = sargv[++i]; config.output = sargv[++i];
} else if (sargv[i] == "--loop") {
config.runloop = 1;
} else { } else {
Usage(); Usage();
exit(-1); exit(-1);
...@@ -69,6 +70,11 @@ void ParseArgs(int argc, char **argv, BenchmarkConfig &config) { ...@@ -69,6 +70,11 @@ void ParseArgs(int argc, char **argv, BenchmarkConfig &config) {
} }
} }
// Returns the time elapsed from time1 to time2, in seconds.
//
// The difference is first truncated to whole microseconds and then scaled by
// the microsecond period (num/den), so the result has microsecond resolution.
// The value is negative when time2 is earlier than time1.
static double GetSpan(std::chrono::system_clock::time_point time1, std::chrono::system_clock::time_point time2) {
    using Micros = std::chrono::microseconds;
    const Micros elapsed = std::chrono::duration_cast<Micros>(time2 - time1);
    const double ticks = static_cast<double>(elapsed.count());
    // Same evaluation order as before: (ticks * num) / den.
    return ticks * Micros::period::num / Micros::period::den;
}
int main(int argc, char **argv) { int main(int argc, char **argv) {
BenchmarkConfig config; BenchmarkConfig config;
ParseArgs(argc, argv, config); ParseArgs(argc, argv, config);
...@@ -79,11 +85,13 @@ int main(int argc, char **argv) { ...@@ -79,11 +85,13 @@ int main(int argc, char **argv) {
exit(0); exit(0);
} }
model_file.close(); model_file.close();
// fastllm::SetDeviceMap({{"cuda:0", 1}, {"cuda:1", 1}});
auto model = fastllm::CreateLLMModelFromFile(config.path); auto model = fastllm::CreateLLMModelFromFile(config.path);
fastllm::GenerationConfig generationConfig; fastllm::GenerationConfig generationConfig;
generationConfig.output_token_limit = config.limit; generationConfig.output_token_limit = config.limit;
generationConfig.repeat_penalty = 1.1;
fastllm::PrintInstructionInfo(); // fastllm::PrintInstructionInfo();
std::vector <std::string> inputs; std::vector <std::string> inputs;
if (config.file != "") { if (config.file != "") {
std::ifstream finputs(config.file, std::ios::in); std::ifstream finputs(config.file, std::ios::in);
...@@ -119,6 +127,8 @@ int main(int argc, char **argv) { ...@@ -119,6 +127,8 @@ int main(int argc, char **argv) {
promptTokenNum += model->weight.tokenizer.Encode(inputs[i]).Count(0); promptTokenNum += model->weight.tokenizer.Encode(inputs[i]).Count(0);
} }
if(config.runloop != 1) {
std::vector <std::string> outputs; std::vector <std::string> outputs;
static int tokens = 0; static int tokens = 0;
auto st = std::chrono::system_clock::now(); auto st = std::chrono::system_clock::now();
...@@ -145,11 +155,11 @@ int main(int argc, char **argv) { ...@@ -145,11 +155,11 @@ int main(int argc, char **argv) {
fclose(fo); fclose(fo);
} else { } else {
for (int i = 0; i < outputs.size(); i++) { for (int i = 0; i < outputs.size(); i++) {
#if defined(_WIN32) or defined(_WIN64) #if defined(_WIN32) or defined(_WIN64)
printf("[ user: \"%s\", model: \"%s\"]\n", utf8_to_gbk(inputs[i]).c_str(), utf8_to_gbk(outputs[i]).c_str()); printf("[ user: \"%s\", model: \"%s\"]\n", utf8_to_gbk(inputs[i]).c_str(), utf8_to_gbk(outputs[i]).c_str());
#else #else
printf("[ user: \"%s\", model: \"%s\"]\n", inputs[i].c_str(), outputs[i].c_str()); printf("[ user: \"%s\", model: \"%s\"]\n", inputs[i].c_str(), outputs[i].c_str());
#endif #endif
} }
} }
...@@ -158,5 +168,40 @@ int main(int argc, char **argv) { ...@@ -158,5 +168,40 @@ int main(int argc, char **argv) {
printf("prompt use %f s\n", promptSpend); printf("prompt use %f s\n", promptSpend);
printf("prompt speed = %f tokens / s\n", (float)promptTokenNum / promptSpend); printf("prompt speed = %f tokens / s\n", (float)promptTokenNum / promptSpend);
printf("output %d tokens\nuse %f s\nspeed = %f tokens / s\n", tokens, spend, tokens / spend); printf("output %d tokens\nuse %f s\nspeed = %f tokens / s\n", tokens, spend, tokens / spend);
}
else
{
static int tokens = 0;
while(true){
tokens = 0;
std::vector <std::string> outputs;
auto st = std::chrono::system_clock::now();
static auto promptTime = st;
model->ResponseBatch(inputs, outputs, [](int index, std::vector<std::string> &contents) {
if (index != -1) {
if (index == 0) {
promptTime = std::chrono::system_clock::now();
} else {
for (int i = 0; i < contents.size(); i++) {
tokens += (contents[i].size() > 0);
}
}
}
}, generationConfig);
float promptSpend = fastllm::GetSpan(st, promptTime);
float spend = fastllm::GetSpan(promptTime, std::chrono::system_clock::now());
if (config.output != "") {
FILE *fo = fopen(config.output.c_str(), "w");
for (int i = 0; i < outputs.size(); i++) {
fprintf(fo, "[ user: \"%s\", model: \"%s\"]\n", inputs[i].c_str(), outputs[i].c_str());
}
fclose(fo);
}
pid_t pid = getpid();
printf("pid %d : batch=%d, prompt %d tokens, prompt use %0.2fs, prompt speed=%0.2ftokens/s, output %d tokens, use %0.2fs, tps=%0.2ftokens/s\n",
pid, (int)inputs.size(), promptTokenNum, promptSpend, (float)promptTokenNum / promptSpend, tokens, spend, tokens / spend);
}
}
return 0; return 0;
} }
\ No newline at end of file
#!/bin/bash
# Sweeps ./benchmark over several prompt files and batch sizes for the fp16 and
# int8 Baichuan-13B-Chat models. Each run is invoked as:
#   benchmark -p <model> -f <prompt file> -l <token limit> -b <batch>
# Runs execute sequentially; a failing run does not stop the sweep.
#
# Locations can be overridden from the environment; the defaults are the
# paths the script was originally written against:
#   BENCHMARK_BIN  benchmark executable               (default: ./benchmark)
#   MODEL_DIR      directory containing the .flm files (default: /home/xzhou/baichuan-13b-chat-flm)
#   PROMPT_DIR     directory containing prompt files   (default: /home/xzhou/prompts)
#   TOKEN_LIMIT    value passed to -l                  (default: 128)

BENCHMARK_BIN="${BENCHMARK_BIN:-./benchmark}"
MODEL_DIR="${MODEL_DIR:-/home/xzhou/baichuan-13b-chat-flm}"
PROMPT_DIR="${PROMPT_DIR:-/home/xzhou/prompts}"
TOKEN_LIMIT="${TOKEN_LIMIT:-128}"

# run_sweep <precision> <prompt file> <batch>...
# Runs the benchmark once per batch size, in the order given, against
# baichuan-13b-chat-<precision>.flm and the named prompt file.
run_sweep() {
    local model="${MODEL_DIR}/baichuan-13b-chat-$1.flm"
    local prompts="${PROMPT_DIR}/$2"
    shift 2

    local batch
    for batch in "$@"; do
        "$BENCHMARK_BIN" -p "$model" -f "$prompts" -l "$TOKEN_LIMIT" -b "$batch"
    done
}

# Batch sizes listed as "disabled" were commented out in the original sweep;
# the reason was not recorded.

# fp16
run_sweep fp16 beijing.txt    1 16 18 64 96 100 102    # disabled: 32
run_sweep fp16 100tokens.txt  1 16 18 32 64 92 96
run_sweep fp16 500tokens.txt  1 6 8 16 30 32 34
run_sweep fp16 1000tokens.txt 1 2 8 16 20

# int8
run_sweep int8 beijing.txt    1 16 18 96 112 120 124 128 144 146 148 150    # disabled: 32 64
run_sweep int8 100tokens.txt  1 16 18 96 128           # disabled: 32 64 112
run_sweep int8 500tokens.txt  1 6 8 15 16 32 36 40     # disabled: 48 52
run_sweep int8 1000tokens.txt 1 2 7 16 18 20 21        # disabled: 8 26
\ No newline at end of file
#!/bin/bash
# Stress test: starts several ./benchmark instances in --loop mode in the
# background, lets them run for a fixed duration, then stops them.

# Command to run, stored as an argv array so each argument is passed intact
# instead of relying on word splitting of a single string.
program=(
    ./benchmark
    -p ../../../models/chatglm2-6b/chatglm2-6b-flm/chatglm2-6b-int8.flm
    -f ../../../models/chatglm2-6b/prompts/100tokens.txt
    --loop
)

# Number of concurrent instances to launch.
num_instances=2

# Duration of the stress test, in seconds.
test_duration=120

# Launch the background instances and remember their PIDs.
pids=()
for ((i=1; i<=num_instances; i++)); do
    "${program[@]}" &
    pids+=("$!")
done

# Let the instances run for the duration of the test.
sleep "$test_duration"

# Stop only the instances started above. The previous `pkill -f benchmark`
# matched every process whose command line contains "benchmark", including
# unrelated ones.
if ((${#pids[@]} > 0)); then
    kill "${pids[@]}" 2>/dev/null
    # Reap the children; their non-zero "terminated" status is expected.
    wait "${pids[@]}" 2>/dev/null || true
fi
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment