Commit aefd9f11 authored by zhouxiang's avatar zhouxiang
Browse files

dcu平台fastllm推理框架

parents
Pipeline #543 failed with stages
in 0 seconds
*.log
*.pyc
token
/cmake-build-debug/
/build-tfacc/
/build-android/
/build-py/
/build/
/pyfastllm/build/
/pyfastllm/dist/
/.idea/
/.vscode/
/example/Win32Demo/bin/*.*
/example/Win32Demo/Win32
/example/Win32Demo/x64
/example/Win32Demo/*.filters
/example/Win32Demo/*.user
/example/Win32Demo/.vs
/example/Android/LLMAssistant/*.iml
/example/Android/LLMAssistant/.gradle
/example/Android/LLMAssistant/local.properties
/example/Android/LLMAssistant/.idea/caches
/example/Android/LLMAssistant/.idea/libraries
/example/Android/LLMAssistant/.idea/modules.xml
/example/Android/LLMAssistant/.idea/workspace.xml
/example/Android/LLMAssistant/.idea/navEditor.xml
/example/Android/LLMAssistant/.idea/assetWizardSettings.xml
/example/Android/LLMAssistant/.DS_Store
/example/Android/LLMAssistant/build
/example/Android/LLMAssistant/captures
/example/Android/LLMAssistant/.externalNativeBuild
/example/Android/LLMAssistant/.cxx
/example/Android/LLMAssistant/local.properties
/test/cmmlu/result/
/models/
[submodule "third_party/pybind11"]
path = third_party/pybind11
url = https://github.com/pybind/pybind11.git
cmake_minimum_required(VERSION 3.5)
project(fastllm LANGUAGES CXX)
option(USE_CUDA "use cuda" ON)
option(PY_API "python api" OFF)
option(USE_MMAP "use mmap" OFF)
message(STATUS "USE_CUDA: ${USE_CUDA}")
message(STATUS "PYTHON_API: ${PY_API}")
set(CMAKE_BUILD_TYPE "Release")
if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread --std=c++17 -O2")
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNOMINMAX -O2 /std:c++17 /arch:AVX /source-charset:utf-8")
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread --std=c++17 -O2 -g")
endif()
message(STATUS "CMAKE_CXX_FLAGS" ${CMAKE_CXX_FLAGS})
set(FASTLLM_CXX_SOURCES src/fastllm.cpp src/device.cpp src/model.cpp src/executor.cpp
src/devices/cpu/cpudevice.cpp src/devices/cpu/cpudevicebatch.cpp
src/models/chatglm.cpp src/models/moss.cpp src/models/llama.cpp src/models/qwen.cpp src/models/basellm.cpp)
include_directories(include)
include_directories(include/utils)
include_directories(include/models)
if (USE_MMAP)
add_compile_definitions(USE_MMAP)
endif()
if (USE_CUDA)
enable_language(CUDA)
add_compile_definitions(USE_CUDA)
include_directories(include/devices/cuda)
#message(${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
set(FASTLLM_CUDA_SOURCES src/devices/cuda/cudadevice.cpp src/devices/cuda/cudadevicebatch.cpp src/devices/cuda/fastllm-cuda.cu)
set(FASTLLM_LINKED_LIBS ${FASTLLM_LINKED_LIBS} cublas)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -g --gpu-max-threads-per-block=1024")
#set(CMAKE_CUDA_ARCHITECTURES "70")
endif()
if (PY_API)
set(PYBIND third_party/pybind11)
add_subdirectory(${PYBIND})
add_compile_definitions(PY_API)
set(Python3_ROOT_DIR "/usr/local/python3.10.6/bin/")
find_package(Python3 REQUIRED)
include_directories(third_party/pybind11/include)
file(GLOB FASTLLM_CXX_HEADERS include/**/*.h)
add_library(pyfastllm MODULE src/pybinding.cpp ${FASTLLM_CXX_SOURCES} ${FASTLLM_CXX_HEADERS} ${FASTLLM_CUDA_SOURCES})
target_link_libraries(pyfastllm PUBLIC pybind11::module ${FASTLLM_LINKED_LIBS})
pybind11_extension(pyfastllm)
else()
add_library(fastllm OBJECT
${FASTLLM_CXX_SOURCES}
${FASTLLM_CUDA_SOURCES}
)
target_link_libraries(fastllm PUBLIC ${FASTLLM_LINKED_LIBS})
add_executable(main main.cpp)
target_link_libraries(main fastllm)
add_executable(quant tools/src/quant.cpp)
target_link_libraries(quant fastllm)
add_executable(webui example/webui/webui.cpp)
target_link_libraries(webui fastllm)
add_custom_command(
TARGET webui
POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory web
COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/example/webui/web ${CMAKE_BINARY_DIR}/web
)
add_executable(benchmark example/benchmark/benchmark.cpp)
target_link_libraries(benchmark fastllm)
add_library(fastllm_tools SHARED ${FASTLLM_CXX_SOURCES} ${FASTLLM_CUDA_SOURCES} tools/src/pytools.cpp)
target_link_libraries(fastllm_tools PUBLIC ${FASTLLM_LINKED_LIBS})
if (${CMAKE_HOST_WIN32})
add_custom_command(
TARGET fastllm_tools
POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory tools
COMMAND ${CMAKE_COMMAND} -E make_directory tools/fastllm_pytools
COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/tools/fastllm_pytools ${CMAKE_BINARY_DIR}/tools/fastllm_pytools/.
COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/tools/scripts ${CMAKE_BINARY_DIR}/tools/.
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_BINARY_DIR}/fastllm_tools.dll ${CMAKE_BINARY_DIR}/tools/fastllm_pytools/.
COMMAND ${CMAKE_COMMAND} -E remove ${CMAKE_BINARY_DIR}/fastllm_tools.dll
)
else()
add_custom_command(
TARGET fastllm_tools
POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory tools
COMMAND ${CMAKE_COMMAND} -E make_directory tools/fastllm_pytools
COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/tools/fastllm_pytools ${CMAKE_BINARY_DIR}/tools/fastllm_pytools/.
COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/tools/scripts ${CMAKE_BINARY_DIR}/tools/.
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_BINARY_DIR}/libfastllm_tools.* ${CMAKE_BINARY_DIR}/tools/fastllm_pytools/.
COMMAND ${CMAKE_COMMAND} -E remove ${CMAKE_BINARY_DIR}/libfastllm_tools.*
)
endif()
endif()
\ No newline at end of file
# syntax=docker/dockerfile:1-labs
FROM nvidia/cuda:11.7.1-devel-ubuntu22.04
# Update Apt repositories
RUN apt-get update
# Install and configure Python
RUN apt-get -y --no-install-recommends install wget build-essential python3.10 python3-pip
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1
RUN pip install setuptools streamlit-chat
ENV WORKDIR /fastllm
# Install cmake
RUN wget -c https://cmake.org/files/LatestRelease/cmake-3.27.0-linux-x86_64.sh && bash ./cmake-3.27.0-linux-x86_64.sh --skip-license --prefix=/usr/
WORKDIR $WORKDIR
ADD . $WORKDIR/
RUN mkdir $WORKDIR/build && cd build && cmake .. -DUSE_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=native && make -j && cd tools && python setup.py install
CMD /fastllm/build/webui -p /models/chatglm2-6b-int8.flm
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
# fastllm
## 介绍
fastllm是纯c++实现的高性能大模型推理库
## 功能概述
- 🚀 纯c++实现,便于跨平台移植,可以在安卓上直接编译
- 🚀 ARM平台支持NEON指令集加速,X86平台支持AVX指令集加速,NVIDIA平台支持CUDA加速,各个平台速度都很快就是了
- 🚀 支持浮点模型(FP32), 半精度模型(FP16), 量化模型(INT8, INT4) 加速
- 🚀 支持多卡部署,支持GPU + CPU混合部署
- 🚀 支持Batch速度优化
- 🚀 支持并发计算时动态拼Batch
- 🚀 支持流式输出,很方便实现打字机效果
- 🚀 支持python调用
- 🚀 前后端分离设计,便于支持新的计算设备
- 🚀 目前支持ChatGLM模型,各种LLAMA模型(ALPACA, VICUNA等),BAICHUAN模型,MOSS模型
## 快速开始
### 环境准备
在光源可拉取推理的docker镜像,拉取方式如下:
```
docker pull image.sourcefind.cn:5000/dcu/admin/base/custom:glm-ft-v1.0
```
### 编译
使用如下命令编译
``` sh
cd fastllm
mkdir build
cd build
cmake ..
make -j
```
编译完成后,可以使用如下命令安装简易python工具包
``` sh
cd tools # 这时在fastllm/build/tools目录下
python setup.py install
```
### 运行demo程序
我们假设已经获取了名为`model.flm`的模型(参照 [模型获取](#模型获取),初次使用可以先下载转换好的模型)
编译完成之后在build目录下可以使用下列demo:
``` sh
# 这时在fastllm/build目录下
# 命令行聊天程序, 支持打字机效果
./main -p model.flm
# 简易webui, 使用流式输出 + 动态batch,可多路并发访问
./webui -p model.flm --port 1234
# python版本的命令行聊天程序,使用了模型创建以及流式对话效果
python tools/cli_demo.py -p model.flm
# python版本的简易webui,需要先安装streamlit-chat
streamlit run tools/web_demo.py model.flm
```
### 简易python调用
编译后如果安装了简易python工具包,那么可以使用python来调用一些基本的API (如果没有安装,也可以在直接import编译生成的tools/fastllm_pytools来使用)
``` python
# 模型创建
from fastllm_pytools import llm
model = llm.model("model.flm")
# 生成回复
print(model.response("你好"))
# 流式生成回复
for response in model.stream_response("你好"):
print(response, flush = True, end = "")
```
### 两行代码加速ChatGLM推理
安装了简易python工具包后,只需要在原本的推理程序中加入两行即可使用fastllm加速
``` python
# 这是原来的程序,通过huggingface接口创建模型
from transformers import AutoTokenizer, AutoModel
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code = True)
model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code = True)
# 加入下面这两行,将huggingface模型转换成fastllm模型
# 目前from_hf接口只能接受原始模型,或者ChatGLM的int4, int8量化模型,暂时不能转换其它量化模型
from fastllm_pytools import llm
model = llm.from_hf(model, tokenizer, dtype = "float16") # dtype支持 "float16", "int8", "int4"
# 注释掉这一行model.eval()
#model = model.eval()
```
model支持了ChatGLM的API函数chat, stream_chat,因此ChatGLM的demo程序无需改动其他代码即可运行
model还支持下列API用于生成回复
``` python
# 生成回复
print(model.response("你好"))
# 流式生成回复
for response in model.stream_response("你好"):
print(response, flush = True, end = "")
```
转好的模型也可以导出到本地文件,之后可以直接读取,也可以使用fastllm cpp接口读取
``` python
model.save("model.flm"); # 导出fastllm模型
new_model = llm.model("model.flm"); # 导入fastllm模型
```
## 多卡部署
### fastllm_pytools中使用多卡部署
``` python
from fastllm_pytools import llm
# 支持下列三种方式,需要在模型创建之前调用
llm.set_device_map("cuda:0") # 将模型部署在单一设备上
llm.set_device_map(["cuda:0", "cuda:1"]) # 将模型平均部署在多个设备上
llm.set_device_map({"cuda:0" : 10, "cuda:1" : 5, "cpu": 1}) # 将模型按不同比例部署在多个设备上
```
### c++中使用多卡部署
``` cpp
// 支持以下方式,需要在模型创建之前调用
fastllm::SetDeviceMap({{"cuda:0", 10}, {"cuda:1", 5}, {"cpu", 1}}); // 将模型按不同比例部署在多个设备上
```
## 模型转换
### ChatGLM模型导出 (默认脚本导出ChatGLM2-6b模型)
``` sh
# 需要先安装ChatGLM-6B环境
# 如果使用自己finetune的模型需要修改chatglm_export.py文件中创建tokenizer, model的代码
cd build
python3 tools/chatglm_export.py chatglm2-6b-fp16.flm float16 #导出float16模型
python3 tools/chatglm_export.py chatglm2-6b-int8.flm int8 #导出int8模型
python3 tools/chatglm_export.py chatglm2-6b-int4.flm int4 #导出int4模型
```
### baichuan模型导出 (默认脚本导出baichuan-13b-chat模型)
``` sh
# 需要先安装baichuan环境
# 如果使用自己finetune的模型需要修改baichuan2flm.py文件中创建tokenizer, model的代码
# 根据所需的精度,导出相应的模型
cd build
python3 tools/baichuan2flm.py baichuan-13b-fp16.flm float16 #导出float16模型
python3 tools/baichuan2flm.py baichuan-13b-int8.flm int8 #导出int8模型
python3 tools/baichuan2flm.py baichuan-13b-int4.flm int4 #导出int4模型
```
### MOSS模型导出
``` sh
# 需要先安装MOSS环境
# 如果使用自己finetune的模型需要修改moss_export.py文件中创建tokenizer, model的代码
# 根据所需的精度,导出相应的模型
cd build
python3 tools/moss_export.py moss-fp16.flm float16 #导出float16模型
python3 tools/moss_export.py moss-int8.flm int8 #导出int8模型
python3 tools/moss_export.py moss-int4.flm int4 #导出int4模型
```
### LLAMA系列模型导出
``` sh
# 修改build/tools/alpaca2flm.py程序进行导出
# 不同llama模型使用的指令相差很大,需要参照torch2flm.py中的参数进行配置
```
### QWEN模型导出
```sh
# 需要先安装QWen环境
# 如果使用自己finetune的模型需要修改qwen2flm.py文件中创建tokenizer, model的代码
# 根据所需的精度,导出相应的模型
cd build
python3 tools/qwen2flm.py qwen-7b-fp16.flm float16 #导出float16模型
python3 tools/qwen2flm.py qwen-7b-int8.flm int8 #导出int8模型
python3 tools/qwen2flm.py qwen-7b-int4.flm int4 #导出int4模型
```
### NSQL模型导出
``` sh
# 需要先安装NSQL环境
# 如果使用自己finetune的模型需要修改nsqlllama2flm.py文件中创建tokenizer, model的代码
# 根据所需的精度,导出相应的模型
cd build
python3 tools/nsqlllama2flm.py nsql-fp16.flm float16 #导出float16模型
python3 tools/nsqlllama2flm,py nsql-int8.flm int8 #导出int8模型
python3 tools/nsqlllama2flm.py nsql-int4.flm int4 #导出int4模型
```
version: '3.8'
services:
fastllm:
build:
context: .
args:
DOCKER_BUILDKIT: 0
# privileged: true
platforms:
- "linux/amd64"
tags:
- "fastllm:v0.9"
restart: always
ports:
- 11234:8081
volumes:
- ./models/:/models/
command: /fastllm/build/webui -p /models/chatglm2-6b-int8.flm -w ./example/webui/web
## 推理速度
可以使用benchmark程序进行测速,根据不同配置、不同输入,推理速度也会有一些差别
例如:
``` sh
./benchmark -p ~/chatglm-6b-int4.flm -f ../example/benchmark/prompts/beijing.txt -b 1
./benchmark -p ~/chatglm-6b-int8.flm -f ../example/benchmark/prompts/beijing.txt -b 1
./benchmark -p ~/chatglm-6b-fp16.flm -f ../example/benchmark/prompts/hello.txt -b 512 -l 18
```
*.iml
.gradle
/local.properties
/.idea/caches
/.idea/libraries
/.idea/modules.xml
/.idea/workspace.xml
/.idea/navEditor.xml
/.idea/assetWizardSettings.xml
.DS_Store
/build
/captures
.externalNativeBuild
.cxx
local.properties
# Default ignored files
/shelf/
/workspace.xml
XiaoZhihuiAssistant
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="CompilerConfiguration">
<bytecodeTargetLevel target="11" />
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="DBNavigator.Project.DataEditorManager">
<record-view-column-sorting-type value="BY_INDEX" />
<value-preview-text-wrapping value="true" />
<value-preview-pinned value="false" />
</component>
<component name="DBNavigator.Project.DatabaseEditorStateManager">
<last-used-providers />
</component>
<component name="DBNavigator.Project.DatabaseFileManager">
<open-files />
</component>
<component name="DBNavigator.Project.Settings">
<connections />
<browser-settings>
<general>
<display-mode value="TABBED" />
<navigation-history-size value="100" />
<show-object-details value="false" />
</general>
<filters>
<object-type-filter>
<object-type name="SCHEMA" enabled="true" />
<object-type name="USER" enabled="true" />
<object-type name="ROLE" enabled="true" />
<object-type name="PRIVILEGE" enabled="true" />
<object-type name="CHARSET" enabled="true" />
<object-type name="TABLE" enabled="true" />
<object-type name="VIEW" enabled="true" />
<object-type name="MATERIALIZED_VIEW" enabled="true" />
<object-type name="NESTED_TABLE" enabled="true" />
<object-type name="COLUMN" enabled="true" />
<object-type name="INDEX" enabled="true" />
<object-type name="CONSTRAINT" enabled="true" />
<object-type name="DATASET_TRIGGER" enabled="true" />
<object-type name="DATABASE_TRIGGER" enabled="true" />
<object-type name="SYNONYM" enabled="true" />
<object-type name="SEQUENCE" enabled="true" />
<object-type name="PROCEDURE" enabled="true" />
<object-type name="FUNCTION" enabled="true" />
<object-type name="PACKAGE" enabled="true" />
<object-type name="TYPE" enabled="true" />
<object-type name="TYPE_ATTRIBUTE" enabled="true" />
<object-type name="ARGUMENT" enabled="true" />
<object-type name="DIMENSION" enabled="true" />
<object-type name="CLUSTER" enabled="true" />
<object-type name="DBLINK" enabled="true" />
</object-type-filter>
</filters>
<sorting>
<object-type name="COLUMN" sorting-type="NAME" />
<object-type name="FUNCTION" sorting-type="NAME" />
<object-type name="PROCEDURE" sorting-type="NAME" />
<object-type name="ARGUMENT" sorting-type="POSITION" />
<object-type name="TYPE ATTRIBUTE" sorting-type="POSITION" />
</sorting>
<default-editors>
<object-type name="VIEW" editor-type="SELECTION" />
<object-type name="PACKAGE" editor-type="SELECTION" />
<object-type name="TYPE" editor-type="SELECTION" />
</default-editors>
</browser-settings>
<navigation-settings>
<lookup-filters>
<lookup-objects>
<object-type name="SCHEMA" enabled="true" />
<object-type name="USER" enabled="false" />
<object-type name="ROLE" enabled="false" />
<object-type name="PRIVILEGE" enabled="false" />
<object-type name="CHARSET" enabled="false" />
<object-type name="TABLE" enabled="true" />
<object-type name="VIEW" enabled="true" />
<object-type name="MATERIALIZED VIEW" enabled="true" />
<object-type name="INDEX" enabled="true" />
<object-type name="CONSTRAINT" enabled="true" />
<object-type name="DATASET TRIGGER" enabled="true" />
<object-type name="DATABASE TRIGGER" enabled="true" />
<object-type name="SYNONYM" enabled="false" />
<object-type name="SEQUENCE" enabled="true" />
<object-type name="PROCEDURE" enabled="true" />
<object-type name="FUNCTION" enabled="true" />
<object-type name="PACKAGE" enabled="true" />
<object-type name="TYPE" enabled="true" />
<object-type name="DIMENSION" enabled="false" />
<object-type name="CLUSTER" enabled="false" />
<object-type name="DBLINK" enabled="true" />
</lookup-objects>
<force-database-load value="false" />
<prompt-connection-selection value="true" />
<prompt-schema-selection value="true" />
</lookup-filters>
</navigation-settings>
<dataset-grid-settings>
<general>
<enable-zooming value="true" />
<enable-column-tooltip value="true" />
</general>
<sorting>
<nulls-first value="true" />
<max-sorting-columns value="4" />
</sorting>
<audit-columns>
<column-names value="" />
<visible value="true" />
<editable value="false" />
</audit-columns>
</dataset-grid-settings>
<dataset-editor-settings>
<text-editor-popup>
<active value="false" />
<active-if-empty value="false" />
<data-length-threshold value="100" />
<popup-delay value="1000" />
</text-editor-popup>
<values-actions-popup>
<show-popup-button value="true" />
<element-count-threshold value="1000" />
<data-length-threshold value="250" />
</values-actions-popup>
<general>
<fetch-block-size value="100" />
<fetch-timeout value="30" />
<trim-whitespaces value="true" />
<convert-empty-strings-to-null value="true" />
<select-content-on-cell-edit value="true" />
<large-value-preview-active value="true" />
</general>
<filters>
<prompt-filter-dialog value="true" />
<default-filter-type value="BASIC" />
</filters>
<qualified-text-editor text-length-threshold="300">
<content-types>
<content-type name="Text" enabled="true" />
<content-type name="Properties" enabled="true" />
<content-type name="XML" enabled="true" />
<content-type name="DTD" enabled="true" />
<content-type name="HTML" enabled="true" />
<content-type name="XHTML" enabled="true" />
<content-type name="Java" enabled="true" />
<content-type name="SQL" enabled="true" />
<content-type name="PL/SQL" enabled="true" />
<content-type name="JSON" enabled="true" />
<content-type name="JSON5" enabled="true" />
<content-type name="Groovy" enabled="true" />
<content-type name="AIDL" enabled="true" />
<content-type name="YAML" enabled="true" />
<content-type name="Manifest" enabled="true" />
</content-types>
</qualified-text-editor>
<record-navigation>
<navigation-target value="VIEWER" />
</record-navigation>
</dataset-editor-settings>
<code-editor-settings>
<general>
<show-object-navigation-gutter value="false" />
<show-spec-declaration-navigation-gutter value="true" />
<enable-spellchecking value="true" />
<enable-reference-spellchecking value="false" />
</general>
<confirmations>
<save-changes value="false" />
<revert-changes value="true" />
</confirmations>
</code-editor-settings>
<code-completion-settings>
<filters>
<basic-filter>
<filter-element type="RESERVED_WORD" id="keyword" selected="true" />
<filter-element type="RESERVED_WORD" id="function" selected="true" />
<filter-element type="RESERVED_WORD" id="parameter" selected="true" />
<filter-element type="RESERVED_WORD" id="datatype" selected="true" />
<filter-element type="RESERVED_WORD" id="exception" selected="true" />
<filter-element type="OBJECT" id="schema" selected="true" />
<filter-element type="OBJECT" id="role" selected="true" />
<filter-element type="OBJECT" id="user" selected="true" />
<filter-element type="OBJECT" id="privilege" selected="true" />
<user-schema>
<filter-element type="OBJECT" id="table" selected="true" />
<filter-element type="OBJECT" id="view" selected="true" />
<filter-element type="OBJECT" id="materialized view" selected="true" />
<filter-element type="OBJECT" id="index" selected="true" />
<filter-element type="OBJECT" id="constraint" selected="true" />
<filter-element type="OBJECT" id="trigger" selected="true" />
<filter-element type="OBJECT" id="synonym" selected="false" />
<filter-element type="OBJECT" id="sequence" selected="true" />
<filter-element type="OBJECT" id="procedure" selected="true" />
<filter-element type="OBJECT" id="function" selected="true" />
<filter-element type="OBJECT" id="package" selected="true" />
<filter-element type="OBJECT" id="type" selected="true" />
<filter-element type="OBJECT" id="dimension" selected="true" />
<filter-element type="OBJECT" id="cluster" selected="true" />
<filter-element type="OBJECT" id="dblink" selected="true" />
</user-schema>
<public-schema>
<filter-element type="OBJECT" id="table" selected="false" />
<filter-element type="OBJECT" id="view" selected="false" />
<filter-element type="OBJECT" id="materialized view" selected="false" />
<filter-element type="OBJECT" id="index" selected="false" />
<filter-element type="OBJECT" id="constraint" selected="false" />
<filter-element type="OBJECT" id="trigger" selected="false" />
<filter-element type="OBJECT" id="synonym" selected="false" />
<filter-element type="OBJECT" id="sequence" selected="false" />
<filter-element type="OBJECT" id="procedure" selected="false" />
<filter-element type="OBJECT" id="function" selected="false" />
<filter-element type="OBJECT" id="package" selected="false" />
<filter-element type="OBJECT" id="type" selected="false" />
<filter-element type="OBJECT" id="dimension" selected="false" />
<filter-element type="OBJECT" id="cluster" selected="false" />
<filter-element type="OBJECT" id="dblink" selected="false" />
</public-schema>
<any-schema>
<filter-element type="OBJECT" id="table" selected="true" />
<filter-element type="OBJECT" id="view" selected="true" />
<filter-element type="OBJECT" id="materialized view" selected="true" />
<filter-element type="OBJECT" id="index" selected="true" />
<filter-element type="OBJECT" id="constraint" selected="true" />
<filter-element type="OBJECT" id="trigger" selected="true" />
<filter-element type="OBJECT" id="synonym" selected="true" />
<filter-element type="OBJECT" id="sequence" selected="true" />
<filter-element type="OBJECT" id="procedure" selected="true" />
<filter-element type="OBJECT" id="function" selected="true" />
<filter-element type="OBJECT" id="package" selected="true" />
<filter-element type="OBJECT" id="type" selected="true" />
<filter-element type="OBJECT" id="dimension" selected="true" />
<filter-element type="OBJECT" id="cluster" selected="true" />
<filter-element type="OBJECT" id="dblink" selected="true" />
</any-schema>
</basic-filter>
<extended-filter>
<filter-element type="RESERVED_WORD" id="keyword" selected="true" />
<filter-element type="RESERVED_WORD" id="function" selected="true" />
<filter-element type="RESERVED_WORD" id="parameter" selected="true" />
<filter-element type="RESERVED_WORD" id="datatype" selected="true" />
<filter-element type="RESERVED_WORD" id="exception" selected="true" />
<filter-element type="OBJECT" id="schema" selected="true" />
<filter-element type="OBJECT" id="user" selected="true" />
<filter-element type="OBJECT" id="role" selected="true" />
<filter-element type="OBJECT" id="privilege" selected="true" />
<user-schema>
<filter-element type="OBJECT" id="table" selected="true" />
<filter-element type="OBJECT" id="view" selected="true" />
<filter-element type="OBJECT" id="materialized view" selected="true" />
<filter-element type="OBJECT" id="index" selected="true" />
<filter-element type="OBJECT" id="constraint" selected="true" />
<filter-element type="OBJECT" id="trigger" selected="true" />
<filter-element type="OBJECT" id="synonym" selected="true" />
<filter-element type="OBJECT" id="sequence" selected="true" />
<filter-element type="OBJECT" id="procedure" selected="true" />
<filter-element type="OBJECT" id="function" selected="true" />
<filter-element type="OBJECT" id="package" selected="true" />
<filter-element type="OBJECT" id="type" selected="true" />
<filter-element type="OBJECT" id="dimension" selected="true" />
<filter-element type="OBJECT" id="cluster" selected="true" />
<filter-element type="OBJECT" id="dblink" selected="true" />
</user-schema>
<public-schema>
<filter-element type="OBJECT" id="table" selected="true" />
<filter-element type="OBJECT" id="view" selected="true" />
<filter-element type="OBJECT" id="materialized view" selected="true" />
<filter-element type="OBJECT" id="index" selected="true" />
<filter-element type="OBJECT" id="constraint" selected="true" />
<filter-element type="OBJECT" id="trigger" selected="true" />
<filter-element type="OBJECT" id="synonym" selected="true" />
<filter-element type="OBJECT" id="sequence" selected="true" />
<filter-element type="OBJECT" id="procedure" selected="true" />
<filter-element type="OBJECT" id="function" selected="true" />
<filter-element type="OBJECT" id="package" selected="true" />
<filter-element type="OBJECT" id="type" selected="true" />
<filter-element type="OBJECT" id="dimension" selected="true" />
<filter-element type="OBJECT" id="cluster" selected="true" />
<filter-element type="OBJECT" id="dblink" selected="true" />
</public-schema>
<any-schema>
<filter-element type="OBJECT" id="table" selected="true" />
<filter-element type="OBJECT" id="view" selected="true" />
<filter-element type="OBJECT" id="materialized view" selected="true" />
<filter-element type="OBJECT" id="index" selected="true" />
<filter-element type="OBJECT" id="constraint" selected="true" />
<filter-element type="OBJECT" id="trigger" selected="true" />
<filter-element type="OBJECT" id="synonym" selected="true" />
<filter-element type="OBJECT" id="sequence" selected="true" />
<filter-element type="OBJECT" id="procedure" selected="true" />
<filter-element type="OBJECT" id="function" selected="true" />
<filter-element type="OBJECT" id="package" selected="true" />
<filter-element type="OBJECT" id="type" selected="true" />
<filter-element type="OBJECT" id="dimension" selected="true" />
<filter-element type="OBJECT" id="cluster" selected="true" />
<filter-element type="OBJECT" id="dblink" selected="true" />
</any-schema>
</extended-filter>
</filters>
<sorting enabled="true">
<sorting-element type="RESERVED_WORD" id="keyword" />
<sorting-element type="RESERVED_WORD" id="datatype" />
<sorting-element type="OBJECT" id="column" />
<sorting-element type="OBJECT" id="table" />
<sorting-element type="OBJECT" id="view" />
<sorting-element type="OBJECT" id="materialized view" />
<sorting-element type="OBJECT" id="index" />
<sorting-element type="OBJECT" id="constraint" />
<sorting-element type="OBJECT" id="trigger" />
<sorting-element type="OBJECT" id="synonym" />
<sorting-element type="OBJECT" id="sequence" />
<sorting-element type="OBJECT" id="procedure" />
<sorting-element type="OBJECT" id="function" />
<sorting-element type="OBJECT" id="package" />
<sorting-element type="OBJECT" id="type" />
<sorting-element type="OBJECT" id="dimension" />
<sorting-element type="OBJECT" id="cluster" />
<sorting-element type="OBJECT" id="dblink" />
<sorting-element type="OBJECT" id="schema" />
<sorting-element type="OBJECT" id="role" />
<sorting-element type="OBJECT" id="user" />
<sorting-element type="RESERVED_WORD" id="function" />
<sorting-element type="RESERVED_WORD" id="parameter" />
</sorting>
<format>
<enforce-code-style-case value="true" />
</format>
</code-completion-settings>
<execution-engine-settings>
<statement-execution>
<fetch-block-size value="100" />
<execution-timeout value="20" />
<debug-execution-timeout value="600" />
<focus-result value="false" />
<prompt-execution value="false" />
</statement-execution>
<script-execution>
<command-line-interfaces />
<execution-timeout value="300" />
</script-execution>
<method-execution>
<execution-timeout value="30" />
<debug-execution-timeout value="600" />
<parameter-history-size value="10" />
</method-execution>
</execution-engine-settings>
<operation-settings>
<transactions>
<uncommitted-changes>
<on-project-close value="ASK" />
<on-disconnect value="ASK" />
<on-autocommit-toggle value="ASK" />
</uncommitted-changes>
<multiple-uncommitted-changes>
<on-commit value="ASK" />
<on-rollback value="ASK" />
</multiple-uncommitted-changes>
</transactions>
<session-browser>
<disconnect-session value="ASK" />
<kill-session value="ASK" />
<reload-on-filter-change value="false" />
</session-browser>
<compiler>
<compile-type value="KEEP" />
<compile-dependencies value="ASK" />
<always-show-controls value="false" />
</compiler>
<debugger>
<debugger-type value="ASK" />
<use-generic-runners value="true" />
</debugger>
</operation-settings>
<ddl-file-settings>
<extensions>
<mapping file-type-id="VIEW" extensions="vw" />
<mapping file-type-id="TRIGGER" extensions="trg" />
<mapping file-type-id="PROCEDURE" extensions="prc" />
<mapping file-type-id="FUNCTION" extensions="fnc" />
<mapping file-type-id="PACKAGE" extensions="pkg" />
<mapping file-type-id="PACKAGE_SPEC" extensions="pks" />
<mapping file-type-id="PACKAGE_BODY" extensions="pkb" />
<mapping file-type-id="TYPE" extensions="tpe" />
<mapping file-type-id="TYPE_SPEC" extensions="tps" />
<mapping file-type-id="TYPE_BODY" extensions="tpb" />
</extensions>
<general>
<lookup-ddl-files value="true" />
<create-ddl-files value="false" />
<synchronize-ddl-files value="true" />
<use-qualified-names value="false" />
<make-scripts-rerunnable value="true" />
</general>
</ddl-file-settings>
<general-settings>
<regional-settings>
<date-format value="MEDIUM" />
<number-format value="UNGROUPED" />
<locale value="SYSTEM_DEFAULT" />
<use-custom-formats value="false" />
</regional-settings>
<environment>
<environment-types>
<environment-type id="development" name="Development" description="Development environment" color="-2430209/-12296320" readonly-code="false" readonly-data="false" />
<environment-type id="integration" name="Integration" description="Integration environment" color="-2621494/-12163514" readonly-code="true" readonly-data="false" />
<environment-type id="production" name="Production" description="Productive environment" color="-11574/-10271420" readonly-code="true" readonly-data="true" />
<environment-type id="other" name="Other" description="" color="-1576/-10724543" readonly-code="false" readonly-data="false" />
</environment-types>
<visibility-settings>
<connection-tabs value="true" />
<dialog-headers value="true" />
<object-editor-tabs value="true" />
<script-editor-tabs value="false" />
<execution-result-tabs value="true" />
</visibility-settings>
</environment>
</general-settings>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="deploymentTargetDropDown">
<runningDeviceTargetSelectedWithDropDown>
<Target>
<type value="RUNNING_DEVICE_TARGET" />
<deviceKey>
<Key>
<type value="SERIAL_NUMBER" />
<value value="adb-JRF67HLJ9HX85XCU-GNBKo9._adb-tls-connect._tcp" />
</Key>
</deviceKey>
</Target>
</runningDeviceTargetSelectedWithDropDown>
<timeTargetWasSelectedWithDropDown value="2023-07-28T10:10:24.047223600Z" />
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="GradleMigrationSettings" migrationVersion="1" />
<component name="GradleSettings">
<option name="linkedExternalProjectsSettings">
<GradleProjectSettings>
<option name="testRunner" value="GRADLE" />
<option name="distributionType" value="DEFAULT_WRAPPED" />
<option name="externalProjectPath" value="$PROJECT_DIR$" />
<option name="gradleJvm" value="Embedded JDK" />
<option name="modules">
<set>
<option value="$PROJECT_DIR$" />
<option value="$PROJECT_DIR$/app" />
</set>
</option>
</GradleProjectSettings>
</option>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ExternalStorageConfigurationManager" enabled="true" />
<component name="ProjectRootManager" version="2" languageLevel="JDK_11" default="true" project-jdk-name="11" project-jdk-type="JavaSDK">
<output url="file://$PROJECT_DIR$/build/classes" />
</component>
<component name="ProjectType">
<option name="id" value="Android" />
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$/../../.." vcs="Git" />
</component>
</project>
\ No newline at end of file
/build
\ No newline at end of file
plugins {
id 'com.android.application'
}
android {
compileSdk 30
defaultConfig {
applicationId "com.doujiao.xiaozhihuiassistant"
minSdk 21
targetSdk 26
versionCode 1
versionName "1.0"
testInstrumentationRunner "android.support.test.runner.AndroidJUnitRunner"
externalNativeBuild {
cmake {
cppFlags '-std=c++11'
}
}
ndk {
abiFilters 'arm64-v8a','armeabi-v7a','x86'
}
}
buildTypes {
release {
minifyEnabled false
proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
}
}
compileOptions {
sourceCompatibility JavaVersion.VERSION_1_8
targetCompatibility JavaVersion.VERSION_1_8
}
externalNativeBuild {
cmake {
path file('src/main/cpp/CMakeLists.txt')
version '3.18.1'
}
}
// sourceSets {
// main {
// // jnilib
// jniLibs.srcDirs = ['libs']
// }
// }
splits {
abi {
enable true
reset()
include 'arm64-v8a', 'armeabi-v7a','x86'
universalApk true
}
}
buildFeatures {
viewBinding true
}
}
dependencies {
implementation 'com.android.support:appcompat-v7:28.0.0'
implementation 'com.android.support:recyclerview-v7:28.0.0'
implementation 'com.android.support.constraint:constraint-layout:2.0.4'
testImplementation 'junit:junit:4.13.2'
androidTestImplementation 'com.android.support.test:runner:1.0.2'
androidTestImplementation 'com.android.support.test.espresso:espresso-core:3.0.2'
}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment