Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ktransformers
Commits
32f3d7be
You need to sign in or sign up before continuing.
Unverified
Commit
32f3d7be
authored
May 17, 2025
by
wang jiahao
Committed by
GitHub
May 17, 2025
Browse files
Merge pull request #1307 from kvcache-ai/hyc
add xpu parameters to install.sh
parents
551ebc91
5b08d5b0
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
31 additions
and
15 deletions
+31
-15
doc/en/balance-serve.md
doc/en/balance-serve.md
+0
-2
doc/en/install.md
doc/en/install.md
+0
-2
doc/en/llama4.md
doc/en/llama4.md
+0
-2
doc/en/xpu.md
doc/en/xpu.md
+1
-3
doc/zh/DeepseekR1_V3_tutorial_zh.md
doc/zh/DeepseekR1_V3_tutorial_zh.md
+0
-2
install.sh
install.sh
+20
-2
pyproject.toml
pyproject.toml
+0
-1
requirements-local_chat.txt
requirements-local_chat.txt
+0
-1
setup.py
setup.py
+10
-0
No files found.
doc/en/balance-serve.md
View file @
32f3d7be
...
@@ -100,10 +100,8 @@ git submodule update --init --recursive
...
@@ -100,10 +100,8 @@ git submodule update --init --recursive
# Install single NUMA dependencies
# Install single NUMA dependencies
USE_BALANCE_SERVE
=
1 bash ./install.sh
USE_BALANCE_SERVE
=
1 bash ./install.sh
pip
install
third_party/custom_flashinfer/
# For those who have two cpu and 1T RAM(Dual NUMA):
# For those who have two cpu and 1T RAM(Dual NUMA):
USE_BALANCE_SERVE
=
1
USE_NUMA
=
1 bash ./install.sh
USE_BALANCE_SERVE
=
1
USE_NUMA
=
1 bash ./install.sh
pip
install
third_party/custom_flashinfer/
```
```
## Running DeepSeek-R1-Q4KM Models
## Running DeepSeek-R1-Q4KM Models
...
...
doc/en/install.md
View file @
32f3d7be
...
@@ -117,13 +117,11 @@ Download source code and compile:
...
@@ -117,13 +117,11 @@ Download source code and compile:
```shell
```shell
USE_BALANCE_SERVE=1 bash ./install.sh
USE_BALANCE_SERVE=1 bash ./install.sh
pip install third_party/custom_flashinfer/
```
```
-
For Multi-concurrency with two cpu and 1T RAM:
-
For Multi-concurrency with two cpu and 1T RAM:
```shell
```shell
USE_BALANCE_SERVE=1 USE_NUMA=1 bash ./install.sh
USE_BALANCE_SERVE=1 USE_NUMA=1 bash ./install.sh
pip install third_party/custom_flashinfer/
```
```
-
For Windows (Windows native temporarily deprecated, please try WSL)
-
For Windows (Windows native temporarily deprecated, please try WSL)
...
...
doc/en/llama4.md
View file @
32f3d7be
...
@@ -68,10 +68,8 @@ pip3 install torch torchvision torchaudio --index-url https://download.pytorch.o
...
@@ -68,10 +68,8 @@ pip3 install torch torchvision torchaudio --index-url https://download.pytorch.o
```
bash
```
bash
# Install single NUMA dependencies
# Install single NUMA dependencies
USE_BALANCE_SERVE
=
1 bash ./install.sh
USE_BALANCE_SERVE
=
1 bash ./install.sh
pip
install
third_party/custom_flashinfer/
# For those who have two cpu and 1T RAM(Dual NUMA):
# For those who have two cpu and 1T RAM(Dual NUMA):
USE_BALANCE_SERVE
=
1
USE_NUMA
=
1 bash ./install.sh
USE_BALANCE_SERVE
=
1
USE_NUMA
=
1 bash ./install.sh
pip
install
third_party/custom_flashinfer/
```
```
### 4. Use our custom config.json
### 4. Use our custom config.json
...
...
doc/en/xpu.md
View file @
32f3d7be
...
@@ -62,9 +62,7 @@ cd ktransformers
...
@@ -62,9 +62,7 @@ cd ktransformers
git submodule update
--init
git submodule update
--init
# Install dependencies
# Install dependencies
bash install.sh
bash install.sh
--dev
xpu
pip uninstall triton pytorch-triton-xpu
pip
install
pytorch-triton-xpu
==
3.3.0
--extra-index-url
https://download.pytorch.org/whl/xpu
# to avoid potential triton import error
```
```
## Running DeepSeek-R1 Models
## Running DeepSeek-R1 Models
...
...
doc/zh/DeepseekR1_V3_tutorial_zh.md
View file @
32f3d7be
...
@@ -127,10 +127,8 @@ cd ktransformers
...
@@ -127,10 +127,8 @@ cd ktransformers
git submodule update
--init
--recursive
git submodule update
--init
--recursive
# 如果使用双 numa 版本
# 如果使用双 numa 版本
USE_BALANCE_SERVE
=
1
USE_NUMA
=
1 bash ./install.sh
USE_BALANCE_SERVE
=
1
USE_NUMA
=
1 bash ./install.sh
pip
install
third_party/custom_flashinfer/
# 如果使用单 numa 版本
# 如果使用单 numa 版本
USE_BALANCE_SERVE
=
1 bash ./install.sh
USE_BALANCE_SERVE
=
1 bash ./install.sh
pip
install
third_party/custom_flashinfer/
# 启动命令
# 启动命令
python ktransformers/server/main.py
--model_path
<your model path>
--gguf_path
<your gguf path>
--cpu_infer
62
--optimize_config_path
<inject rule path>
--port
10002
--chunk_size
256
--max_new_tokens
1024
--max_batch_size
4
--port
10002
--cache_lens
32768
--backend_type
balance_serve
python ktransformers/server/main.py
--model_path
<your model path>
--gguf_path
<your gguf path>
--cpu_infer
62
--optimize_config_path
<inject rule path>
--port
10002
--chunk_size
256
--max_new_tokens
1024
--max_batch_size
4
--port
10002
--cache_lens
32768
--backend_type
balance_serve
```
```
...
...
install.sh
View file @
32f3d7be
#!/bin/bash
#!/bin/bash
set
-e
set
-e
# default backend
DEV
=
"cuda"
# parse --dev argument
while
[[
"$#"
-gt
0
]]
;
do
case
$1
in
--dev
)
DEV
=
"
$2
"
;
shift
;;
*
)
echo
"Unknown parameter passed:
$1
"
;
exit
1
;;
esac
shift
done
export
DEV_BACKEND
=
"
$DEV
"
echo
"Selected backend:
$DEV_BACKEND
"
# clear build dirs
# clear build dirs
rm
-rf
build
rm
-rf
build
rm
-rf
*
.egg-info
rm
-rf
*
.egg-info
...
@@ -13,13 +27,17 @@ rm -rf ~/.ktransformers
...
@@ -13,13 +27,17 @@ rm -rf ~/.ktransformers
echo
"Installing python dependencies from requirements.txt"
echo
"Installing python dependencies from requirements.txt"
pip
install
-r
requirements-local_chat.txt
pip
install
-r
requirements-local_chat.txt
pip
install
-r
ktransformers/server/requirements.txt
pip
install
-r
ktransformers/server/requirements.txt
echo
"Installing ktransformers"
echo
"Installing ktransformers"
KTRANSFORMERS_FORCE_BUILD
=
TRUE pip
install
-v
.
--no-build-isolation
KTRANSFORMERS_FORCE_BUILD
=
TRUE pip
install
-v
.
--no-build-isolation
if
[[
"
$DEV_BACKEND
"
==
"cuda"
]]
;
then
echo
"Installing custom_flashinfer for CUDA backend"
pip
install
third_party/custom_flashinfer/
fi
# SITE_PACKAGES=$(python -c "import site; print(site.getsitepackages()[0])")
# SITE_PACKAGES=$(python -c "import site; print(site.getsitepackages()[0])")
# echo "Copying thirdparty libs to $SITE_PACKAGES"
# echo "Copying thirdparty libs to $SITE_PACKAGES"
# cp -a csrc/balance_serve/build/third_party/prometheus-cpp/lib/libprometheus-cpp-*.so* $SITE_PACKAGES/
# cp -a csrc/balance_serve/build/third_party/prometheus-cpp/lib/libprometheus-cpp-*.so* $SITE_PACKAGES/
# patchelf --set-rpath '$ORIGIN' $SITE_PACKAGES/sched_ext.cpython*
# patchelf --set-rpath '$ORIGIN' $SITE_PACKAGES/sched_ext.cpython*
echo
"Installation completed successfully"
echo
"Installation completed successfully"
\ No newline at end of file
pyproject.toml
View file @
32f3d7be
...
@@ -30,7 +30,6 @@ dependencies = [
...
@@ -30,7 +30,6 @@ dependencies = [
"build"
,
"build"
,
"fire"
,
"fire"
,
"protobuf"
,
"protobuf"
,
"triton >= 3.2"
]
]
requires-python
=
">=3.10"
requires-python
=
">=3.10"
...
...
requirements-local_chat.txt
View file @
32f3d7be
...
@@ -7,4 +7,3 @@ cpufeature; sys_platform == 'win32' or sys_platform == 'Windows'
...
@@ -7,4 +7,3 @@ cpufeature; sys_platform == 'win32' or sys_platform == 'Windows'
protobuf
protobuf
tiktoken
tiktoken
blobfile
blobfile
triton>=3.2
setup.py
View file @
32f3d7be
...
@@ -41,6 +41,15 @@ except ImportError:
...
@@ -41,6 +41,15 @@ except ImportError:
MUSA_HOME
=
None
MUSA_HOME
=
None
KTRANSFORMERS_BUILD_XPU
=
torch
.
xpu
.
is_available
()
KTRANSFORMERS_BUILD_XPU
=
torch
.
xpu
.
is_available
()
# 检测 DEV_BACKEND 环境变量
dev_backend
=
os
.
environ
.
get
(
"DEV_BACKEND"
,
""
).
lower
()
if
dev_backend
==
"xpu"
:
triton_dep
=
[
"pytorch-triton-xpu==3.3.0"
]
else
:
triton_dep
=
[
"triton>=3.2"
]
with_balance
=
os
.
environ
.
get
(
"USE_BALANCE_SERVE"
,
"0"
)
==
"1"
with_balance
=
os
.
environ
.
get
(
"USE_BALANCE_SERVE"
,
"0"
)
==
"1"
class
CpuInstructInfo
:
class
CpuInstructInfo
:
...
@@ -659,6 +668,7 @@ else:
...
@@ -659,6 +668,7 @@ else:
setup
(
setup
(
name
=
VersionInfo
.
PACKAGE_NAME
,
name
=
VersionInfo
.
PACKAGE_NAME
,
version
=
VersionInfo
().
get_package_version
(),
version
=
VersionInfo
().
get_package_version
(),
install_requires
=
triton_dep
,
cmdclass
=
{
"bdist_wheel"
:
BuildWheelsCommand
,
"build_ext"
:
CMakeBuild
},
cmdclass
=
{
"bdist_wheel"
:
BuildWheelsCommand
,
"build_ext"
:
CMakeBuild
},
ext_modules
=
ext_modules
ext_modules
=
ext_modules
)
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment