Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
bdd33b3f
Commit
bdd33b3f
authored
Jan 30, 2026
by
zhuwenwen
Browse files
update fa interface and kvcache
add prepare_so_files to prepare so
parent
63053820
Changes
4
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
271 additions
and
210 deletions
+271
-210
README.md
README.md
+3
-0
setup.py
setup.py
+34
-0
vllm/attention/layer.py
vllm/attention/layer.py
+4
-1
vllm/v1/attention/backends/flash_attn.py
vllm/v1/attention/backends/flash_attn.py
+230
-209
No files found.
README.md
View file @
bdd33b3f
...
...
@@ -91,6 +91,9 @@ python3 setup.py install (若调试,可使用python3 setup.py develop)
```
若需要添加git号,设置环境变量: export ADD_GIT_VERSION=1
3.
跳过编译(适用于未改变csrc目录kernel并多次编译情况)
将编译后的so文件(_C.abi3.so、_moe_C.abi3.so、cumem_allocator.abi3.so)打包为csrc/so.tar.gz,并设置环境变量: export SKIP_VLLM_BUILD=1
#### 运行基础环境准备
1、使用上面基于光源pytorch2.9.0基础镜像环境
...
...
setup.py
View file @
bdd33b3f
...
...
@@ -13,6 +13,8 @@ import sys
import
sysconfig
from
pathlib
import
Path
from
shutil
import
which
import
tarfile
import
shutil
import
torch
from
packaging.version
import
Version
,
parse
...
...
@@ -36,6 +38,37 @@ skip_vllm_build = False
# Opt-in switch read from the environment: when SKIP_VLLM_BUILD parses to the
# integer 1, the skip_vllm_build flag is turned on. Any other integer leaves
# the flag untouched; a non-integer value makes int() raise ValueError.
if int(os.getenv('SKIP_VLLM_BUILD', '0')) == 1:
    skip_vllm_build = True
def prepare_so_files():
    """Copy prebuilt extension libraries from ``csrc/so.tar.gz`` into ``vllm/``.

    The archive is searched for regular-file members whose base name is one of
    ``_C.abi3.so``, ``_moe_C.abi3.so`` or ``cumem_allocator.abi3.so``; each
    match is written to ``vllm/<name>`` regardless of the directory it had
    inside the archive. Both paths are relative to the current working
    directory.

    If the archive does not exist a warning is printed and nothing is done.
    A warning is also printed for any expected library that the archive does
    not contain.

    Raises:
        tarfile.TarError: if the archive exists but cannot be read.
        OSError: if a destination file cannot be written.
    """
    archive_path = "csrc/so.tar.gz"
    target_dir = "vllm"
    wanted = {"_C.abi3.so", "_moe_C.abi3.so", "cumem_allocator.abi3.so"}

    if not os.path.exists(archive_path):
        print(f"Warning: {archive_path} not found, skipping extraction")
        return

    print(f"Preparing C extension files from {archive_path}...")

    copied = set()
    with tarfile.open(archive_path, "r:*") as tar:
        for member in tar:
            file = os.path.basename(member.name)
            # Only plain files with an expected name are touched. Nothing is
            # ever written to a path taken from the archive, so members such
            # as "../x" or absolute paths cannot escape the target directory.
            if not member.isfile() or file not in wanted:
                continue

            dst_path = os.path.join(target_dir, file)
            os.makedirs(target_dir, exist_ok=True)

            # Stream the member straight to its destination instead of
            # unpacking the whole archive into a scratch directory first.
            payload = tar.extractfile(member)
            with payload, open(dst_path, "wb") as out:
                shutil.copyfileobj(payload, out)

            # Keep the permission bits and modification time recorded in the
            # archive, as extract-then-copy2 did.
            os.chmod(dst_path, member.mode & 0o777)
            os.utime(dst_path, (member.mtime, member.mtime))

            print(f"Copied {file} to {dst_path}")
            copied.add(file)

    missing = sorted(wanted - copied)
    if missing:
        print(f"Warning: {', '.join(missing)} not found in {archive_path}")
def
load_module_from_path
(
module_name
,
path
):
spec
=
importlib
.
util
.
spec_from_file_location
(
module_name
,
path
)
module
=
importlib
.
util
.
module_from_spec
(
spec
)
...
...
@@ -1109,6 +1142,7 @@ if _build_custom_ops():
ext_modules
.
append
(
CMakeExtension
(
name
=
"vllm._C"
))
if
skip_vllm_build
:
prepare_so_files
()
package_data
=
{
"vllm"
:
[
"py.typed"
,
...
...
vllm/attention/layer.py
View file @
bdd33b3f
...
...
@@ -848,7 +848,10 @@ def unified_kv_cache_update(
layer_slot_mapping
,
)
return
torch
.
empty
(
0
,
device
=
kv_cache
.
device
,
dtype
=
kv_cache
.
dtype
)
if
current_platform
.
is_rocm
():
return
torch
.
empty
(
0
,
device
=
key
.
device
,
dtype
=
key
.
dtype
)
else
:
return
torch
.
empty
(
0
,
device
=
kv_cache
.
device
,
dtype
=
kv_cache
.
dtype
)
def
unified_kv_cache_update_fake
(
...
...
vllm/v1/attention/backends/flash_attn.py
View file @
bdd33b3f
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment