Commit 0ff29dbf authored by zhuwenwen
Browse files

Merge branch 'v0.9.2-dev' into v0.9.2-dev-ds

parents e0ba23b5 8c0143db
...@@ -916,7 +916,7 @@ class DeepseekScalingRotaryEmbedding(RotaryEmbedding): ...@@ -916,7 +916,7 @@ class DeepseekScalingRotaryEmbedding(RotaryEmbedding):
direct_register_custom_op( direct_register_custom_op(
op_name="rotary_embedding_deepseek_fuse", op_name="rotary_embedding_deepseek_fuse",
op_func=rotary_embedding_deepseek_fuse, op_func=rotary_embedding_deepseek_fuse,
mutates_args=[], mutates_args=["query", "key"],
fake_impl=rotary_embedding_deepseek_fuse_fake, fake_impl=rotary_embedding_deepseek_fuse_fake,
) )
...@@ -958,8 +958,7 @@ class DeepseekScalingRotaryEmbedding(RotaryEmbedding): ...@@ -958,8 +958,7 @@ class DeepseekScalingRotaryEmbedding(RotaryEmbedding):
BLOCK_SIZE=BLOCK_SIZE, BLOCK_SIZE=BLOCK_SIZE,
num_warps=1) num_warps=1)
# if envs.VLLM_USE_LIGHTOP: if envs.VLLM_USE_LIGHTOP:
if False:
torch.ops.vllm.rotary_embedding_deepseek_fuse(positions, query, key, self.head_size, self.cos_sin_cache, self.is_neox_style) torch.ops.vllm.rotary_embedding_deepseek_fuse(positions, query, key, self.head_size, self.cos_sin_cache, self.is_neox_style)
else: else:
call(query) call(query)
......
...@@ -16,6 +16,14 @@ from vllm.utils import cuda_device_count_stateless ...@@ -16,6 +16,14 @@ from vllm.utils import cuda_device_count_stateless
from .interface import DeviceCapability, Platform, PlatformEnum, _Backend from .interface import DeviceCapability, Platform, PlatformEnum, _Backend
from vllm.utils import SUPPORT_TC
if not SUPPORT_TC:
os.environ['VLLM_USE_V1'] = '0'
os.environ['VLLM_USE_FLASH_ATTN_PA'] = '0'
os.environ['VLLM_USE_FLASH_MLA'] = '0'
if TYPE_CHECKING: if TYPE_CHECKING:
from vllm.config import ModelConfig, VllmConfig from vllm.config import ModelConfig, VllmConfig
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment