Commit 564cbe7a authored by zhuwenwen
Browse files

remove SUPPORT_MOE_MARLIN_W16A16

parent 0328ef06
...@@ -16,11 +16,7 @@ from vllm.utils import cuda_device_count_stateless ...@@ -16,11 +16,7 @@ from vllm.utils import cuda_device_count_stateless
from .interface import DeviceCapability, Platform, PlatformEnum, _Backend from .interface import DeviceCapability, Platform, PlatformEnum, _Backend
from vllm.utils import SUPPORT_TC, SUPPORT_MOE_MARLIN_W16A16 from vllm.utils import SUPPORT_TC
if SUPPORT_MOE_MARLIN_W16A16:
os.environ['VLLM_USE_MARLIN_W16A16_MOE'] = '1'
os.environ['MOE_NN'] = '0'
if not SUPPORT_TC: if not SUPPORT_TC:
os.environ['VLLM_USE_V1'] = '0' os.environ['VLLM_USE_V1'] = '0'
......
...@@ -87,7 +87,6 @@ MULTIMODAL_MODEL_MAX_NUM_BATCHED_TOKENS = 5120 ...@@ -87,7 +87,6 @@ MULTIMODAL_MODEL_MAX_NUM_BATCHED_TOKENS = 5120
GPU_ARCH = torch.cuda.get_device_properties("cuda").gcnArchName GPU_ARCH = torch.cuda.get_device_properties("cuda").gcnArchName
SUPPORT_TC = any(arch in GPU_ARCH for arch in ["gfx928", "gfx936", "gfx938"]) SUPPORT_TC = any(arch in GPU_ARCH for arch in ["gfx928", "gfx936", "gfx938"])
SUPPORT_MOE_MARLIN_W16A16 = any(arch in GPU_ARCH for arch in ["gfx936"])
def _generate_random_int8( def _generate_random_int8(
tensor: torch.Tensor, tensor: torch.Tensor,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment