Unverified Commit 2039c630, authored by Thien Tran, committed by GitHub
Browse files

[Bugfix] Fix imports for MoE on CPU (#15841)


Signed-off-by: Thien Tran <gau.nernst@yahoo.com.sg>
parent 6efb195a
...@@ -4,8 +4,6 @@ from typing import List, Optional ...@@ -4,8 +4,6 @@ from typing import List, Optional
import torch import torch
import vllm.envs as envs import vllm.envs as envs
from vllm.model_executor.layers.quantization.utils.fp8_utils import (
per_token_group_quant_fp8)
from vllm.platforms import current_platform from vllm.platforms import current_platform
...@@ -38,6 +36,9 @@ def rocm_aiter_fused_experts( ...@@ -38,6 +36,9 @@ def rocm_aiter_fused_experts(
import aiter as rocm_aiter import aiter as rocm_aiter
import aiter.fused_moe_bf16_asm as rocm_aiter_asm_fmoe import aiter.fused_moe_bf16_asm as rocm_aiter_asm_fmoe
from vllm.model_executor.layers.quantization.utils.fp8_utils import (
per_token_group_quant_fp8)
if envs.VLLM_ROCM_USE_AITER_FP8_BLOCK_SCALED_MOE and use_fp8_w8a8: if envs.VLLM_ROCM_USE_AITER_FP8_BLOCK_SCALED_MOE and use_fp8_w8a8:
assert w1_scale is not None assert w1_scale is not None
assert w2_scale is not None assert w2_scale is not None
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment