Commit 3d68ca03 authored by zhuwenwen
Browse files

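Update use_int8_w8a8: pass the flag through the inplace_fused_experts, outplace_fused_experts, fused_experts and fused_moe calls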

parent 10cdc93d
......@@ -1570,7 +1570,7 @@ def inplace_fused_experts(hidden_states: torch.Tensor,
start_expert: Optional[int] = -1,
end_expert: Optional[int] = -1) -> None:
fused_experts_impl(hidden_states, w1, w2, topk_weights, topk_ids, True,
activation, use_fp8_w8a8, use_int8_w8a16,
activation, use_fp8_w8a8, use_int8_w8a8, use_int8_w8a16,
use_int4_w4a16, global_num_experts, expert_map,
w1_scale, w2_scale, w1_zp, w2_zp, a1_scale, a2_scale,
block_shape, use_nn_moe, moe_ep_size=moe_ep_size,
......@@ -1637,7 +1637,7 @@ def outplace_fused_experts(
start_expert: Optional[int] = -1,
end_expert: Optional[int] = -1) -> torch.Tensor:
return fused_experts_impl(hidden_states, w1, w2, topk_weights, topk_ids,
False, activation, use_fp8_w8a8, use_int8_w8a16,
False, activation, use_fp8_w8a8, use_int8_w8a8, use_int8_w8a16,
use_int4_w4a16, global_num_experts, expert_map,
w1_scale, w2_scale, w1_zp, w2_zp, a1_scale,
a2_scale, block_shape,
......@@ -1708,7 +1708,7 @@ def fused_experts(hidden_states: torch.Tensor,
if inplace:
torch.ops.vllm.inplace_fused_experts(
hidden_states, w1, w2, topk_weights, topk_ids, activation,
use_fp8_w8a8, use_int8_w8a16, use_int4_w4a16, global_num_experts,
use_fp8_w8a8, use_int8_w8a8, use_int8_w8a16, use_int4_w4a16, global_num_experts,
expert_map, w1_scale, w2_scale, w1_zp, w2_zp, a1_scale, a2_scale,
block_shape,
use_nn_moe,
......@@ -1719,7 +1719,7 @@ def fused_experts(hidden_states: torch.Tensor,
else:
return torch.ops.vllm.outplace_fused_experts(
hidden_states, w1, w2, topk_weights, topk_ids, activation,
use_fp8_w8a8, use_int8_w8a16, use_int4_w4a16, global_num_experts,
use_fp8_w8a8, use_int8_w8a8, use_int8_w8a16, use_int4_w4a16, global_num_experts,
expert_map, w1_scale, w2_scale, w1_zp, w2_zp, a1_scale, a2_scale,
block_shape,
use_nn_moe,
......@@ -2081,6 +2081,7 @@ def fused_moe(
inplace=inplace,
activation=activation,
use_fp8_w8a8=use_fp8_w8a8,
use_int8_w8a8=use_int8_w8a8,
use_int8_w8a16=use_int8_w8a16,
use_int4_w4a16=use_int4_w4a16,
global_num_experts=global_num_experts,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment