Commit 1bd3ae33 authored by zhuwenwen's avatar zhuwenwen
Browse files

skip silu_mul_fp8_quant_deep_gemm_cuda and remove zero_overhead

parent 9bf1b213
This diff is collapsed.
from dataclasses import dataclass
from typing import Optional

from vllm.v1.outputs import ModelRunnerOutput
@dataclass
class ZeroV1ModelRunnerOutput(ModelRunnerOutput):
    """``ModelRunnerOutput`` extended with optional ``fix_*`` fields.

    Every added field has a default, so an instance can be built from the
    base-class arguments alone. The four ``fix_*`` fields default to
    ``None`` and are therefore annotated as ``Optional``.

    NOTE(review): the producer and consumer of these fields are not visible
    in this file; the per-field notes below are inferred from the names and
    should be confirmed against the callers.
    """

    # [num_reqs]
    # Presumably request ids aligned index-for-index with
    # ``fix_sampled_token_ids`` -- TODO confirm.
    fix_req_ids: Optional[list[str]] = None
    # Presumably one list of sampled token ids per entry of ``fix_req_ids``.
    fix_sampled_token_ids: Optional[list[list[int]]] = None
    # Presumably request ids aligned with ``fix_draft_tokens_ids``.
    fix_draft_req_ids: Optional[list[str]] = None
    # Presumably one list of draft token ids per entry of
    # ``fix_draft_req_ids``.
    fix_draft_tokens_ids: Optional[list[list[int]]] = None
    # Defaults to True; what makes an output "invalid" is decided by code
    # outside this file.
    is_output_valid: bool = True
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment