Commit 184b50f7 authored by zhuwenwen
Browse files

fix run error

parent 28a8a733
......@@ -126,6 +126,16 @@ class SamplerOutput(
# block/sync across workers, cpu-gpu sync time and sampling time.
model_execute_time: Optional[float] = None
# Optional lm_head logits from the model.
logits: Optional[torch.Tensor] = None
# tree-style cartesian candidates
cart_candidates: Optional[torch.Tensor] = None
# tree-style attention masks
tree_attn_masks: Optional[torch.Tensor] = None
def __getitem__(self, idx: int) -> CompletionSequenceGroupOutput:
return self.outputs[idx]
......
......@@ -512,8 +512,6 @@ class ChatGLMModel(nn.Module):
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
super().__init__()
self.config = config
config = vllm_config.model_config.hf_config
cache_config = vllm_config.cache_config
quant_config = vllm_config.quant_config
......
from .interface import _Backend # noqa: F401
from .interface import CpuArchEnum, Platform, PlatformEnum, UnspecifiedPlatform
import torch
current_platform: Platform
......
......@@ -476,7 +476,7 @@ class ModelInputForGPUBuilder(ModelRunnerInputBuilderBase[ModelInputForGPU]):
self.block_aligned_sliding_window = \
self.sliding_window_blocks * self.block_size
self.is_encoder_decoder_model = self.runner.model_config.is_encoder_decoder_model
self.is_encoder_decoder_model = self.runner.model_config.is_encoder_decoder
def _compute_lens(self, inter_data: InterDataForSeqGroup, seq_idx: int,
seq_group_metadata: SequenceGroupMetadata):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment