fix run error

184b50f7 · zhuwenwen · 28a8a733 · 184b50f7 · 184b50f7 · 184b50f7
Commit 184b50f7 authored Jan 03, 2025 by zhuwenwen
4 changed files
--- a/vllm/model_executor/layers/sampler.py
+++ b/vllm/model_executor/layers/sampler.py
@@ -126,6 +126,16 @@ class SamplerOutput(
    # block/sync across workers, cpu-gpu sync time and sampling time.
    model_execute_time: Optional[float] = None
    
+    # Optional lm_head logits from the model.
+    logits: Optional[torch.Tensor] = None
+
+    # tree-style cartesian candidates
+    cart_candidates: Optional[torch.Tensor] = None
+
+    # tree-style attention masks
+    tree_attn_masks: Optional[torch.Tensor] = None
+
+
    def __getitem__(self, idx: int) -> CompletionSequenceGroupOutput:
        return self.outputs[idx]


--- a/vllm/model_executor/models/chatglm.py
+++ b/vllm/model_executor/models/chatglm.py
@@ -512,8 +512,6 @@ class ChatGLMModel(nn.Module):
    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
        super().__init__()

-        self.config = config
-
        config = vllm_config.model_config.hf_config
        cache_config = vllm_config.cache_config
        quant_config = vllm_config.quant_config

--- a/vllm/platforms/__init__.py
+++ b/vllm/platforms/__init__.py
 from .interface import _Backend  # noqa: F401
 from .interface import CpuArchEnum, Platform, PlatformEnum, UnspecifiedPlatform
+import torch

 current_platform: Platform


--- a/vllm/worker/model_runner.py
+++ b/vllm/worker/model_runner.py
@@ -476,7 +476,7 @@ class ModelInputForGPUBuilder(ModelRunnerInputBuilderBase[ModelInputForGPU]):
            self.block_aligned_sliding_window = \
                self.sliding_window_blocks * self.block_size

-        self.is_encoder_decoder_model = self.runner.model_config.is_encoder_decoder_model
+        self.is_encoder_decoder_model = self.runner.model_config.is_encoder_decoder

    def _compute_lens(self, inter_data: InterDataForSeqGroup, seq_idx: int,
                      seq_group_metadata: SequenceGroupMetadata):