Commit 184b50f7 authored by zhuwenwen's avatar zhuwenwen
Browse files

fix run error

parent 28a8a733
...@@ -125,6 +125,16 @@ class SamplerOutput( ...@@ -125,6 +125,16 @@ class SamplerOutput(
# Time taken in the model execute function. This will include model forward, # Time taken in the model execute function. This will include model forward,
# block/sync across workers, cpu-gpu sync time and sampling time. # block/sync across workers, cpu-gpu sync time and sampling time.
model_execute_time: Optional[float] = None model_execute_time: Optional[float] = None
# Optional lm_head logits from the model.
logits: Optional[torch.Tensor] = None
# tree-style cartesian candidates
cart_candidates: Optional[torch.Tensor] = None
# tree-style attention masks
tree_attn_masks: Optional[torch.Tensor] = None
def __getitem__(self, idx: int) -> CompletionSequenceGroupOutput: def __getitem__(self, idx: int) -> CompletionSequenceGroupOutput:
return self.outputs[idx] return self.outputs[idx]
......
...@@ -511,8 +511,6 @@ class ChatGLMModel(nn.Module): ...@@ -511,8 +511,6 @@ class ChatGLMModel(nn.Module):
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
super().__init__() super().__init__()
self.config = config
config = vllm_config.model_config.hf_config config = vllm_config.model_config.hf_config
cache_config = vllm_config.cache_config cache_config = vllm_config.cache_config
......
from .interface import _Backend # noqa: F401 from .interface import _Backend # noqa: F401
from .interface import CpuArchEnum, Platform, PlatformEnum, UnspecifiedPlatform from .interface import CpuArchEnum, Platform, PlatformEnum, UnspecifiedPlatform
import torch
current_platform: Platform current_platform: Platform
......
...@@ -476,7 +476,7 @@ class ModelInputForGPUBuilder(ModelRunnerInputBuilderBase[ModelInputForGPU]): ...@@ -476,7 +476,7 @@ class ModelInputForGPUBuilder(ModelRunnerInputBuilderBase[ModelInputForGPU]):
self.block_aligned_sliding_window = \ self.block_aligned_sliding_window = \
self.sliding_window_blocks * self.block_size self.sliding_window_blocks * self.block_size
self.is_encoder_decoder_model = self.runner.model_config.is_encoder_decoder_model self.is_encoder_decoder_model = self.runner.model_config.is_encoder_decoder
def _compute_lens(self, inter_data: InterDataForSeqGroup, seq_idx: int, def _compute_lens(self, inter_data: InterDataForSeqGroup, seq_idx: int,
seq_group_metadata: SequenceGroupMetadata): seq_group_metadata: SequenceGroupMetadata):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment