Unverified commit 5d2965b7, authored by Cyrus Leung, committed by GitHub
Browse files

[Bugfix] Fix 2 Node and Spec Decode tests (#13341)


Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
parent a0231b7c
...@@ -275,11 +275,11 @@ def _compare_tp( ...@@ -275,11 +275,11 @@ def _compare_tp(
if load_format == "dummy": if load_format == "dummy":
# Avoid OOM # Avoid OOM
text_overrides = { text_overrides = {
"num_layers": 1, "num_hidden_layers": 4,
"num_hidden_layers": 1, "hidden_size": 512,
"num_experts": 2, "intermediate_size": 800,
"num_experts_per_tok": 2, "num_attention_heads": 4,
"num_local_experts": 2, "num_key_value_heads": 1,
} }
if is_multimodal: if is_multimodal:
......
...@@ -6,6 +6,7 @@ from typing import List, Optional, Set, Tuple ...@@ -6,6 +6,7 @@ from typing import List, Optional, Set, Tuple
import torch import torch
import torch.nn as nn import torch.nn as nn
from vllm.config import VllmConfig
from vllm.model_executor.layers.sampler import SamplerOutput from vllm.model_executor.layers.sampler import SamplerOutput
from vllm.sequence import ExecuteModelRequest from vllm.sequence import ExecuteModelRequest
from vllm.spec_decode.interfaces import SpeculativeProposals from vllm.spec_decode.interfaces import SpeculativeProposals
...@@ -25,11 +26,18 @@ class NGramWorker(NonLLMProposerWorkerBase): ...@@ -25,11 +26,18 @@ class NGramWorker(NonLLMProposerWorkerBase):
which don't rely on LLM model to give proposals. which don't rely on LLM model to give proposals.
""" """
def __init__(self, *args, **kwargs): def __init__(
self,
vllm_config: VllmConfig,
local_rank: int,
device_type: str = "cuda",
**kwargs,
):
super().__init__(vllm_config)
# Get local_rank/vocab_size from kwargs attribute # Get local_rank/vocab_size from kwargs attribute
self.local_rank = kwargs["local_rank"] self.local_rank = local_rank
self.vocab_size = kwargs["vllm_config"].model_config.get_vocab_size() self.device_type = device_type
self.device_type = kwargs.get("device_type", "cuda")
# Lazy initialization list. # Lazy initialization list.
self._proposer: Top1Proposer self._proposer: Top1Proposer
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment