[CI][Spec Decode] fix: broken test for EAGLE model (#11972)

Signed-off-by: Sungjae Lee <33976427+llsj14@users.noreply.github.com>

[CI][Spec Decode] fix: broken test for EAGLE model (#11972)
Signed-off-by: Sungjae Lee <33976427+llsj14@users.noreply.github.com>
80ea3af1 · Sungjae Lee · GitHub · 9dd02d85 · 80ea3af1 · 80ea3af1
Unverified commit 80ea3af1, authored Jan 13, 2025 by Sungjae Lee; committed by GitHub on Jan 13, 2025.
Hide whitespace changes
Inline Side-by-side

Showing with 13 additions and 2 deletions

.buildkite/test-pipeline.yaml .buildkite/test-pipeline.yaml +3 -1

vllm/model_executor/models/eagle.py vllm/model_executor/models/eagle.py +10 -1

No files found.
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -231,13 +231,15 @@ steps:
    - pytest -v -s test_logits_processor.py
    - pytest -v -s model_executor/test_guided_processors.py

-- label: Speculative decoding tests # 30min
+- label: Speculative decoding tests # 40min
  source_file_dependencies:
  - vllm/spec_decode
  - tests/spec_decode
+  - vllm/model_executor/models/eagle.py
  commands:
    - pytest -v -s spec_decode/e2e/test_multistep_correctness.py
    - VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s spec_decode --ignore=spec_decode/e2e/test_multistep_correctness.py
+    - pytest -v -s spec_decode/e2e/test_eagle_correctness.py

 - label: LoRA Test %N # 15min each
  mirror_hardwares: [amd]

--- a/vllm/model_executor/models/eagle.py
+++ b/vllm/model_executor/models/eagle.py
@@ -19,6 +19,11 @@ from .utils import maybe_prefix

 class DummyInputLayerNorm(nn.Module):

+    def __init__(self, weight=None, bias=None):
+        super().__init__()
+        self.weight = nn.Parameter(weight) if weight is not None else None
+        self.bias = nn.Parameter(bias) if bias is not None else None
+
    def forward(self, x):
        return x

@@ -69,7 +74,11 @@ class EAGLE(nn.Module):

        # Modify layer normalization and residual connections as suggested
        # in the EAGLE framework: https://github.com/SafeAILab/EAGLE
-        self.model.model.layers[0].input_layernorm = DummyInputLayerNorm()
+        # While weights and biases are generally not needed,
+        # they are retained here to support certain unit tests
+        # (e.g., spec_decode/e2e/test_eagle_correctness.py).
+        self.model.model.layers[0].input_layernorm = DummyInputLayerNorm(
+            weight=self.model.model.layers[0].input_layernorm.weight)
        self.model.model.norm = DummyOutputNorm()

        self.orig_vocab_size = config.vocab_size