[fix]v0 SamplerOutput在非tree decoding时不传入logits

01692be6 · 王敏 · 9a4a94ee · 01692be6
Commit 01692be6 authored Aug 20, 2025 by 王敏
Show whitespace changes
Inline Side-by-side

Showing with 3 additions and 1 deletion

vllm/model_executor/layers/sampler.py vllm/model_executor/layers/sampler.py +3 -1

No files found.
--- a/vllm/model_executor/layers/sampler.py
+++ b/vllm/model_executor/layers/sampler.py
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 """A layer that samples the next tokens from the model's outputs."""
+import os
 import itertools
 from collections.abc import Iterator
 from dataclasses import dataclass
@@ -210,6 +211,7 @@ class Sampler(nn.Module):
        # speculative decoding and when prompt embeddings are specified.
        self.include_gpu_probs_tensor = False
        self.should_modify_greedy_probs_inplace = False
+        self.tree_decoding = (os.environ.get('VLLM_TREE_DECODING') == '1')
    def _init_sampling_tensors(
        self,
@@ -347,7 +349,7 @@ class Sampler(nn.Module):
            sample_logprobs,
            on_device_tensors=on_device_tensors,
            skip_sampler_cpu_output=sampling_metadata.skip_sampler_cpu_output,
-            logits=logits)
+            logits=logits if self.tree_decoding else None)
    @property
    def _should_modify_greedy_probs_inplace(self) -> bool: