[fix]v0 SamplerOutput在非tree decoding时不传入logits

1c7f32cd · zhuwenwen · ad038b4e · 1c7f32cd
Commit 1c7f32cd authored Aug 20, 2025 by zhuwenwen
Hide whitespace changes
Inline Side-by-side

Showing with 3 additions and 1 deletion

vllm/model_executor/layers/sampler.py +3 -1

No files found.
--- a/vllm/model_executor/layers/sampler.py
+++ b/vllm/model_executor/layers/sampler.py
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 """A layer that samples the next tokens from the model's outputs."""
+import os
 import itertools
 from collections.abc import Iterator
 from dataclasses import dataclass
@@ -204,6 +205,7 @@ class Sampler(nn.Module):
        # speculative decoding and when prompt embeddings are specified.
        self.include_gpu_probs_tensor = False
        self.should_modify_greedy_probs_inplace = False
+        self.tree_decoding = (os.environ.get('VLLM_TREE_DECODING') == '1')
    def _init_sampling_tensors(
        self,
@@ -341,7 +343,7 @@ class Sampler(nn.Module):
            sample_logprobs,
            on_device_tensors=on_device_tensors,
            skip_sampler_cpu_output=sampling_metadata.skip_sampler_cpu_output,
-            logits=logits)
+            logits=logits if self.tree_decoding else None)
    @property
    def _should_modify_greedy_probs_inplace(self) -> bool: