Merge branch 'v0.9.2-dev-wm' into 'v0.9.2-dev'

[fix]v0 SamplerOutput在非tree decoding时不传入logits See merge request dcutoolkit/deeplearing/vllm!185

Merge branch 'v0.9.2-dev-wm' into 'v0.9.2-dev'
[fix]v0 SamplerOutput在非tree decoding时不传入logits See merge request dcutoolkit/deeplearing/vllm!185
d8297312 · zhuwenwen · 9a4a94ee · 01692be6 · d8297312
Commit d8297312 authored Aug 20, 2025 by zhuwenwen
Hide whitespace changes
Inline Side-by-side

Showing with 3 additions and 1 deletion

vllm/model_executor/layers/sampler.py vllm/model_executor/layers/sampler.py +3 -1

No files found.
--- a/vllm/model_executor/layers/sampler.py
+++ b/vllm/model_executor/layers/sampler.py
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 """A layer that samples the next tokens from the model's outputs."""
+import os
 import itertools
 from collections.abc import Iterator
 from dataclasses import dataclass
@@ -210,6 +211,7 @@ class Sampler(nn.Module):
        # speculative decoding and when prompt embeddings are specified.
        self.include_gpu_probs_tensor = False
        self.should_modify_greedy_probs_inplace = False
+        self.tree_decoding = (os.environ.get('VLLM_TREE_DECODING') == '1')
    def _init_sampling_tensors(
        self,
@@ -347,7 +349,7 @@ class Sampler(nn.Module):
            sample_logprobs,
            on_device_tensors=on_device_tensors,
            skip_sampler_cpu_output=sampling_metadata.skip_sampler_cpu_output,
-            logits=logits)
+            logits=logits if self.tree_decoding else None)
    @property
    def _should_modify_greedy_probs_inplace(self) -> bool: