Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
29e922ac
Commit
29e922ac
authored
May 09, 2025
by
lizhigong
Browse files
roll back unused change
parent
0ee425a6
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
11 additions
and
14 deletions
+11
-14
vllm/executor/mp_distributed_executor.py
vllm/executor/mp_distributed_executor.py
+7
-7
vllm/model_executor/layers/sampler.py
vllm/model_executor/layers/sampler.py
+2
-3
vllm/spec_decode/util.py
vllm/spec_decode/util.py
+1
-1
vllm/worker/model_runner.py
vllm/worker/model_runner.py
+1
-3
No files found.
vllm/executor/mp_distributed_executor.py
View file @
29e922ac
...
@@ -48,13 +48,13 @@ class MultiprocessingDistributedExecutor(DistributedExecutorBase):
...
@@ -48,13 +48,13 @@ class MultiprocessingDistributedExecutor(DistributedExecutorBase):
f
"is less than than max local gpu count (
{
cuda_device_count
}
)"
)
f
"is less than than max local gpu count (
{
cuda_device_count
}
)"
)
# Set CUDA_VISIBLE_DEVICES for the driver, inherited by workers
# Set CUDA_VISIBLE_DEVICES for the driver, inherited by workers
#
if "CUDA_VISIBLE_DEVICES" or "HIP_VISIBLE_DEVICES" not in os.environ:
if
"CUDA_VISIBLE_DEVICES"
or
"HIP_VISIBLE_DEVICES"
not
in
os
.
environ
:
#
update_environment_variables({
update_environment_variables
({
#
"CUDA_VISIBLE_DEVICES": (",".join(map(str, range(world_size))))
"CUDA_VISIBLE_DEVICES"
:
(
","
.
join
(
map
(
str
,
range
(
world_size
))))
#
})
})
#
update_environment_variables({
update_environment_variables
({
#
"HIP_VISIBLE_DEVICES": (",".join(map(str, range(world_size))))
"HIP_VISIBLE_DEVICES"
:
(
","
.
join
(
map
(
str
,
range
(
world_size
))))
#
})
})
def
_init_executor
(
self
)
->
None
:
def
_init_executor
(
self
)
->
None
:
...
...
vllm/model_executor/layers/sampler.py
View file @
29e922ac
...
@@ -699,7 +699,7 @@ def _sample_with_torch(
...
@@ -699,7 +699,7 @@ def _sample_with_torch(
if
sampling_type
==
SamplingType
.
GREEDY
:
if
sampling_type
==
SamplingType
.
GREEDY
:
greedy_samples
=
torch
.
argmax
(
logprobs
[
long_sample_indices
],
greedy_samples
=
torch
.
argmax
(
logprobs
[
long_sample_indices
],
dim
=-
1
)
dim
=-
1
)
sampled_token_ids_
=
greedy_samples
.
unsqueeze
(
-
1
)
if
sampled_token_ids_tensor
is
not
None
:
if
sampled_token_ids_tensor
is
not
None
:
# Store sampled tokens in output tensor.
# Store sampled tokens in output tensor.
sampled_token_ids_tensor
[
sampled_token_ids_tensor
[
...
@@ -736,8 +736,7 @@ def _sample_with_torch(
...
@@ -736,8 +736,7 @@ def _sample_with_torch(
probs
[
long_sample_indices
],
probs
[
long_sample_indices
],
max_n_in_batch
,
max_n_in_batch
,
seq_groups
=
seq_groups_arg
)
seq_groups
=
seq_groups_arg
)
sampled_token_ids_
=
\
multinomial_samples
[
sampling_type
].
to
(
torch
.
long
)
if
sampled_token_ids_tensor
is
not
None
:
if
sampled_token_ids_tensor
is
not
None
:
# Store sampled tokens in output tensor.
# Store sampled tokens in output tensor.
sampled_token_ids_tensor
[
long_sample_indices
]
=
\
sampled_token_ids_tensor
[
long_sample_indices
]
=
\
...
...
vllm/spec_decode/util.py
View file @
29e922ac
...
@@ -11,7 +11,6 @@ from vllm.platforms import current_platform
...
@@ -11,7 +11,6 @@ from vllm.platforms import current_platform
from
vllm.sequence
import
(
CompletionSequenceGroupOutput
,
Logprob
,
from
vllm.sequence
import
(
CompletionSequenceGroupOutput
,
Logprob
,
PromptLogprobs
,
SequenceGroupMetadata
,
PromptLogprobs
,
SequenceGroupMetadata
,
SequenceOutput
)
SequenceOutput
)
from
vllm.zero_overhead.utils
import
is_zero_overhead
SeqId
=
int
SeqId
=
int
...
@@ -140,6 +139,7 @@ def split_batch_by_proposal_len(
...
@@ -140,6 +139,7 @@ def split_batch_by_proposal_len(
zero or not. We should remove this once vLLM supports per-sequence proposal
zero or not. We should remove this once vLLM supports per-sequence proposal
lens in a batch.
lens in a batch.
"""
"""
nonzero_lists
:
Tuple
[
List
[
SequenceGroupMetadata
],
List
[
int
]]
=
([],
[])
nonzero_lists
:
Tuple
[
List
[
SequenceGroupMetadata
],
List
[
int
]]
=
([],
[])
zero_lists
:
Tuple
[
List
[
SequenceGroupMetadata
],
List
[
int
]]
=
([],
[])
zero_lists
:
Tuple
[
List
[
SequenceGroupMetadata
],
List
[
int
]]
=
([],
[])
for
i
,
(
seq_group
,
proposal_len
)
in
enumerate
(
for
i
,
(
seq_group
,
proposal_len
)
in
enumerate
(
...
...
vllm/worker/model_runner.py
View file @
29e922ac
...
@@ -987,7 +987,7 @@ class ModelInputForGPUBuilder(ModelRunnerInputBuilderBase[ModelInputForGPU]):
...
@@ -987,7 +987,7 @@ class ModelInputForGPUBuilder(ModelRunnerInputBuilderBase[ModelInputForGPU]):
]
]
multi_modal_kwargs
=
MultiModalKwargs
.
batch
(
multi_modal_kwargs_list
)
multi_modal_kwargs
=
MultiModalKwargs
.
batch
(
multi_modal_kwargs_list
)
ret
=
self
.
model_input_cls
(
ret
urn
self
.
model_input_cls
(
input_tokens
=
input_tokens_tensor
,
input_tokens
=
input_tokens_tensor
,
input_positions
=
input_positions_tensor
,
input_positions
=
input_positions_tensor
,
token_types
=
token_types_tensor
,
token_types
=
token_types_tensor
,
...
@@ -1001,8 +1001,6 @@ class ModelInputForGPUBuilder(ModelRunnerInputBuilderBase[ModelInputForGPU]):
...
@@ -1001,8 +1001,6 @@ class ModelInputForGPUBuilder(ModelRunnerInputBuilderBase[ModelInputForGPU]):
finished_requests_ids
=
self
.
finished_requests_ids
,
finished_requests_ids
=
self
.
finished_requests_ids
,
prompt_adapter_mapping
=
prompt_adapter_mapping
,
prompt_adapter_mapping
=
prompt_adapter_mapping
,
prompt_adapter_requests
=
prompt_adapter_requests
)
prompt_adapter_requests
=
prompt_adapter_requests
)
return
ret
class
GPUModelRunnerBase
(
ModelRunnerBase
[
TModelInputForGPU
]):
class
GPUModelRunnerBase
(
ModelRunnerBase
[
TModelInputForGPU
]):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment