Commit 29e922ac authored by lizhigong
Browse files

roll back unused change

parent 0ee425a6
...@@ -48,13 +48,13 @@ class MultiprocessingDistributedExecutor(DistributedExecutorBase): ...@@ -48,13 +48,13 @@ class MultiprocessingDistributedExecutor(DistributedExecutorBase):
f"is less than than max local gpu count ({cuda_device_count})") f"is less than than max local gpu count ({cuda_device_count})")
# Set CUDA_VISIBLE_DEVICES for the driver, inherited by workers # Set CUDA_VISIBLE_DEVICES for the driver, inherited by workers
# if "CUDA_VISIBLE_DEVICES" or "HIP_VISIBLE_DEVICES" not in os.environ: if "CUDA_VISIBLE_DEVICES" or "HIP_VISIBLE_DEVICES" not in os.environ:
# update_environment_variables({ update_environment_variables({
# "CUDA_VISIBLE_DEVICES": (",".join(map(str, range(world_size)))) "CUDA_VISIBLE_DEVICES": (",".join(map(str, range(world_size))))
# }) })
# update_environment_variables({ update_environment_variables({
# "HIP_VISIBLE_DEVICES": (",".join(map(str, range(world_size)))) "HIP_VISIBLE_DEVICES": (",".join(map(str, range(world_size))))
# }) })
def _init_executor(self) -> None: def _init_executor(self) -> None:
......
...@@ -699,7 +699,7 @@ def _sample_with_torch( ...@@ -699,7 +699,7 @@ def _sample_with_torch(
if sampling_type == SamplingType.GREEDY: if sampling_type == SamplingType.GREEDY:
greedy_samples = torch.argmax(logprobs[long_sample_indices], greedy_samples = torch.argmax(logprobs[long_sample_indices],
dim=-1) dim=-1)
sampled_token_ids_ = greedy_samples.unsqueeze(-1)
if sampled_token_ids_tensor is not None: if sampled_token_ids_tensor is not None:
# Store sampled tokens in output tensor. # Store sampled tokens in output tensor.
sampled_token_ids_tensor[ sampled_token_ids_tensor[
...@@ -736,8 +736,7 @@ def _sample_with_torch( ...@@ -736,8 +736,7 @@ def _sample_with_torch(
probs[long_sample_indices], probs[long_sample_indices],
max_n_in_batch, max_n_in_batch,
seq_groups=seq_groups_arg) seq_groups=seq_groups_arg)
sampled_token_ids_ = \
multinomial_samples[sampling_type].to(torch.long)
if sampled_token_ids_tensor is not None: if sampled_token_ids_tensor is not None:
# Store sampled tokens in output tensor. # Store sampled tokens in output tensor.
sampled_token_ids_tensor[long_sample_indices] = \ sampled_token_ids_tensor[long_sample_indices] = \
......
...@@ -11,7 +11,6 @@ from vllm.platforms import current_platform ...@@ -11,7 +11,6 @@ from vllm.platforms import current_platform
from vllm.sequence import (CompletionSequenceGroupOutput, Logprob, from vllm.sequence import (CompletionSequenceGroupOutput, Logprob,
PromptLogprobs, SequenceGroupMetadata, PromptLogprobs, SequenceGroupMetadata,
SequenceOutput) SequenceOutput)
from vllm.zero_overhead.utils import is_zero_overhead
SeqId = int SeqId = int
...@@ -140,6 +139,7 @@ def split_batch_by_proposal_len( ...@@ -140,6 +139,7 @@ def split_batch_by_proposal_len(
zero or not. We should remove this once vLLM supports per-sequence proposal zero or not. We should remove this once vLLM supports per-sequence proposal
lens in a batch. lens in a batch.
""" """
nonzero_lists: Tuple[List[SequenceGroupMetadata], List[int]] = ([], []) nonzero_lists: Tuple[List[SequenceGroupMetadata], List[int]] = ([], [])
zero_lists: Tuple[List[SequenceGroupMetadata], List[int]] = ([], []) zero_lists: Tuple[List[SequenceGroupMetadata], List[int]] = ([], [])
for i, (seq_group, proposal_len) in enumerate( for i, (seq_group, proposal_len) in enumerate(
......
...@@ -987,7 +987,7 @@ class ModelInputForGPUBuilder(ModelRunnerInputBuilderBase[ModelInputForGPU]): ...@@ -987,7 +987,7 @@ class ModelInputForGPUBuilder(ModelRunnerInputBuilderBase[ModelInputForGPU]):
] ]
multi_modal_kwargs = MultiModalKwargs.batch(multi_modal_kwargs_list) multi_modal_kwargs = MultiModalKwargs.batch(multi_modal_kwargs_list)
ret = self.model_input_cls( return self.model_input_cls(
input_tokens=input_tokens_tensor, input_tokens=input_tokens_tensor,
input_positions=input_positions_tensor, input_positions=input_positions_tensor,
token_types=token_types_tensor, token_types=token_types_tensor,
...@@ -1001,8 +1001,6 @@ class ModelInputForGPUBuilder(ModelRunnerInputBuilderBase[ModelInputForGPU]): ...@@ -1001,8 +1001,6 @@ class ModelInputForGPUBuilder(ModelRunnerInputBuilderBase[ModelInputForGPU]):
finished_requests_ids=self.finished_requests_ids, finished_requests_ids=self.finished_requests_ids,
prompt_adapter_mapping=prompt_adapter_mapping, prompt_adapter_mapping=prompt_adapter_mapping,
prompt_adapter_requests=prompt_adapter_requests) prompt_adapter_requests=prompt_adapter_requests)
return ret
class GPUModelRunnerBase(ModelRunnerBase[TModelInputForGPU]): class GPUModelRunnerBase(ModelRunnerBase[TModelInputForGPU]):
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment