Commit 29e922ac authored by lizhigong's avatar lizhigong
Browse files

roll back unused change

parent 0ee425a6
......@@ -48,13 +48,13 @@ class MultiprocessingDistributedExecutor(DistributedExecutorBase):
f"is less than than max local gpu count ({cuda_device_count})")
# Set CUDA_VISIBLE_DEVICES for the driver, inherited by workers
# if "CUDA_VISIBLE_DEVICES" or "HIP_VISIBLE_DEVICES" not in os.environ:
# update_environment_variables({
# "CUDA_VISIBLE_DEVICES": (",".join(map(str, range(world_size))))
# })
# update_environment_variables({
# "HIP_VISIBLE_DEVICES": (",".join(map(str, range(world_size))))
# })
if "CUDA_VISIBLE_DEVICES" or "HIP_VISIBLE_DEVICES" not in os.environ:
update_environment_variables({
"CUDA_VISIBLE_DEVICES": (",".join(map(str, range(world_size))))
})
update_environment_variables({
"HIP_VISIBLE_DEVICES": (",".join(map(str, range(world_size))))
})
def _init_executor(self) -> None:
......
......@@ -699,7 +699,7 @@ def _sample_with_torch(
if sampling_type == SamplingType.GREEDY:
greedy_samples = torch.argmax(logprobs[long_sample_indices],
dim=-1)
sampled_token_ids_ = greedy_samples.unsqueeze(-1)
if sampled_token_ids_tensor is not None:
# Store sampled tokens in output tensor.
sampled_token_ids_tensor[
......@@ -736,8 +736,7 @@ def _sample_with_torch(
probs[long_sample_indices],
max_n_in_batch,
seq_groups=seq_groups_arg)
sampled_token_ids_ = \
multinomial_samples[sampling_type].to(torch.long)
if sampled_token_ids_tensor is not None:
# Store sampled tokens in output tensor.
sampled_token_ids_tensor[long_sample_indices] = \
......
......@@ -11,7 +11,6 @@ from vllm.platforms import current_platform
from vllm.sequence import (CompletionSequenceGroupOutput, Logprob,
PromptLogprobs, SequenceGroupMetadata,
SequenceOutput)
from vllm.zero_overhead.utils import is_zero_overhead
SeqId = int
......@@ -140,6 +139,7 @@ def split_batch_by_proposal_len(
zero or not. We should remove this once vLLM supports per-sequence proposal
lens in a batch.
"""
nonzero_lists: Tuple[List[SequenceGroupMetadata], List[int]] = ([], [])
zero_lists: Tuple[List[SequenceGroupMetadata], List[int]] = ([], [])
for i, (seq_group, proposal_len) in enumerate(
......
......@@ -987,7 +987,7 @@ class ModelInputForGPUBuilder(ModelRunnerInputBuilderBase[ModelInputForGPU]):
]
multi_modal_kwargs = MultiModalKwargs.batch(multi_modal_kwargs_list)
ret = self.model_input_cls(
return self.model_input_cls(
input_tokens=input_tokens_tensor,
input_positions=input_positions_tensor,
token_types=token_types_tensor,
......@@ -1002,8 +1002,6 @@ class ModelInputForGPUBuilder(ModelRunnerInputBuilderBase[ModelInputForGPU]):
prompt_adapter_mapping=prompt_adapter_mapping,
prompt_adapter_requests=prompt_adapter_requests)
return ret
class GPUModelRunnerBase(ModelRunnerBase[TModelInputForGPU]):
"""
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment