Unverified Commit 8baa4549 authored by Aarni Koskela's avatar Aarni Koskela Committed by GitHub
Browse files

[Misc] Move device options to a single place (#8322)

parent 73202dbe
...@@ -10,7 +10,7 @@ import torch ...@@ -10,7 +10,7 @@ import torch
from tqdm import tqdm from tqdm import tqdm
from vllm import LLM, SamplingParams from vllm import LLM, SamplingParams
from vllm.engine.arg_utils import EngineArgs from vllm.engine.arg_utils import DEVICE_OPTIONS, EngineArgs
from vllm.inputs import PromptInputs from vllm.inputs import PromptInputs
from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS
from vllm.utils import FlexibleArgumentParser from vllm.utils import FlexibleArgumentParser
...@@ -205,13 +205,11 @@ if __name__ == '__main__': ...@@ -205,13 +205,11 @@ if __name__ == '__main__':
default=None, default=None,
help=('path to save the pytorch profiler output. Can be visualized ' help=('path to save the pytorch profiler output. Can be visualized '
'with ui.perfetto.dev or Tensorboard.')) 'with ui.perfetto.dev or Tensorboard.'))
parser.add_argument( parser.add_argument("--device",
"--device", type=str,
type=str, default="auto",
default="auto", choices=DEVICE_OPTIONS,
choices=["auto", "cuda", "cpu", "openvino", "tpu", "xpu"], help='device type for vLLM execution')
help='device type for vLLM execution, supporting CUDA, OpenVINO and '
'CPU.')
parser.add_argument('--block-size', parser.add_argument('--block-size',
type=int, type=int,
default=16, default=16,
......
...@@ -11,7 +11,7 @@ from tqdm import tqdm ...@@ -11,7 +11,7 @@ from tqdm import tqdm
from transformers import (AutoModelForCausalLM, AutoTokenizer, from transformers import (AutoModelForCausalLM, AutoTokenizer,
PreTrainedTokenizerBase) PreTrainedTokenizerBase)
from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs from vllm.engine.arg_utils import DEVICE_OPTIONS, AsyncEngineArgs, EngineArgs
from vllm.entrypoints.openai.api_server import ( from vllm.entrypoints.openai.api_server import (
build_async_engine_client_from_engine_args) build_async_engine_client_from_engine_args)
from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS
...@@ -451,13 +451,11 @@ if __name__ == "__main__": ...@@ -451,13 +451,11 @@ if __name__ == "__main__":
'accuracy issues. FP8_E5M2 (without scaling) is only supported on ' 'accuracy issues. FP8_E5M2 (without scaling) is only supported on '
'cuda version greater than 11.8. On ROCm (AMD GPU), FP8_E4M3 is ' 'cuda version greater than 11.8. On ROCm (AMD GPU), FP8_E4M3 is '
'instead supported for common inference criteria.') 'instead supported for common inference criteria.')
parser.add_argument( parser.add_argument("--device",
"--device", type=str,
type=str, default="auto",
default="auto", choices=DEVICE_OPTIONS,
choices=["auto", "cuda", "cpu", "openvino", "tpu", "xpu"], help='device type for vLLM execution')
help='device type for vLLM execution, supporting CUDA, OpenVINO and '
'CPU.')
parser.add_argument( parser.add_argument(
"--num-scheduler-steps", "--num-scheduler-steps",
type=int, type=int,
......
...@@ -26,6 +26,16 @@ logger = init_logger(__name__) ...@@ -26,6 +26,16 @@ logger = init_logger(__name__)
ALLOWED_DETAILED_TRACE_MODULES = ["model", "worker", "all"] ALLOWED_DETAILED_TRACE_MODULES = ["model", "worker", "all"]
DEVICE_OPTIONS = [
"auto",
"cuda",
"neuron",
"cpu",
"openvino",
"tpu",
"xpu",
]
def nullable_str(val: str): def nullable_str(val: str):
if not val or val == "None": if not val or val == "None":
...@@ -553,10 +563,7 @@ class EngineArgs: ...@@ -553,10 +563,7 @@ class EngineArgs:
parser.add_argument("--device", parser.add_argument("--device",
type=str, type=str,
default=EngineArgs.device, default=EngineArgs.device,
choices=[ choices=DEVICE_OPTIONS,
"auto", "cuda", "neuron", "cpu", "openvino",
"tpu", "xpu"
],
help='Device type for vLLM execution.') help='Device type for vLLM execution.')
parser.add_argument('--num-scheduler-steps', parser.add_argument('--num-scheduler-steps',
type=int, type=int,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment