Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
8baa4549
Unverified
Commit
8baa4549
authored
Sep 11, 2024
by
Aarni Koskela
Committed by
GitHub
Sep 11, 2024
Browse files
[Misc] Move device options to a single place (#8322)
parent
73202dbe
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
23 additions
and
20 deletions
+23
-20
benchmarks/benchmark_latency.py
benchmarks/benchmark_latency.py
+6
-8
benchmarks/benchmark_throughput.py
benchmarks/benchmark_throughput.py
+6
-8
vllm/engine/arg_utils.py
vllm/engine/arg_utils.py
+11
-4
No files found.
benchmarks/benchmark_latency.py
View file @
8baa4549
...
...
@@ -10,7 +10,7 @@ import torch
from
tqdm
import
tqdm
from
vllm
import
LLM
,
SamplingParams
from
vllm.engine.arg_utils
import
EngineArgs
from
vllm.engine.arg_utils
import
DEVICE_OPTIONS
,
EngineArgs
from
vllm.inputs
import
PromptInputs
from
vllm.model_executor.layers.quantization
import
QUANTIZATION_METHODS
from
vllm.utils
import
FlexibleArgumentParser
...
...
@@ -205,13 +205,11 @@ if __name__ == '__main__':
default
=
None
,
help
=
(
'path to save the pytorch profiler output. Can be visualized '
'with ui.perfetto.dev or Tensorboard.'
))
parser
.
add_argument
(
"--device"
,
parser
.
add_argument
(
"--device"
,
type
=
str
,
default
=
"auto"
,
choices
=
[
"auto"
,
"cuda"
,
"cpu"
,
"openvino"
,
"tpu"
,
"xpu"
],
help
=
'device type for vLLM execution, supporting CUDA, OpenVINO and '
'CPU.'
)
choices
=
DEVICE_OPTIONS
,
help
=
'device type for vLLM execution'
)
parser
.
add_argument
(
'--block-size'
,
type
=
int
,
default
=
16
,
...
...
benchmarks/benchmark_throughput.py
View file @
8baa4549
...
...
@@ -11,7 +11,7 @@ from tqdm import tqdm
from
transformers
import
(
AutoModelForCausalLM
,
AutoTokenizer
,
PreTrainedTokenizerBase
)
from
vllm.engine.arg_utils
import
AsyncEngineArgs
,
EngineArgs
from
vllm.engine.arg_utils
import
DEVICE_OPTIONS
,
AsyncEngineArgs
,
EngineArgs
from
vllm.entrypoints.openai.api_server
import
(
build_async_engine_client_from_engine_args
)
from
vllm.model_executor.layers.quantization
import
QUANTIZATION_METHODS
...
...
@@ -451,13 +451,11 @@ if __name__ == "__main__":
'accuracy issues. FP8_E5M2 (without scaling) is only supported on '
'cuda version greater than 11.8. On ROCm (AMD GPU), FP8_E4M3 is '
'instead supported for common inference criteria.'
)
parser
.
add_argument
(
"--device"
,
parser
.
add_argument
(
"--device"
,
type
=
str
,
default
=
"auto"
,
choices
=
[
"auto"
,
"cuda"
,
"cpu"
,
"openvino"
,
"tpu"
,
"xpu"
],
help
=
'device type for vLLM execution, supporting CUDA, OpenVINO and '
'CPU.'
)
choices
=
DEVICE_OPTIONS
,
help
=
'device type for vLLM execution'
)
parser
.
add_argument
(
"--num-scheduler-steps"
,
type
=
int
,
...
...
vllm/engine/arg_utils.py
View file @
8baa4549
...
...
@@ -26,6 +26,16 @@ logger = init_logger(__name__)
ALLOWED_DETAILED_TRACE_MODULES
=
[
"model"
,
"worker"
,
"all"
]
DEVICE_OPTIONS
=
[
"auto"
,
"cuda"
,
"neuron"
,
"cpu"
,
"openvino"
,
"tpu"
,
"xpu"
,
]
def
nullable_str
(
val
:
str
):
if
not
val
or
val
==
"None"
:
...
...
@@ -553,10 +563,7 @@ class EngineArgs:
parser
.
add_argument
(
"--device"
,
type
=
str
,
default
=
EngineArgs
.
device
,
choices
=
[
"auto"
,
"cuda"
,
"neuron"
,
"cpu"
,
"openvino"
,
"tpu"
,
"xpu"
],
choices
=
DEVICE_OPTIONS
,
help
=
'Device type for vLLM execution.'
)
parser
.
add_argument
(
'--num-scheduler-steps'
,
type
=
int
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment