Unverified commit 46e678bc, authored by Woosuk Kwon, committed by GitHub
Browse files

[Minor] Use larger batch sizes for A100/B100/B200/MI300x (#17073)


Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
parent 6b2427f9
...@@ -35,7 +35,7 @@ from vllm.reasoning import ReasoningParserManager ...@@ -35,7 +35,7 @@ from vllm.reasoning import ReasoningParserManager
from vllm.test_utils import MODEL_WEIGHTS_S3_BUCKET, MODELS_ON_S3 from vllm.test_utils import MODEL_WEIGHTS_S3_BUCKET, MODELS_ON_S3
from vllm.transformers_utils.utils import check_gguf_file from vllm.transformers_utils.utils import check_gguf_file
from vllm.usage.usage_lib import UsageContext from vllm.usage.usage_lib import UsageContext
from vllm.utils import FlexibleArgumentParser, is_in_ray_actor from vllm.utils import FlexibleArgumentParser, GiB_bytes, is_in_ray_actor
# yapf: enable # yapf: enable
...@@ -1625,13 +1625,13 @@ class EngineArgs: ...@@ -1625,13 +1625,13 @@ class EngineArgs:
# values for non-H100/H200 GPUs. # values for non-H100/H200 GPUs.
try: try:
from vllm.platforms import current_platform from vllm.platforms import current_platform
device_name = current_platform.get_device_name().lower() device_memory = current_platform.get_device_total_memory()
except Exception: except Exception:
# This is only used to set default_max_num_batched_tokens # This is only used to set default_max_num_batched_tokens
device_name = "no-device" device_memory = 0
if "h100" in device_name or "h200" in device_name: if device_memory >= 70 * GiB_bytes:
# For H100 and H200, we use larger default values. # For GPUs like H100 and MI300x, use larger default values.
default_max_num_batched_tokens = { default_max_num_batched_tokens = {
UsageContext.LLM_CLASS: 16384, UsageContext.LLM_CLASS: 16384,
UsageContext.OPENAI_API_SERVER: 8192, UsageContext.OPENAI_API_SERVER: 8192,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment