"vscode:/vscode.git/clone" did not exist on "20ad730cfde470de79a59eae6ed20938a23ace3c"
Unverified Commit 9f2f95af authored by hhzhang16's avatar hhzhang16 Committed by GitHub
Browse files

fix: overriding vLLM boolean flags (#1670)

parent 9d7c5df5
......@@ -102,12 +102,28 @@ class ServiceConfig(dict):
# Strip prefix if needed
arg_key = key[len(prefix) :] if prefix and key.startswith(prefix) else key
# vLLM arguments that default to True and need explicit false handling
# Based on https://github.com/vllm-project/vllm/blob/v0.9.1/vllm/config.py
vllm_true_defaults = {
"enable_prefix_caching": "no-enable-prefix-caching",
"use_tqdm_on_load": "no-use-tqdm-on-load",
"multi_step_stream_outputs": "no-multi-step-stream-outputs",
}
# Normalize key for comparison (replace hyphens with underscores)
normalized_key = arg_key.replace("-", "_")
# Convert to CLI format
if isinstance(value, bool):
if value:
args.extend([f"--{arg_key}", "true"])
# Always output true values as flags
args.append(f"--{arg_key}")
else:
args.extend([f"--{arg_key}", "false"])
# For false values, check if this is a vLLM arg that defaults to True
if normalized_key in vllm_true_defaults:
# Use negative flag if available
args.append(f"--{vllm_true_defaults[normalized_key]}")
# For other false values, omit entirely (standard action="store_true" behavior)
elif isinstance(value, dict):
args.extend([f"--{arg_key}", json.dumps(value)])
else:
......
......@@ -143,7 +143,7 @@ def test_service_config_override_common_configs():
def test_explicit_boolean_arguments():
"""Test that boolean arguments are handled with explicit true/false values"""
"""Test that boolean arguments are handled correctly with new logic"""
# Reset singleton instance
ServiceConfig._instance = None
......@@ -160,21 +160,65 @@ def test_explicit_boolean_arguments():
}
"""
# Get arguments and verify explicit boolean handling
# Get arguments and verify boolean handling
service_config = ServiceConfig.get_instance()
vllm_worker_args = service_config.as_args("VllmWorker")
# Check that true values are passed as --arg true
# Check that true values are passed as flags only
assert "--enable-prefix-caching" in vllm_worker_args
# Should NOT have a following "true" value
enable_idx = vllm_worker_args.index("--enable-prefix-caching")
assert vllm_worker_args[enable_idx + 1] == "true"
assert (
enable_idx == len(vllm_worker_args) - 1
or not vllm_worker_args[enable_idx + 1] == "true"
)
# Check that false values are passed as --arg false
assert "--disable-sliding-window" in vllm_worker_args
disable_idx = vllm_worker_args.index("--disable-sliding-window")
assert vllm_worker_args[disable_idx + 1] == "false"
# Check that false values for standard boolean flags are omitted
assert "--disable-sliding-window" not in vllm_worker_args
# Check that another true value works
# Check that another true value works as flag
assert "--enforce-eager" in vllm_worker_args
enforce_idx = vllm_worker_args.index("--enforce-eager")
assert vllm_worker_args[enforce_idx + 1] == "true"
assert (
enforce_idx == len(vllm_worker_args) - 1
or not vllm_worker_args[enforce_idx + 1] == "true"
)
def test_vllm_boolean_arguments_special_handling():
"""Test that vLLM boolean arguments with special defaults are handled correctly"""
# Reset singleton instance
ServiceConfig._instance = None
# Set environment variable with vLLM boolean configs
os.environ[
"DYNAMO_SERVICE_CONFIG"
] = """
{
"VllmWorker": {
"enable-prefix-caching": false,
"use-tqdm-on-load": false,
"multi-step-stream-outputs": false,
"some-other-flag": false
}
}
"""
# Get arguments and verify vLLM special boolean handling
service_config = ServiceConfig.get_instance()
vllm_worker_args = service_config.as_args("VllmWorker")
# Check that enable-prefix-caching false uses negative flag
assert "--no-enable-prefix-caching" in vllm_worker_args
assert "--enable-prefix-caching" not in vllm_worker_args
# Check that use-tqdm-on-load false uses negative flag
assert "--no-use-tqdm-on-load" in vllm_worker_args
assert "--use-tqdm-on-load" not in vllm_worker_args
# Check that multi-step-stream-outputs false uses negative flag
assert "--no-multi-step-stream-outputs" in vllm_worker_args
assert "--multi-step-stream-outputs" not in vllm_worker_args
# Check that other false flags are omitted (standard behavior)
assert "--some-other-flag" not in vllm_worker_args
......@@ -307,9 +307,9 @@ def parse_vllm_args(service_name, prefix) -> AsyncEngineArgs:
# Add custom arguments
parser.add_argument("--router", type=str, choices=["random", "round-robin", "kv"], default="random")
parser.add_argument("--remote-prefill", action="store_true")
parser.add_argument("--remote-prefill", action="store_true", default=False)
# Add VLLM's arguments
# Add VLLM's arguments (ServiceConfig handles True defaults automatically)
parser = AsyncEngineArgs.add_cli_args(parser)
# Parse both custom and VLLM arguments
......@@ -325,6 +325,33 @@ def parse_vllm_args(service_name, prefix) -> AsyncEngineArgs:
return engine_args
```
#### Boolean Argument Handling
ServiceConfig uses a targeted approach for boolean arguments to maintain compatibility with different argument parsers:
1. Standard Boolean Handling:
- `true` → outputs just the flag (e.g., `--enable-feature`)
- `false` → omitted entirely (uses parser's default)
2. vLLM True-Default Arguments (targeted override support):
- Automatically detects vLLM arguments that default to `True` and need explicit `false` handling
- `enable-prefix-caching: false``--no-enable-prefix-caching` (negative flag)
- `multi-step-stream-outputs: false``--no-multi-step-stream-outputs` (negative flag)
```yaml
# Example YAML configuration
VllmWorker:
# Standard boolean flags (action="store_true" style)
enforce-eager: true # → --enforce-eager
disable-logging: false # → (omitted)
# vLLM arguments with True defaults (automatically handled)
enable-prefix-caching: false # → --no-enable-prefix-caching
# Non-boolean arguments
max-model-len: 16384 # → --max-model-len 16384
```
#### Overriding Service Decorator with ServiceArgs
The `ServiceArgs` section in YAML configuration allows you to override any parameter in the `@service` decorator:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment