Unverified commit a257d9bc, authored by Harry Mellor, committed by GitHub
Browse files

Improve configs - `ObservabilityConfig` (#17453)


Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
parent 015069b0
...@@ -14,6 +14,7 @@ from collections import Counter ...@@ -14,6 +14,7 @@ from collections import Counter
from contextlib import contextmanager from contextlib import contextmanager
from dataclasses import (MISSING, dataclass, field, fields, is_dataclass, from dataclasses import (MISSING, dataclass, field, fields, is_dataclass,
replace) replace)
from functools import cached_property
from importlib.util import find_spec from importlib.util import find_spec
from pathlib import Path from pathlib import Path
from typing import (TYPE_CHECKING, Any, Callable, ClassVar, Literal, Optional, from typing import (TYPE_CHECKING, Any, Callable, ClassVar, Literal, Optional,
...@@ -26,6 +27,7 @@ from transformers import PretrainedConfig ...@@ -26,6 +27,7 @@ from transformers import PretrainedConfig
from typing_extensions import deprecated from typing_extensions import deprecated
import vllm.envs as envs import vllm.envs as envs
from vllm import version
from vllm.compilation.inductor_pass import CallableInductorPass, InductorPass from vllm.compilation.inductor_pass import CallableInductorPass, InductorPass
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.model_executor.layers.quantization import (QUANTIZATION_METHODS, from vllm.model_executor.layers.quantization import (QUANTIZATION_METHODS,
...@@ -3285,20 +3287,55 @@ class DecodingConfig: ...@@ -3285,20 +3287,55 @@ class DecodingConfig:
self.disable_additional_properties = True self.disable_additional_properties = True
DetailedTraceModules = Literal["model", "worker", "all"]
@config
@dataclass @dataclass
class ObservabilityConfig: class ObservabilityConfig:
"""Configuration for observability - metrics and tracing.""" """Configuration for observability - metrics and tracing."""
show_hidden_metrics: bool = False
otlp_traces_endpoint: Optional[str] = None
# Collecting detailed timing information for each request can be expensive. show_hidden_metrics_for_version: Optional[str] = None
"""Enable deprecated Prometheus metrics that have been hidden since the
# If set, collects the model forward time for the request. specified version. For example, if a previously deprecated metric has been
collect_model_forward_time: bool = False hidden since the v0.7.0 release, you use
`--show-hidden-metrics-for-version=0.7` as a temporary escape hatch while
you migrate to new metrics. The metric is likely to be removed completely
in an upcoming release."""
@cached_property
def show_hidden_metrics(self) -> bool:
"""Check if the hidden metrics should be shown."""
if self.show_hidden_metrics_for_version is None:
return False
return version._prev_minor_version_was(
self.show_hidden_metrics_for_version)
# If set, collects the model execute time for the request. otlp_traces_endpoint: Optional[str] = None
collect_model_execute_time: bool = False """Target URL to which OpenTelemetry traces will be sent."""
collect_detailed_traces: Optional[list[DetailedTraceModules]] = None
"""It makes sense to set this only if `--otlp-traces-endpoint` is set. If
set, it will collect detailed traces for the specified modules. This
involves use of possibly costly and or blocking operations and hence might
have a performance impact.
Note that collecting detailed timing information for each request can be
expensive."""
@cached_property
def collect_model_forward_time(self) -> bool:
"""Whether to collect model forward time for the request."""
return (self.collect_detailed_traces is not None
and ("model" in self.collect_detailed_traces
or "all" in self.collect_detailed_traces))
@cached_property
def collect_model_execute_time(self) -> bool:
"""Whether to collect model execute time for the request."""
return (self.collect_detailed_traces is not None
and ("worker" in self.collect_detailed_traces
or "all" in self.collect_detailed_traces))
def compute_hash(self) -> str: def compute_hash(self) -> str:
""" """
...@@ -3320,12 +3357,23 @@ class ObservabilityConfig: ...@@ -3320,12 +3357,23 @@ class ObservabilityConfig:
return hash_str return hash_str
def __post_init__(self): def __post_init__(self):
if (self.collect_detailed_traces is not None
and len(self.collect_detailed_traces) == 1
and "," in self.collect_detailed_traces[0]):
self._parse_collect_detailed_traces()
if not is_otel_available() and self.otlp_traces_endpoint is not None: if not is_otel_available() and self.otlp_traces_endpoint is not None:
raise ValueError( raise ValueError(
"OpenTelemetry is not available. Unable to configure " "OpenTelemetry is not available. Unable to configure "
"'otlp_traces_endpoint'. Ensure OpenTelemetry packages are " "'otlp_traces_endpoint'. Ensure OpenTelemetry packages are "
f"installed. Original error:\n{otel_import_error_traceback}") f"installed. Original error:\n{otel_import_error_traceback}")
def _parse_collect_detailed_traces(self):
assert isinstance(self.collect_detailed_traces, list)
self.collect_detailed_traces = cast(
list[DetailedTraceModules],
self.collect_detailed_traces[0].split(","))
class KVTransferConfig(BaseModel): class KVTransferConfig(BaseModel):
"""Configuration for distributed KV cache transfer.""" """Configuration for distributed KV cache transfer."""
......
...@@ -7,6 +7,7 @@ import json ...@@ -7,6 +7,7 @@ import json
import re import re
import threading import threading
from dataclasses import MISSING, dataclass, fields from dataclasses import MISSING, dataclass, fields
from itertools import permutations
from typing import (Any, Callable, Dict, List, Literal, Optional, Type, from typing import (Any, Callable, Dict, List, Literal, Optional, Type,
TypeVar, Union, cast, get_args, get_origin) TypeVar, Union, cast, get_args, get_origin)
...@@ -14,14 +15,13 @@ import torch ...@@ -14,14 +15,13 @@ import torch
from typing_extensions import TypeIs, deprecated from typing_extensions import TypeIs, deprecated
import vllm.envs as envs import vllm.envs as envs
from vllm import version
from vllm.config import (BlockSize, CacheConfig, CacheDType, CompilationConfig, from vllm.config import (BlockSize, CacheConfig, CacheDType, CompilationConfig,
ConfigFormat, ConfigType, DecodingConfig, Device, ConfigFormat, ConfigType, DecodingConfig,
DeviceConfig, DistributedExecutorBackend, DetailedTraceModules, Device, DeviceConfig,
GuidedDecodingBackend, GuidedDecodingBackendV1, DistributedExecutorBackend, GuidedDecodingBackend,
HfOverrides, KVEventsConfig, KVTransferConfig, GuidedDecodingBackendV1, HfOverrides, KVEventsConfig,
LoadConfig, LoadFormat, LoRAConfig, ModelConfig, KVTransferConfig, LoadConfig, LoadFormat, LoRAConfig,
ModelDType, ModelImpl, MultiModalConfig, ModelConfig, ModelDType, ModelImpl, MultiModalConfig,
ObservabilityConfig, ParallelConfig, PoolerConfig, ObservabilityConfig, ParallelConfig, PoolerConfig,
PrefixCachingHashAlgo, PromptAdapterConfig, PrefixCachingHashAlgo, PromptAdapterConfig,
SchedulerConfig, SchedulerPolicy, SpeculativeConfig, SchedulerConfig, SchedulerPolicy, SpeculativeConfig,
...@@ -41,8 +41,6 @@ from vllm.utils import FlexibleArgumentParser, GiB_bytes, is_in_ray_actor ...@@ -41,8 +41,6 @@ from vllm.utils import FlexibleArgumentParser, GiB_bytes, is_in_ray_actor
logger = init_logger(__name__) logger = init_logger(__name__)
ALLOWED_DETAILED_TRACE_MODULES = ["model", "worker", "all"]
# object is used to allow for special typing forms # object is used to allow for special typing forms
T = TypeVar("T") T = TypeVar("T")
TypeHint = Union[type[Any], object] TypeHint = Union[type[Any], object]
...@@ -337,9 +335,12 @@ class EngineArgs: ...@@ -337,9 +335,12 @@ class EngineArgs:
speculative_config: Optional[Dict[str, Any]] = None speculative_config: Optional[Dict[str, Any]] = None
qlora_adapter_name_or_path: Optional[str] = None qlora_adapter_name_or_path: Optional[str] = None
show_hidden_metrics_for_version: Optional[str] = None show_hidden_metrics_for_version: Optional[str] = \
otlp_traces_endpoint: Optional[str] = None ObservabilityConfig.show_hidden_metrics_for_version
collect_detailed_traces: Optional[str] = None otlp_traces_endpoint: Optional[str] = \
ObservabilityConfig.otlp_traces_endpoint
collect_detailed_traces: Optional[list[DetailedTraceModules]] = \
ObservabilityConfig.collect_detailed_traces
disable_async_output_proc: bool = not ModelConfig.use_async_output_proc disable_async_output_proc: bool = not ModelConfig.use_async_output_proc
scheduling_policy: SchedulerPolicy = SchedulerConfig.policy scheduling_policy: SchedulerPolicy = SchedulerConfig.policy
scheduler_cls: Union[str, Type[object]] = SchedulerConfig.scheduler_cls scheduler_cls: Union[str, Type[object]] = SchedulerConfig.scheduler_cls
...@@ -677,33 +678,29 @@ class EngineArgs: ...@@ -677,33 +678,29 @@ class EngineArgs:
default=None, default=None,
help='Name or path of the QLoRA adapter.') help='Name or path of the QLoRA adapter.')
parser.add_argument('--show-hidden-metrics-for-version', # Observability arguments
type=str, observability_kwargs = get_kwargs(ObservabilityConfig)
default=None, observability_group = parser.add_argument_group(
help='Enable deprecated Prometheus metrics that ' title="ObservabilityConfig",
'have been hidden since the specified version. ' description=ObservabilityConfig.__doc__,
'For example, if a previously deprecated metric ' )
'has been hidden since the v0.7.0 release, you ' observability_group.add_argument(
'use --show-hidden-metrics-for-version=0.7 as a ' "--show-hidden-metrics-for-version",
'temporary escape hatch while you migrate to new ' **observability_kwargs["show_hidden_metrics_for_version"])
'metrics. The metric is likely to be removed ' observability_group.add_argument(
'completely in an upcoming release.') "--otlp-traces-endpoint",
**observability_kwargs["otlp_traces_endpoint"])
parser.add_argument( # TODO: generalise this special case
'--otlp-traces-endpoint', choices = observability_kwargs["collect_detailed_traces"]["choices"]
type=str, metavar = f"{{{','.join(choices)}}}"
default=None, observability_kwargs["collect_detailed_traces"]["metavar"] = metavar
help='Target URL to which OpenTelemetry traces will be sent.') observability_kwargs["collect_detailed_traces"]["choices"] += [
parser.add_argument( ",".join(p)
'--collect-detailed-traces', for p in permutations(get_args(DetailedTraceModules), r=2)
type=str, ]
default=None, observability_group.add_argument(
help="Valid choices are " + "--collect-detailed-traces",
",".join(ALLOWED_DETAILED_TRACE_MODULES) + **observability_kwargs["collect_detailed_traces"])
". It makes sense to set this only if ``--otlp-traces-endpoint`` is"
" set. If set, it will collect detailed traces for the specified "
"modules. This involves use of possibly costly and or blocking "
"operations and hence might have a performance impact.")
# Scheduler arguments # Scheduler arguments
scheduler_kwargs = get_kwargs(SchedulerConfig) scheduler_kwargs = get_kwargs(SchedulerConfig)
...@@ -1094,26 +1091,11 @@ class EngineArgs: ...@@ -1094,26 +1091,11 @@ class EngineArgs:
if self.enable_reasoning else None, if self.enable_reasoning else None,
) )
show_hidden_metrics = False
if self.show_hidden_metrics_for_version is not None:
show_hidden_metrics = version._prev_minor_version_was(
self.show_hidden_metrics_for_version)
detailed_trace_modules = []
if self.collect_detailed_traces is not None:
detailed_trace_modules = self.collect_detailed_traces.split(",")
for m in detailed_trace_modules:
if m not in ALLOWED_DETAILED_TRACE_MODULES:
raise ValueError(
f"Invalid module {m} in collect_detailed_traces. "
f"Valid modules are {ALLOWED_DETAILED_TRACE_MODULES}")
observability_config = ObservabilityConfig( observability_config = ObservabilityConfig(
show_hidden_metrics=show_hidden_metrics, show_hidden_metrics_for_version=self.
show_hidden_metrics_for_version,
otlp_traces_endpoint=self.otlp_traces_endpoint, otlp_traces_endpoint=self.otlp_traces_endpoint,
collect_model_forward_time="model" in detailed_trace_modules collect_detailed_traces=self.collect_detailed_traces,
or "all" in detailed_trace_modules,
collect_model_execute_time="worker" in detailed_trace_modules
or "all" in detailed_trace_modules,
) )
config = VllmConfig( config = VllmConfig(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment