Unverified Commit f68dd998 authored by Lianmin Zheng, committed by GitHub
Browse files

Rename customer label -> custom label (#10899)


Co-authored-by: Yingchun Lai <laiyingchun@apache.org>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
parent 35ec2a45
...@@ -164,8 +164,8 @@ Please consult the documentation below and [server_args.py](https://github.com/s ...@@ -164,8 +164,8 @@ Please consult the documentation below and [server_args.py](https://github.com/s
| `--kv-events-config` | Config in json format for NVIDIA dynamo KV event publishing. Publishing will be enabled if this flag is used. | None | | `--kv-events-config` | Config in json format for NVIDIA dynamo KV event publishing. Publishing will be enabled if this flag is used. | None |
| `--decode-log-interval` | The log interval of decode batch. | 40 | | `--decode-log-interval` | The log interval of decode batch. | 40 |
| `--enable-request-time-stats-logging` | Enable per request time stats logging. | False | | `--enable-request-time-stats-logging` | Enable per request time stats logging. | False |
| `--prompt-tokens-buckets` | The buckets rule of prompt tokens. Supports 3 rule types: 'default' uses predefined buckets; 'tse <middle> <base> <count>' generates two sides exponential distributed buckets (e.g., 'tse 1000 2 8' generates buckets [984.0, 992.0, 996.0, 998.0, 1000.0, 1002.0, 1004.0, 1008.0, 1016.0]).); 'customer <value1> <value2> ...' uses custom bucket values (e.g., 'customer 10 50 100 500'). | None | | `--prompt-tokens-buckets` | The buckets rule of prompt tokens. Supports 3 rule types: 'default' uses predefined buckets; 'tse <middle> <base> <count>' generates two sides exponential distributed buckets (e.g., 'tse 1000 2 8' generates buckets [984.0, 992.0, 996.0, 998.0, 1000.0, 1002.0, 1004.0, 1008.0, 1016.0]).); 'custom <value1> <value2> ...' uses custom bucket values (e.g., 'custom 10 50 100 500'). | None |
| `--generation-tokens-buckets` | The buckets rule of prompt tokens. Supports 3 rule types: 'default' uses predefined buckets; 'tse <middle> <base> <count>' generates two sides exponential distributed buckets (e.g., 'tse 1000 2 8' generates buckets [984.0, 992.0, 996.0, 998.0, 1000.0, 1002.0, 1004.0, 1008.0, 1016.0]).); 'customer <value1> <value2> ...' uses custom bucket values (e.g., 'customer 10 50 100 500'). | None | | `--generation-tokens-buckets` | The buckets rule of generation tokens. Supports 3 rule types: 'default' uses predefined buckets; 'tse <middle> <base> <count>' generates two sides exponential distributed buckets (e.g., 'tse 1000 2 8' generates buckets [984.0, 992.0, 996.0, 998.0, 1000.0, 1002.0, 1004.0, 1008.0, 1016.0]).); 'custom <value1> <value2> ...' uses custom bucket values (e.g., 'custom 10 50 100 500'). | None |
## API related ## API related
......
...@@ -235,8 +235,8 @@ class CompletionRequest(BaseModel): ...@@ -235,8 +235,8 @@ class CompletionRequest(BaseModel):
# Priority for the request # Priority for the request
priority: Optional[int] = None priority: Optional[int] = None
# For customer metric labels # For custom metric labels
customer_labels: Optional[Dict[str, str]] = None custom_labels: Optional[Dict[str, str]] = None
@field_validator("max_tokens") @field_validator("max_tokens")
@classmethod @classmethod
......
...@@ -27,10 +27,10 @@ class OpenAIServingBase(ABC): ...@@ -27,10 +27,10 @@ class OpenAIServingBase(ABC):
self.tokenizer_manager = tokenizer_manager self.tokenizer_manager = tokenizer_manager
self.allowed_custom_labels = ( self.allowed_custom_labels = (
set( set(
self.tokenizer_manager.server_args.tokenizer_metrics_allowed_customer_labels self.tokenizer_manager.server_args.tokenizer_metrics_allowed_custom_labels
) )
if isinstance(self.tokenizer_manager.server_args, ServerArgs) if isinstance(self.tokenizer_manager.server_args, ServerArgs)
and self.tokenizer_manager.server_args.tokenizer_metrics_allowed_customer_labels and self.tokenizer_manager.server_args.tokenizer_metrics_allowed_custom_labels
else None else None
) )
...@@ -178,14 +178,14 @@ class OpenAIServingBase(ABC): ...@@ -178,14 +178,14 @@ class OpenAIServingBase(ABC):
) )
return json.dumps({"error": error.model_dump()}) return json.dumps({"error": error.model_dump()})
def extract_customer_labels(self, raw_request): def extract_custom_labels(self, raw_request):
if ( if (
not self.allowed_custom_labels not self.allowed_custom_labels
or not self.tokenizer_manager.server_args.tokenizer_metrics_custom_labels_header or not self.tokenizer_manager.server_args.tokenizer_metrics_custom_labels_header
): ):
return None return None
customer_labels = None custom_labels = None
header = ( header = (
self.tokenizer_manager.server_args.tokenizer_metrics_custom_labels_header self.tokenizer_manager.server_args.tokenizer_metrics_custom_labels_header
) )
...@@ -200,9 +200,9 @@ class OpenAIServingBase(ABC): ...@@ -200,9 +200,9 @@ class OpenAIServingBase(ABC):
raw_labels = None raw_labels = None
if isinstance(raw_labels, dict): if isinstance(raw_labels, dict):
customer_labels = { custom_labels = {
label: value label: value
for label, value in raw_labels.items() for label, value in raw_labels.items()
if label in self.allowed_custom_labels if label in self.allowed_custom_labels
} }
return customer_labels return custom_labels
...@@ -128,8 +128,8 @@ class OpenAIServingChat(OpenAIServingBase): ...@@ -128,8 +128,8 @@ class OpenAIServingChat(OpenAIServingBase):
else: else:
prompt_kwargs = {"input_ids": processed_messages.prompt_ids} prompt_kwargs = {"input_ids": processed_messages.prompt_ids}
# Extract customer labels from raw request headers # Extract custom labels from raw request headers
customer_labels = self.extract_customer_labels(raw_request) custom_labels = self.extract_custom_labels(raw_request)
adapted_request = GenerateReqInput( adapted_request = GenerateReqInput(
**prompt_kwargs, **prompt_kwargs,
...@@ -151,7 +151,7 @@ class OpenAIServingChat(OpenAIServingBase): ...@@ -151,7 +151,7 @@ class OpenAIServingChat(OpenAIServingBase):
rid=request.rid, rid=request.rid,
extra_key=self._compute_extra_key(request), extra_key=self._compute_extra_key(request),
priority=request.priority, priority=request.priority,
customer_labels=customer_labels, custom_labels=custom_labels,
) )
return adapted_request, request return adapted_request, request
......
...@@ -90,8 +90,8 @@ class OpenAIServingCompletion(OpenAIServingBase): ...@@ -90,8 +90,8 @@ class OpenAIServingCompletion(OpenAIServingBase):
else: else:
prompt_kwargs = {"input_ids": prompt} prompt_kwargs = {"input_ids": prompt}
# Extract customer labels from raw request headers # Extract custom labels from raw request headers
customer_labels = self.extract_customer_labels(raw_request) custom_labels = self.extract_custom_labels(raw_request)
adapted_request = GenerateReqInput( adapted_request = GenerateReqInput(
**prompt_kwargs, **prompt_kwargs,
...@@ -109,7 +109,7 @@ class OpenAIServingCompletion(OpenAIServingBase): ...@@ -109,7 +109,7 @@ class OpenAIServingCompletion(OpenAIServingBase):
rid=request.rid, rid=request.rid,
extra_key=self._compute_extra_key(request), extra_key=self._compute_extra_key(request),
priority=request.priority, priority=request.priority,
customer_labels=customer_labels, custom_labels=custom_labels,
) )
return adapted_request, request return adapted_request, request
......
...@@ -143,8 +143,8 @@ class GenerateReqInput: ...@@ -143,8 +143,8 @@ class GenerateReqInput:
# Image gen grpc migration # Image gen grpc migration
return_bytes: bool = False return_bytes: bool = False
# For customer metric labels # For custom metric labels
customer_labels: Optional[Dict[str, str]] = None custom_labels: Optional[Dict[str, str]] = None
def contains_mm_input(self) -> bool: def contains_mm_input(self) -> bool:
return ( return (
......
...@@ -320,8 +320,8 @@ class TokenizerManager(TokenizerCommunicatorMixin): ...@@ -320,8 +320,8 @@ class TokenizerManager(TokenizerCommunicatorMixin):
"model_name": self.server_args.served_model_name, "model_name": self.server_args.served_model_name,
# TODO: Add lora name/path in the future, # TODO: Add lora name/path in the future,
} }
if server_args.tokenizer_metrics_allowed_customer_labels: if server_args.tokenizer_metrics_allowed_custom_labels:
for label in server_args.tokenizer_metrics_allowed_customer_labels: for label in server_args.tokenizer_metrics_allowed_custom_labels:
labels[label] = "" labels[label] = ""
self.metrics_collector = TokenizerMetricsCollector( self.metrics_collector = TokenizerMetricsCollector(
server_args=server_args, server_args=server_args,
...@@ -1633,10 +1633,10 @@ class TokenizerManager(TokenizerCommunicatorMixin): ...@@ -1633,10 +1633,10 @@ class TokenizerManager(TokenizerCommunicatorMixin):
else 0 else 0
) )
customer_labels = getattr(state.obj, "customer_labels", None) custom_labels = getattr(state.obj, "custom_labels", None)
labels = ( labels = (
{**self.metrics_collector.labels, **customer_labels} {**self.metrics_collector.labels, **custom_labels}
if customer_labels if custom_labels
else self.metrics_collector.labels else self.metrics_collector.labels
) )
if ( if (
......
...@@ -44,7 +44,7 @@ def generate_buckets( ...@@ -44,7 +44,7 @@ def generate_buckets(
return two_sides_exponential_buckets(float(middle), float(base), int(count)) return two_sides_exponential_buckets(float(middle), float(base), int(count))
if rule == "default": if rule == "default":
return sorted(set(default_buckets)) return sorted(set(default_buckets))
assert rule == "customer" assert rule == "custom"
return sorted(set([float(x) for x in buckets_rule[1:]])) return sorted(set([float(x) for x in buckets_rule[1:]]))
......
...@@ -213,8 +213,8 @@ class ServerArgs: ...@@ -213,8 +213,8 @@ class ServerArgs:
show_time_cost: bool = False show_time_cost: bool = False
enable_metrics: bool = False enable_metrics: bool = False
enable_metrics_for_all_schedulers: bool = False enable_metrics_for_all_schedulers: bool = False
tokenizer_metrics_custom_labels_header: str = "x-customer-labels" tokenizer_metrics_custom_labels_header: str = "x-custom-labels"
tokenizer_metrics_allowed_customer_labels: Optional[List[str]] = None tokenizer_metrics_allowed_custom_labels: Optional[List[str]] = None
bucket_time_to_first_token: Optional[List[float]] = None bucket_time_to_first_token: Optional[List[float]] = None
bucket_inter_token_latency: Optional[List[float]] = None bucket_inter_token_latency: Optional[List[float]] = None
bucket_e2e_request_latency: Optional[List[float]] = None bucket_e2e_request_latency: Optional[List[float]] = None
...@@ -1077,10 +1077,10 @@ class ServerArgs: ...@@ -1077,10 +1077,10 @@ class ServerArgs:
def _handle_metrics_labels(self): def _handle_metrics_labels(self):
if ( if (
not self.tokenizer_metrics_custom_labels_header not self.tokenizer_metrics_custom_labels_header
and self.tokenizer_metrics_allowed_customer_labels and self.tokenizer_metrics_allowed_custom_labels
): ):
raise ValueError( raise ValueError(
"Please set --tokenizer-metrics-custom-labels-header when setting --tokenizer-metrics-allowed-customer-labels." "Please set --tokenizer-metrics-custom-labels-header when setting --tokenizer-metrics-allowed-custom-labels."
) )
def _handle_deterministic_inference(self): def _handle_deterministic_inference(self):
...@@ -1535,16 +1535,16 @@ class ServerArgs: ...@@ -1535,16 +1535,16 @@ class ServerArgs:
"--tokenizer-metrics-custom-labels-header", "--tokenizer-metrics-custom-labels-header",
type=str, type=str,
default=ServerArgs.tokenizer_metrics_custom_labels_header, default=ServerArgs.tokenizer_metrics_custom_labels_header,
help="Specify the HTTP header for passing customer labels for tokenizer metrics.", help="Specify the HTTP header for passing custom labels for tokenizer metrics.",
) )
parser.add_argument( parser.add_argument(
"--tokenizer-metrics-allowed-customer-labels", "--tokenizer-metrics-allowed-custom-labels",
type=str, type=str,
nargs="+", nargs="+",
default=ServerArgs.tokenizer_metrics_allowed_customer_labels, default=ServerArgs.tokenizer_metrics_allowed_custom_labels,
help="The customer labels allowed for tokenizer metrics. The labels are specified via a dict in " help="The custom labels allowed for tokenizer metrics. The labels are specified via a dict in "
"'--tokenizer-metrics-custom-labels-header' field in HTTP requests, e.g., {'label1': 'value1', 'label2': " "'--tokenizer-metrics-custom-labels-header' field in HTTP requests, e.g., {'label1': 'value1', 'label2': "
"'value2'} is allowed if '--tokenizer-metrics-allowed-labels label1 label2' is set.", "'value2'} is allowed if '--tokenizer-metrics-allowed-custom-labels label1 label2' is set.",
) )
parser.add_argument( parser.add_argument(
"--bucket-time-to-first-token", "--bucket-time-to-first-token",
...@@ -1576,8 +1576,8 @@ class ServerArgs: ...@@ -1576,8 +1576,8 @@ class ServerArgs:
bucket_rule = ( bucket_rule = (
"Supports 3 rule types: 'default' uses predefined buckets; 'tse <middle> <base> <count>' " "Supports 3 rule types: 'default' uses predefined buckets; 'tse <middle> <base> <count>' "
"generates two sides exponential distributed buckets (e.g., 'tse 1000 2 8' generates buckets " "generates two sides exponential distributed buckets (e.g., 'tse 1000 2 8' generates buckets "
"[984.0, 992.0, 996.0, 998.0, 1000.0, 1002.0, 1004.0, 1008.0, 1016.0]).); 'customer <value1> " "[984.0, 992.0, 996.0, 998.0, 1000.0, 1002.0, 1004.0, 1008.0, 1016.0]).); 'custom <value1> "
"<value2> ...' uses custom bucket values (e.g., 'customer 10 50 100 500')." "<value2> ...' uses custom bucket values (e.g., 'custom 10 50 100 500')."
) )
parser.add_argument( parser.add_argument(
"--prompt-tokens-buckets", "--prompt-tokens-buckets",
...@@ -2857,8 +2857,8 @@ class ServerArgs: ...@@ -2857,8 +2857,8 @@ class ServerArgs:
assert rule in [ assert rule in [
"tse", "tse",
"default", "default",
"customer", "custom",
], f"Unsupported {arg_name} rule type: '{rule}'. Must be one of: 'tse', 'default', 'customer'" ], f"Unsupported {arg_name} rule type: '{rule}'. Must be one of: 'tse', 'default', 'custom'"
if rule == "tse": if rule == "tse":
assert ( assert (
...@@ -2881,20 +2881,20 @@ class ServerArgs: ...@@ -2881,20 +2881,20 @@ class ServerArgs:
len(buckets_rule) == 1 len(buckets_rule) == 1
), f"{arg_name} default rule should only have one parameter: ['default'], got {len(buckets_rule)}" ), f"{arg_name} default rule should only have one parameter: ['default'], got {len(buckets_rule)}"
elif rule == "customer": elif rule == "custom":
assert ( assert (
len(buckets_rule) >= 2 len(buckets_rule) >= 2
), f"{arg_name} customer rule requires at least one bucket value: ['customer', value1, ...]" ), f"{arg_name} custom rule requires at least one bucket value: ['custom', value1, ...]"
try: try:
bucket_values = [float(x) for x in buckets_rule[1:]] bucket_values = [float(x) for x in buckets_rule[1:]]
except ValueError: except ValueError:
assert False, f"{arg_name} customer rule bucket values must be numeric" assert False, f"{arg_name} custom rule bucket values must be numeric"
assert len(set(bucket_values)) == len( assert len(set(bucket_values)) == len(
bucket_values bucket_values
), f"{arg_name} customer rule bucket values should not contain duplicates" ), f"{arg_name} custom rule bucket values should not contain duplicates"
assert all( assert all(
val >= 0 for val in bucket_values val >= 0 for val in bucket_values
), f"{arg_name} customer rule bucket values should be non-negative" ), f"{arg_name} custom rule bucket values should be non-negative"
def adjust_mem_fraction_for_vlm(self, model_config): def adjust_mem_fraction_for_vlm(self, model_config):
vision_config = getattr(model_config.hf_config, "vision_config", None) vision_config = getattr(model_config.hf_config, "vision_config", None)
......
...@@ -81,23 +81,23 @@ class TestMetricsUtils(unittest.TestCase): ...@@ -81,23 +81,23 @@ class TestMetricsUtils(unittest.TestCase):
expected = two_sides_exponential_buckets(10.0, 2.0, 4) expected = two_sides_exponential_buckets(10.0, 2.0, 4)
self.assertEqual(result, expected) self.assertEqual(result, expected)
def test_generate_buckets_customer(self): def test_generate_buckets_custom(self):
"""Test generate_buckets with customer rule.""" """Test generate_buckets with custom rule."""
default_buckets = [1.0, 5.0, 10.0] default_buckets = [1.0, 5.0, 10.0]
# Test with "customer" rule # Test with "custom" rule
result = generate_buckets( result = generate_buckets(
["customer", "1.5", "3.2", "7.8", "15.6"], default_buckets ["custom", "1.5", "3.2", "7.8", "15.6"], default_buckets
) )
expected = [1.5, 3.2, 7.8, 15.6] expected = [1.5, 3.2, 7.8, 15.6]
self.assertEqual(result, expected) self.assertEqual(result, expected)
def test_generate_buckets_customer_with_integers(self): def test_generate_buckets_custom_with_integers(self):
"""Test generate_buckets with customer rule using integer strings.""" """Test generate_buckets with custom rule using integer strings."""
default_buckets = [1.0, 5.0, 10.0] default_buckets = [1.0, 5.0, 10.0]
# Test with integer strings # Test with integer strings
result = generate_buckets(["customer", "1", "5", "10", "50"], default_buckets) result = generate_buckets(["custom", "1", "5", "10", "50"], default_buckets)
expected = [1.0, 5.0, 10.0, 50.0] expected = [1.0, 5.0, 10.0, 50.0]
self.assertEqual(result, expected) self.assertEqual(result, expected)
...@@ -110,9 +110,9 @@ class TestMetricsUtils(unittest.TestCase): ...@@ -110,9 +110,9 @@ class TestMetricsUtils(unittest.TestCase):
self.assertEqual(result, default_buckets) self.assertEqual(result, default_buckets)
self.assertIsInstance(result, list) self.assertIsInstance(result, list)
# Test customer rule with proper float conversion # Test custom rule with proper float conversion
result = generate_buckets( result = generate_buckets(
["customer", "100", "50", "10", "5", "1"], default_buckets ["custom", "100", "50", "10", "5", "1"], default_buckets
) )
expected = [1.0, 5.0, 10.0, 50.0, 100.0] expected = [1.0, 5.0, 10.0, 50.0, 100.0]
self.assertEqual(result, expected) self.assertEqual(result, expected)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment