Unverified commit f68dd998, authored by Lianmin Zheng, committed by GitHub
Browse files

Rename customer label -> custom label (#10899)


Co-authored-by: Yingchun Lai <laiyingchun@apache.org>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
parent 35ec2a45
......@@ -164,8 +164,8 @@ Please consult the documentation below and [server_args.py](https://github.com/s
| `--kv-events-config` | Config in json format for NVIDIA dynamo KV event publishing. Publishing will be enabled if this flag is used. | None |
| `--decode-log-interval` | The log interval of decode batch. | 40 |
| `--enable-request-time-stats-logging` | Enable per request time stats logging. | False |
| `--prompt-tokens-buckets` | The buckets rule of prompt tokens. Supports 3 rule types: 'default' uses predefined buckets; 'tse <middle> <base> <count>' generates two sides exponential distributed buckets (e.g., 'tse 1000 2 8' generates buckets [984.0, 992.0, 996.0, 998.0, 1000.0, 1002.0, 1004.0, 1008.0, 1016.0]).); 'customer <value1> <value2> ...' uses custom bucket values (e.g., 'customer 10 50 100 500'). | None |
| `--generation-tokens-buckets` | The buckets rule of prompt tokens. Supports 3 rule types: 'default' uses predefined buckets; 'tse <middle> <base> <count>' generates two sides exponential distributed buckets (e.g., 'tse 1000 2 8' generates buckets [984.0, 992.0, 996.0, 998.0, 1000.0, 1002.0, 1004.0, 1008.0, 1016.0]).); 'customer <value1> <value2> ...' uses custom bucket values (e.g., 'customer 10 50 100 500'). | None |
| `--prompt-tokens-buckets` | The bucket rule for prompt tokens. Supports 3 rule types: 'default' uses predefined buckets; 'tse <middle> <base> <count>' generates two-sided exponentially distributed buckets (e.g., 'tse 1000 2 8' generates buckets [984.0, 992.0, 996.0, 998.0, 1000.0, 1002.0, 1004.0, 1008.0, 1016.0]); 'custom <value1> <value2> ...' uses custom bucket values (e.g., 'custom 10 50 100 500'). | None |
| `--generation-tokens-buckets` | The bucket rule for generation tokens. Supports 3 rule types: 'default' uses predefined buckets; 'tse <middle> <base> <count>' generates two-sided exponentially distributed buckets (e.g., 'tse 1000 2 8' generates buckets [984.0, 992.0, 996.0, 998.0, 1000.0, 1002.0, 1004.0, 1008.0, 1016.0]); 'custom <value1> <value2> ...' uses custom bucket values (e.g., 'custom 10 50 100 500'). | None |
## API related
......
......@@ -235,8 +235,8 @@ class CompletionRequest(BaseModel):
# Priority for the request
priority: Optional[int] = None
# For customer metric labels
customer_labels: Optional[Dict[str, str]] = None
# For custom metric labels
custom_labels: Optional[Dict[str, str]] = None
@field_validator("max_tokens")
@classmethod
......
......@@ -27,10 +27,10 @@ class OpenAIServingBase(ABC):
self.tokenizer_manager = tokenizer_manager
self.allowed_custom_labels = (
set(
self.tokenizer_manager.server_args.tokenizer_metrics_allowed_customer_labels
self.tokenizer_manager.server_args.tokenizer_metrics_allowed_custom_labels
)
if isinstance(self.tokenizer_manager.server_args, ServerArgs)
and self.tokenizer_manager.server_args.tokenizer_metrics_allowed_customer_labels
and self.tokenizer_manager.server_args.tokenizer_metrics_allowed_custom_labels
else None
)
......@@ -178,14 +178,14 @@ class OpenAIServingBase(ABC):
)
return json.dumps({"error": error.model_dump()})
def extract_customer_labels(self, raw_request):
def extract_custom_labels(self, raw_request):
if (
not self.allowed_custom_labels
or not self.tokenizer_manager.server_args.tokenizer_metrics_custom_labels_header
):
return None
customer_labels = None
custom_labels = None
header = (
self.tokenizer_manager.server_args.tokenizer_metrics_custom_labels_header
)
......@@ -200,9 +200,9 @@ class OpenAIServingBase(ABC):
raw_labels = None
if isinstance(raw_labels, dict):
customer_labels = {
custom_labels = {
label: value
for label, value in raw_labels.items()
if label in self.allowed_custom_labels
}
return customer_labels
return custom_labels
......@@ -128,8 +128,8 @@ class OpenAIServingChat(OpenAIServingBase):
else:
prompt_kwargs = {"input_ids": processed_messages.prompt_ids}
# Extract customer labels from raw request headers
customer_labels = self.extract_customer_labels(raw_request)
# Extract custom labels from raw request headers
custom_labels = self.extract_custom_labels(raw_request)
adapted_request = GenerateReqInput(
**prompt_kwargs,
......@@ -151,7 +151,7 @@ class OpenAIServingChat(OpenAIServingBase):
rid=request.rid,
extra_key=self._compute_extra_key(request),
priority=request.priority,
customer_labels=customer_labels,
custom_labels=custom_labels,
)
return adapted_request, request
......
......@@ -90,8 +90,8 @@ class OpenAIServingCompletion(OpenAIServingBase):
else:
prompt_kwargs = {"input_ids": prompt}
# Extract customer labels from raw request headers
customer_labels = self.extract_customer_labels(raw_request)
# Extract custom labels from raw request headers
custom_labels = self.extract_custom_labels(raw_request)
adapted_request = GenerateReqInput(
**prompt_kwargs,
......@@ -109,7 +109,7 @@ class OpenAIServingCompletion(OpenAIServingBase):
rid=request.rid,
extra_key=self._compute_extra_key(request),
priority=request.priority,
customer_labels=customer_labels,
custom_labels=custom_labels,
)
return adapted_request, request
......
......@@ -143,8 +143,8 @@ class GenerateReqInput:
# Image gen grpc migration
return_bytes: bool = False
# For customer metric labels
customer_labels: Optional[Dict[str, str]] = None
# For custom metric labels
custom_labels: Optional[Dict[str, str]] = None
def contains_mm_input(self) -> bool:
return (
......
......@@ -320,8 +320,8 @@ class TokenizerManager(TokenizerCommunicatorMixin):
"model_name": self.server_args.served_model_name,
# TODO: Add lora name/path in the future,
}
if server_args.tokenizer_metrics_allowed_customer_labels:
for label in server_args.tokenizer_metrics_allowed_customer_labels:
if server_args.tokenizer_metrics_allowed_custom_labels:
for label in server_args.tokenizer_metrics_allowed_custom_labels:
labels[label] = ""
self.metrics_collector = TokenizerMetricsCollector(
server_args=server_args,
......@@ -1633,10 +1633,10 @@ class TokenizerManager(TokenizerCommunicatorMixin):
else 0
)
customer_labels = getattr(state.obj, "customer_labels", None)
custom_labels = getattr(state.obj, "custom_labels", None)
labels = (
{**self.metrics_collector.labels, **customer_labels}
if customer_labels
{**self.metrics_collector.labels, **custom_labels}
if custom_labels
else self.metrics_collector.labels
)
if (
......
......@@ -44,7 +44,7 @@ def generate_buckets(
return two_sides_exponential_buckets(float(middle), float(base), int(count))
if rule == "default":
return sorted(set(default_buckets))
assert rule == "customer"
assert rule == "custom"
return sorted(set([float(x) for x in buckets_rule[1:]]))
......
......@@ -213,8 +213,8 @@ class ServerArgs:
show_time_cost: bool = False
enable_metrics: bool = False
enable_metrics_for_all_schedulers: bool = False
tokenizer_metrics_custom_labels_header: str = "x-customer-labels"
tokenizer_metrics_allowed_customer_labels: Optional[List[str]] = None
tokenizer_metrics_custom_labels_header: str = "x-custom-labels"
tokenizer_metrics_allowed_custom_labels: Optional[List[str]] = None
bucket_time_to_first_token: Optional[List[float]] = None
bucket_inter_token_latency: Optional[List[float]] = None
bucket_e2e_request_latency: Optional[List[float]] = None
......@@ -1077,10 +1077,10 @@ class ServerArgs:
def _handle_metrics_labels(self):
if (
not self.tokenizer_metrics_custom_labels_header
and self.tokenizer_metrics_allowed_customer_labels
and self.tokenizer_metrics_allowed_custom_labels
):
raise ValueError(
"Please set --tokenizer-metrics-custom-labels-header when setting --tokenizer-metrics-allowed-customer-labels."
"Please set --tokenizer-metrics-custom-labels-header when setting --tokenizer-metrics-allowed-custom-labels."
)
def _handle_deterministic_inference(self):
......@@ -1535,16 +1535,16 @@ class ServerArgs:
"--tokenizer-metrics-custom-labels-header",
type=str,
default=ServerArgs.tokenizer_metrics_custom_labels_header,
help="Specify the HTTP header for passing customer labels for tokenizer metrics.",
help="Specify the HTTP header for passing custom labels for tokenizer metrics.",
)
parser.add_argument(
"--tokenizer-metrics-allowed-customer-labels",
"--tokenizer-metrics-allowed-custom-labels",
type=str,
nargs="+",
default=ServerArgs.tokenizer_metrics_allowed_customer_labels,
help="The customer labels allowed for tokenizer metrics. The labels are specified via a dict in "
default=ServerArgs.tokenizer_metrics_allowed_custom_labels,
help="The custom labels allowed for tokenizer metrics. The labels are specified via a dict in "
"'--tokenizer-metrics-custom-labels-header' field in HTTP requests, e.g., {'label1': 'value1', 'label2': "
"'value2'} is allowed if '--tokenizer-metrics-allowed-labels label1 label2' is set.",
"'value2'} is allowed if '--tokenizer-metrics-allowed-custom-labels label1 label2' is set.",
)
parser.add_argument(
"--bucket-time-to-first-token",
......@@ -1576,8 +1576,8 @@ class ServerArgs:
bucket_rule = (
"Supports 3 rule types: 'default' uses predefined buckets; 'tse <middle> <base> <count>' "
"generates two sides exponential distributed buckets (e.g., 'tse 1000 2 8' generates buckets "
"[984.0, 992.0, 996.0, 998.0, 1000.0, 1002.0, 1004.0, 1008.0, 1016.0]).); 'customer <value1> "
"<value2> ...' uses custom bucket values (e.g., 'customer 10 50 100 500')."
"[984.0, 992.0, 996.0, 998.0, 1000.0, 1002.0, 1004.0, 1008.0, 1016.0]).); 'custom <value1> "
"<value2> ...' uses custom bucket values (e.g., 'custom 10 50 100 500')."
)
parser.add_argument(
"--prompt-tokens-buckets",
......@@ -2857,8 +2857,8 @@ class ServerArgs:
assert rule in [
"tse",
"default",
"customer",
], f"Unsupported {arg_name} rule type: '{rule}'. Must be one of: 'tse', 'default', 'customer'"
"custom",
], f"Unsupported {arg_name} rule type: '{rule}'. Must be one of: 'tse', 'default', 'custom'"
if rule == "tse":
assert (
......@@ -2881,20 +2881,20 @@ class ServerArgs:
len(buckets_rule) == 1
), f"{arg_name} default rule should only have one parameter: ['default'], got {len(buckets_rule)}"
elif rule == "customer":
elif rule == "custom":
assert (
len(buckets_rule) >= 2
), f"{arg_name} customer rule requires at least one bucket value: ['customer', value1, ...]"
), f"{arg_name} custom rule requires at least one bucket value: ['custom', value1, ...]"
try:
bucket_values = [float(x) for x in buckets_rule[1:]]
except ValueError:
assert False, f"{arg_name} customer rule bucket values must be numeric"
assert False, f"{arg_name} custom rule bucket values must be numeric"
assert len(set(bucket_values)) == len(
bucket_values
), f"{arg_name} customer rule bucket values should not contain duplicates"
), f"{arg_name} custom rule bucket values should not contain duplicates"
assert all(
val >= 0 for val in bucket_values
), f"{arg_name} customer rule bucket values should be non-negative"
), f"{arg_name} custom rule bucket values should be non-negative"
def adjust_mem_fraction_for_vlm(self, model_config):
vision_config = getattr(model_config.hf_config, "vision_config", None)
......
......@@ -81,23 +81,23 @@ class TestMetricsUtils(unittest.TestCase):
expected = two_sides_exponential_buckets(10.0, 2.0, 4)
self.assertEqual(result, expected)
def test_generate_buckets_customer(self):
"""Test generate_buckets with customer rule."""
def test_generate_buckets_custom(self):
"""Test generate_buckets with custom rule."""
default_buckets = [1.0, 5.0, 10.0]
# Test with "customer" rule
# Test with "custom" rule
result = generate_buckets(
["customer", "1.5", "3.2", "7.8", "15.6"], default_buckets
["custom", "1.5", "3.2", "7.8", "15.6"], default_buckets
)
expected = [1.5, 3.2, 7.8, 15.6]
self.assertEqual(result, expected)
def test_generate_buckets_customer_with_integers(self):
"""Test generate_buckets with customer rule using integer strings."""
def test_generate_buckets_custom_with_integers(self):
"""Test generate_buckets with custom rule using integer strings."""
default_buckets = [1.0, 5.0, 10.0]
# Test with integer strings
result = generate_buckets(["customer", "1", "5", "10", "50"], default_buckets)
result = generate_buckets(["custom", "1", "5", "10", "50"], default_buckets)
expected = [1.0, 5.0, 10.0, 50.0]
self.assertEqual(result, expected)
......@@ -110,9 +110,9 @@ class TestMetricsUtils(unittest.TestCase):
self.assertEqual(result, default_buckets)
self.assertIsInstance(result, list)
# Test customer rule with proper float conversion
# Test custom rule with proper float conversion
result = generate_buckets(
["customer", "100", "50", "10", "5", "1"], default_buckets
["custom", "100", "50", "10", "5", "1"], default_buckets
)
expected = [1.0, 5.0, 10.0, 50.0, 100.0]
self.assertEqual(result, expected)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment