Unverified Commit 43a62c51 authored by Daniel Serebrenik's avatar Daniel Serebrenik Committed by GitHub
Browse files

Add more documentation and improve usability of lognormal dist...


Add more documentation and improve usability of lognormal dist (benchmark_serving_multi_turn) (#23255)
Signed-off-by: default avatardaniels <daniels@pliops.com>
parent ca2d1925
......@@ -55,6 +55,107 @@ output_num_chunks 166.0 99.01 11.80 79.00 90.00 98.00 108.75
----------------------------------------------------------------------------------------------------
```
### JSON configuration file for synthetic conversations generation
The input flag `--input-file` is used to determine the input conversations for the benchmark.<br/>
When the input is a JSON file with the field `"filetype": "generate_conversations"` the tool will generate synthetic multi-turn (questions and answers) conversations.
The file `generate_multi_turn.json` is an example file.
The file must contain the sections `prompt_input` and `prompt_output`.
The `prompt_input` section must contain `num_turns`, `prefix_num_tokens` and `num_tokens`:
* `num_turns` - Total number of turns in the conversation (both user & assistant).<br/>
The final value will always be rounded to an even number so each user turn has a reply.
* `prefix_num_tokens` - Tokens added at the start of only the **first user turn** in a conversation (unique per conversation).
* `num_tokens` - Total token length of each **user** message (one turn).
The `prompt_output` section must contain `num_tokens`:
* `num_tokens` - Total token length of each **assistant** message (one turn).
### Random distributions for synthetic conversations generation
When creating an input JSON file (such as `generate_multi_turn.json`),<br/>
every numeric field (such as `num_turns` or `num_tokens`) requires a distribution.<br/>
The distribution determines how to randomly sample values for the field.
The available distributions are listed below.
**Note:** The optional `max` field (for lognormal, zipf, and poisson) can be used to cap sampled values at an upper bound.<br/>
This can be used to make sure that the total number of tokens in every request does not exceed `--max-model-len`.
#### constant
```json
{
"distribution": "constant",
"value": 500
}
```
* `value` - the fixed integer value (always returns the same number).
#### uniform
```json
{
"distribution": "uniform",
"min": 12,
"max": 18
}
```
* `min` - minimum value (inclusive).
* `max` - maximum value (inclusive), should be greater than or equal to `min`.
#### lognormal
```json
{
"distribution": "lognormal",
"average": 1000,
"max": 5000
}
```
You can parameterize the lognormal distribution in one of two ways:
Using the average and optional median ratio:
* `average` - target average value of the distribution.
* `median_ratio` - (optional, defaults to 0.85) the ratio of the median to the average; controls the skewness. Must be in the range (0, 1).
Using the parameters of the underlying normal distribution:
* `mean` - mean of the underlying normal distribution.
* `sigma` - standard deviation of the underlying normal distribution.
#### zipf
```json
{
"distribution": "zipf",
"alpha": 1.2,
"max": 100
}
```
* `alpha` - skew parameter (> 1). Larger values produce stronger skew toward smaller integers.
#### poisson
```json
{
"distribution": "poisson",
"alpha": 10,
"max": 50
}
```
* `alpha` - expected value (λ). Also the variance of the distribution.
## ShareGPT Conversations
To run with the ShareGPT data, download the following ShareGPT dataset:
......
......@@ -99,21 +99,105 @@ class PoissonDistribution(Distribution):
class LognormalDistribution(Distribution):
    """Lognormal distribution that samples rounded integer values.

    The distribution can be parameterized in one of two mutually exclusive
    ways:

    * ``average`` (with an optional ``median_ratio``): the mean/sigma of the
      underlying normal distribution are derived from the target average.
    * ``mean`` and ``sigma``: the parameters of the underlying normal
      distribution, passed directly to ``np.random.lognormal``.

    ``max_val`` optionally caps every sampled value at an upper bound.
    """

    def __init__(
        self,
        mean: Optional[float] = None,
        sigma: Optional[float] = None,
        average: Optional[int] = None,
        median_ratio: Optional[float] = None,
        max_val: Optional[int] = None,
    ) -> None:
        """Validate the parameters and resolve the underlying mean/sigma.

        Raises:
            ValueError: if ``average`` is smaller than 1, if ``average`` is
                combined with ``mean``/``sigma``, if ``median_ratio`` is
                outside (0, 1), if ``mean``/``sigma`` are missing when
                ``average`` is not used, or if ``mean <= 0`` / ``sigma < 0``.
        """
        self.average = average
        self.median_ratio = median_ratio
        self.max_val = max_val

        if average is not None:
            if average < 1:
                raise ValueError("Lognormal average must be positive")

            # Compare against None (not truthiness) so that an explicit
            # mean=0 or sigma=0 is still reported as a conflict.
            if mean is not None or sigma is not None:
                raise ValueError(
                    "When using lognormal average, you can't provide mean/sigma"
                )

            if self.median_ratio is None:
                # Default value that provides relatively wide range of values
                self.median_ratio = 0.85

            # Calculate mean/sigma of np.random.lognormal based on the average
            mean, sigma = self._generate_lognormal_by_median(
                target_average=self.average, median_ratio=self.median_ratio
            )
        else:
            if mean is None or sigma is None:
                raise ValueError(
                    "Must provide both mean and sigma if average is not used"
                )

            if mean <= 0 or sigma < 0:
                raise ValueError(
                    "Lognormal mean must be positive and sigma must be non-negative"
                )

        # Mean and standard deviation of the underlying normal distribution
        # Based on numpy.random.lognormal
        self.mean = mean
        self.sigma = sigma

    @staticmethod
    def _generate_lognormal_by_median(
        target_average: int, median_ratio: float
    ) -> tuple[float, float]:
        """
        Compute (mu, sigma) for a lognormal distribution given:
        - a target average (mean of the distribution)
        - a ratio of median / mean (controls skewness), assume mean > median

        Background:
        If Z ~ Normal(mu, sigma^2), then X = exp(Z) ~ LogNormal(mu, sigma).
        * mean(X) = exp(mu + sigma^2 / 2)
        * median(X) = exp(mu)

        So:
        median / mean = exp(mu) / exp(mu + sigma^2 / 2)
                      = exp(-sigma^2 / 2)

        Rearranging:
        sigma^2 = 2 * ln(mean / median)
        mu = ln(median)

        This gives a unique (mu, sigma) for any valid mean and median.

        Raises:
            ValueError: if median_ratio is not in the open range (0, 1).
        """
        # Check input validity: median must be smaller than mean
        if median_ratio <= 0 or median_ratio >= 1:
            raise ValueError("median_ratio must be in range (0, 1)")

        target_median = target_average * median_ratio

        # Solve sigma^2 = 2 * ln(mean / median)
        sigma = np.sqrt(2 * np.log(target_average / target_median))
        mu = np.log(target_median)

        return mu, sigma

    def sample(self, size: int = 1) -> np.ndarray:
        """Draw ``size`` samples and return them as a rounded integer array."""
        samples = np.random.lognormal(mean=self.mean, sigma=self.sigma, size=size)

        # Skip the rescale for an empty draw: the mean of an empty array is
        # NaN and only produces a RuntimeWarning.
        if self.average is not None and samples.size > 0:
            # Scale so that the sample mean equals the requested average.
            # NOTE: with a single sample the sample mean is the sample itself,
            # so the result is exactly `average`.
            samples *= self.average / samples.mean()

        if self.max_val:
            # Cap every sample at the configured upper bound
            samples = np.minimum(samples, self.max_val)

        return np.round(samples).astype(int)

    def __repr__(self) -> str:
        # Show the parameters the user actually configured
        if self.average is not None:
            return (
                f"LognormalDistribution[{self.average}, "
                f"{self.median_ratio}, {self.max_val}]"
            )
        return f"LognormalDistribution[{self.mean}, {self.sigma}, {self.max_val}]"
class GenConvArgs(NamedTuple):
......@@ -173,10 +257,21 @@ def get_random_distribution(
return PoissonDistribution(conf["alpha"], max_val=max_val)
elif distribution == "lognormal":
max_val = conf.get("max", None)
if "average" in conf:
# Infer lognormal mean/sigma (numpy) from input average
median_ratio = conf.get("median_ratio", None)
return LognormalDistribution(
average=conf["average"], median_ratio=median_ratio, max_val=max_val
)
# Use mean/sigma directly (for full control over the distribution)
verify_field_exists(conf, "mean", section, subsection)
verify_field_exists(conf, "sigma", section, subsection)
max_val = conf.get("max", None)
return LognormalDistribution(conf["mean"], conf["sigma"], max_val=max_val)
return LognormalDistribution(
mean=conf["mean"], sigma=conf["sigma"], max_val=max_val
)
elif distribution == "uniform":
verify_field_exists(conf, "min", section, subsection)
......
......@@ -15,9 +15,8 @@
},
"prefix_num_tokens": {
"distribution": "lognormal",
"mean": 6,
"sigma": 4,
"max": 1500
"average": 1000,
"max": 5000
},
"num_tokens": {
"distribution": "uniform",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment