Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
6f8d2618
Unverified
Commit
6f8d2618
authored
Jul 29, 2025
by
Louie Tsai
Committed by
GitHub
Jul 30, 2025
Browse files
Update vLLM Benchmark Suite for Xeon based on 0.9.2 release (#21486)
Signed-off-by:
Tsai, Louie
<
louie.tsai@intel.com
>
parent
4cd7fe6c
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
437 additions
and
1 deletion
+437
-1
.buildkite/nightly-benchmarks/scripts/convert-results-json-to-markdown.py
...ly-benchmarks/scripts/convert-results-json-to-markdown.py
+1
-0
.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
.../nightly-benchmarks/scripts/run-performance-benchmarks.sh
+1
-1
.buildkite/nightly-benchmarks/tests/serving-tests-cpu-snc2.json
...kite/nightly-benchmarks/tests/serving-tests-cpu-snc2.json
+209
-0
.buildkite/nightly-benchmarks/tests/serving-tests-cpu-snc3.json
...kite/nightly-benchmarks/tests/serving-tests-cpu-snc3.json
+211
-0
.buildkite/nightly-benchmarks/tests/serving-tests-cpu.json
.buildkite/nightly-benchmarks/tests/serving-tests-cpu.json
+15
-0
No files found.
.buildkite/nightly-benchmarks/scripts/convert-results-json-to-markdown.py
View file @
6f8d2618
...
@@ -44,6 +44,7 @@ serving_column_mapping = {
...
@@ -44,6 +44,7 @@ serving_column_mapping = {
"test_name"
:
"Test name"
,
"test_name"
:
"Test name"
,
"gpu_type"
:
"GPU"
,
"gpu_type"
:
"GPU"
,
"completed"
:
"# of req."
,
"completed"
:
"# of req."
,
"max_concurrency"
:
"# of max concurrency."
,
"request_throughput"
:
"Tput (req/s)"
,
"request_throughput"
:
"Tput (req/s)"
,
"total_token_throughput"
:
"Total Token Tput (tok/s)"
,
"total_token_throughput"
:
"Total Token Tput (tok/s)"
,
"output_throughput"
:
"Output Tput (tok/s)"
,
"output_throughput"
:
"Output Tput (tok/s)"
,
...
...
.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh
View file @
6f8d2618
...
@@ -33,7 +33,7 @@ check_gpus() {
...
@@ -33,7 +33,7 @@ check_gpus() {
check_cpus
()
{
check_cpus
()
{
# check the number of CPUs and NUMA Node and GPU type.
# check the number of CPUs and NUMA Node and GPU type.
declare
-g
numa_count
=
$(
python3
-c
"from numa import info;numa_size = info.get_num_configured_nodes(); print(numa_size)"
)
declare
-g
numa_count
=
$(
lscpu |
grep
"NUMA node(s):"
|
awk
'{print $3}'
)
if
[[
$numa_count
-gt
0
]]
;
then
if
[[
$numa_count
-gt
0
]]
;
then
echo
"NUMA found."
echo
"NUMA found."
echo
$numa_count
echo
$numa_count
...
...
.buildkite/nightly-benchmarks/tests/serving-tests-cpu-snc2.json
0 → 100644
View file @
6f8d2618
[
{
"test_name"
:
"serving_llama8B_tp1_sharegpt"
,
"qps_list"
:
[
1
,
4
,
16
,
"inf"
],
"server_environment_variables"
:
{
"VLLM_RPC_TIMEOUT"
:
100000
,
"VLLM_ALLOW_LONG_MAX_MODEL_LEN"
:
1
,
"VLLM_ENGINE_ITERATION_TIMEOUT_S"
:
120
,
"VLLM_CPU_SGL_KERNEL"
:
1
,
"VLLM_CPU_KVCACHE_SPACE"
:
40
},
"server_parameters"
:
{
"model"
:
"meta-llama/Meta-Llama-3.1-8B-Instruct"
,
"tensor_parallel_size"
:
1
,
"dtype"
:
"bfloat16"
,
"distributed_executor_backend"
:
"mp"
,
"block_size"
:
128
,
"trust_remote_code"
:
""
,
"disable_log_stats"
:
""
,
"disable_log_requests"
:
""
,
"enforce_eager"
:
""
,
"max_num_batched_tokens"
:
2048
,
"max_num_seqs"
:
256
,
"load_format"
:
"dummy"
},
"client_parameters"
:
{
"model"
:
"meta-llama/Meta-Llama-3.1-8B-Instruct"
,
"backend"
:
"vllm"
,
"dataset_name"
:
"sharegpt"
,
"dataset_path"
:
"./ShareGPT_V3_unfiltered_cleaned_split.json"
,
"max_concurrency"
:
60
,
"num_prompts"
:
200
}
},
{
"test_name"
:
"serving_llama8B_tp2_sharegpt"
,
"qps_list"
:
[
1
,
4
,
16
,
"inf"
],
"server_environment_variables"
:
{
"VLLM_RPC_TIMEOUT"
:
100000
,
"VLLM_ALLOW_LONG_MAX_MODEL_LEN"
:
1
,
"VLLM_ENGINE_ITERATION_TIMEOUT_S"
:
120
,
"VLLM_CPU_SGL_KERNEL"
:
1
,
"VLLM_CPU_KVCACHE_SPACE"
:
40
},
"server_parameters"
:
{
"model"
:
"meta-llama/Meta-Llama-3.1-8B-Instruct"
,
"tensor_parallel_size"
:
2
,
"dtype"
:
"bfloat16"
,
"distributed_executor_backend"
:
"mp"
,
"block_size"
:
128
,
"trust_remote_code"
:
""
,
"disable_log_stats"
:
""
,
"disable_log_requests"
:
""
,
"enforce_eager"
:
""
,
"max_num_batched_tokens"
:
2048
,
"max_num_seqs"
:
256
,
"load_format"
:
"dummy"
},
"client_parameters"
:
{
"model"
:
"meta-llama/Meta-Llama-3.1-8B-Instruct"
,
"backend"
:
"vllm"
,
"dataset_name"
:
"sharegpt"
,
"dataset_path"
:
"./ShareGPT_V3_unfiltered_cleaned_split.json"
,
"max_concurrency"
:
60
,
"num_prompts"
:
200
}
},
{
"test_name"
:
"serving_llama8B_tp4_sharegpt"
,
"qps_list"
:
[
1
,
4
,
16
,
"inf"
],
"server_environment_variables"
:
{
"VLLM_RPC_TIMEOUT"
:
100000
,
"VLLM_ALLOW_LONG_MAX_MODEL_LEN"
:
1
,
"VLLM_ENGINE_ITERATION_TIMEOUT_S"
:
120
,
"VLLM_CPU_SGL_KERNEL"
:
1
,
"VLLM_CPU_KVCACHE_SPACE"
:
40
},
"server_parameters"
:
{
"model"
:
"meta-llama/Meta-Llama-3.1-8B-Instruct"
,
"tensor_parallel_size"
:
4
,
"dtype"
:
"bfloat16"
,
"distributed_executor_backend"
:
"mp"
,
"block_size"
:
128
,
"trust_remote_code"
:
""
,
"disable_log_stats"
:
""
,
"disable_log_requests"
:
""
,
"enforce_eager"
:
""
,
"max_num_batched_tokens"
:
2048
,
"max_num_seqs"
:
256
,
"load_format"
:
"dummy"
},
"client_parameters"
:
{
"model"
:
"meta-llama/Meta-Llama-3.1-8B-Instruct"
,
"backend"
:
"vllm"
,
"dataset_name"
:
"sharegpt"
,
"dataset_path"
:
"./ShareGPT_V3_unfiltered_cleaned_split.json"
,
"max_concurrency"
:
60
,
"num_prompts"
:
200
}
},
{
"test_name"
:
"serving_llama8B_tp1_random_128_128"
,
"qps_list"
:
[
1
,
4
,
16
,
"inf"
],
"server_environment_variables"
:
{
"VLLM_RPC_TIMEOUT"
:
100000
,
"VLLM_ALLOW_LONG_MAX_MODEL_LEN"
:
1
,
"VLLM_ENGINE_ITERATION_TIMEOUT_S"
:
120
,
"VLLM_CPU_SGL_KERNEL"
:
1
,
"VLLM_CPU_KVCACHE_SPACE"
:
40
},
"server_parameters"
:
{
"model"
:
"meta-llama/Meta-Llama-3.1-8B-Instruct"
,
"tensor_parallel_size"
:
1
,
"dtype"
:
"bfloat16"
,
"distributed_executor_backend"
:
"mp"
,
"block_size"
:
128
,
"trust_remote_code"
:
""
,
"enable_chunked_prefill"
:
""
,
"disable_log_stats"
:
""
,
"disable_log_requests"
:
""
,
"enforce_eager"
:
""
,
"max_num_batched_tokens"
:
2048
,
"max_num_seqs"
:
256
,
"load_format"
:
"dummy"
},
"client_parameters"
:
{
"model"
:
"meta-llama/Meta-Llama-3.1-8B-Instruct"
,
"backend"
:
"vllm"
,
"dataset_name"
:
"random"
,
"random-input-len"
:
128
,
"random-output-len"
:
128
,
"ignore-eos"
:
""
,
"max_concurrency"
:
1000
,
"num_prompts"
:
1000
}
},
{
"test_name"
:
"serving_llama8B_tp2_random_128_128"
,
"qps_list"
:
[
1
,
4
,
16
,
"inf"
],
"server_environment_variables"
:
{
"VLLM_RPC_TIMEOUT"
:
100000
,
"VLLM_ALLOW_LONG_MAX_MODEL_LEN"
:
1
,
"VLLM_ENGINE_ITERATION_TIMEOUT_S"
:
120
,
"VLLM_CPU_SGL_KERNEL"
:
1
,
"VLLM_CPU_KVCACHE_SPACE"
:
40
},
"server_parameters"
:
{
"model"
:
"meta-llama/Meta-Llama-3.1-8B-Instruct"
,
"tensor_parallel_size"
:
2
,
"dtype"
:
"bfloat16"
,
"distributed_executor_backend"
:
"mp"
,
"block_size"
:
128
,
"trust_remote_code"
:
""
,
"enable_chunked_prefill"
:
""
,
"disable_log_stats"
:
""
,
"disable_log_requests"
:
""
,
"enforce_eager"
:
""
,
"max_num_batched_tokens"
:
2048
,
"max_num_seqs"
:
256
,
"load_format"
:
"dummy"
},
"client_parameters"
:
{
"model"
:
"meta-llama/Meta-Llama-3.1-8B-Instruct"
,
"backend"
:
"vllm"
,
"dataset_name"
:
"random"
,
"random-input-len"
:
128
,
"random-output-len"
:
128
,
"ignore-eos"
:
""
,
"max_concurrency"
:
1000
,
"num_prompts"
:
1000
}
},
{
"test_name"
:
"serving_llama8B_tp4_random_128_128"
,
"qps_list"
:
[
1
,
4
,
16
,
"inf"
],
"server_environment_variables"
:
{
"VLLM_RPC_TIMEOUT"
:
100000
,
"VLLM_ALLOW_LONG_MAX_MODEL_LEN"
:
1
,
"VLLM_ENGINE_ITERATION_TIMEOUT_S"
:
120
,
"VLLM_CPU_SGL_KERNEL"
:
1
,
"VLLM_CPU_KVCACHE_SPACE"
:
40
},
"server_parameters"
:
{
"model"
:
"meta-llama/Meta-Llama-3.1-8B-Instruct"
,
"tensor_parallel_size"
:
4
,
"dtype"
:
"bfloat16"
,
"distributed_executor_backend"
:
"mp"
,
"block_size"
:
128
,
"trust_remote_code"
:
""
,
"enable_chunked_prefill"
:
""
,
"disable_log_stats"
:
""
,
"disable_log_requests"
:
""
,
"enforce_eager"
:
""
,
"max_num_batched_tokens"
:
2048
,
"max_num_seqs"
:
256
,
"load_format"
:
"dummy"
},
"client_parameters"
:
{
"model"
:
"meta-llama/Meta-Llama-3.1-8B-Instruct"
,
"backend"
:
"vllm"
,
"dataset_name"
:
"random"
,
"random-input-len"
:
128
,
"random-output-len"
:
128
,
"ignore-eos"
:
""
,
"max_concurrency"
:
1000
,
"num_prompts"
:
1000
}
}
]
.buildkite/nightly-benchmarks/tests/serving-tests-cpu-snc3.json
0 → 100644
View file @
6f8d2618
[
{
"test_name"
:
"serving_llama8B_pp1_sharegpt"
,
"qps_list"
:
[
1
,
4
,
16
,
"inf"
],
"server_environment_variables"
:
{
"VLLM_RPC_TIMEOUT"
:
100000
,
"VLLM_ALLOW_LONG_MAX_MODEL_LEN"
:
1
,
"VLLM_ENGINE_ITERATION_TIMEOUT_S"
:
120
,
"VLLM_CPU_SGL_KERNEL"
:
1
,
"VLLM_CPU_KVCACHE_SPACE"
:
40
},
"server_parameters"
:
{
"model"
:
"meta-llama/Meta-Llama-3.1-8B-Instruct"
,
"pipeline_parallel_size"
:
1
,
"dtype"
:
"bfloat16"
,
"distributed_executor_backend"
:
"mp"
,
"block_size"
:
128
,
"trust_remote_code"
:
""
,
"disable_log_stats"
:
""
,
"disable_log_requests"
:
""
,
"enforce_eager"
:
""
,
"max_num_batched_tokens"
:
2048
,
"max_num_seqs"
:
256
,
"load_format"
:
"dummy"
},
"client_parameters"
:
{
"model"
:
"meta-llama/Meta-Llama-3.1-8B-Instruct"
,
"backend"
:
"vllm"
,
"dataset_name"
:
"sharegpt"
,
"dataset_path"
:
"./ShareGPT_V3_unfiltered_cleaned_split.json"
,
"max_concurrency"
:
60
,
"num_prompts"
:
200
}
},
{
"test_name"
:
"serving_llama8B_pp3_sharegpt"
,
"qps_list"
:
[
1
,
4
,
16
,
"inf"
],
"server_environment_variables"
:
{
"VLLM_RPC_TIMEOUT"
:
100000
,
"VLLM_ALLOW_LONG_MAX_MODEL_LEN"
:
1
,
"VLLM_ENGINE_ITERATION_TIMEOUT_S"
:
120
,
"VLLM_CPU_SGL_KERNEL"
:
1
,
"VLLM_CPU_KVCACHE_SPACE"
:
40
},
"server_parameters"
:
{
"model"
:
"meta-llama/Meta-Llama-3.1-8B-Instruct"
,
"pipeline_parallel_size"
:
3
,
"dtype"
:
"bfloat16"
,
"distributed_executor_backend"
:
"mp"
,
"block_size"
:
128
,
"trust_remote_code"
:
""
,
"disable_log_stats"
:
""
,
"disable_log_requests"
:
""
,
"enforce_eager"
:
""
,
"max_num_batched_tokens"
:
2048
,
"max_num_seqs"
:
256
,
"load_format"
:
"dummy"
},
"client_parameters"
:
{
"model"
:
"meta-llama/Meta-Llama-3.1-8B-Instruct"
,
"backend"
:
"vllm"
,
"dataset_name"
:
"sharegpt"
,
"dataset_path"
:
"./ShareGPT_V3_unfiltered_cleaned_split.json"
,
"max_concurrency"
:
60
,
"num_prompts"
:
200
}
},
{
"test_name"
:
"serving_llama8B_tp2pp3_sharegpt"
,
"qps_list"
:
[
1
,
4
,
16
,
"inf"
],
"server_environment_variables"
:
{
"VLLM_RPC_TIMEOUT"
:
100000
,
"VLLM_ALLOW_LONG_MAX_MODEL_LEN"
:
1
,
"VLLM_ENGINE_ITERATION_TIMEOUT_S"
:
120
,
"VLLM_CPU_SGL_KERNEL"
:
1
,
"VLLM_CPU_KVCACHE_SPACE"
:
40
},
"server_parameters"
:
{
"model"
:
"meta-llama/Meta-Llama-3.1-8B-Instruct"
,
"tensor_parallel_size"
:
2
,
"pipeline_parallel_size"
:
3
,
"dtype"
:
"bfloat16"
,
"distributed_executor_backend"
:
"mp"
,
"block_size"
:
128
,
"trust_remote_code"
:
""
,
"disable_log_stats"
:
""
,
"disable_log_requests"
:
""
,
"enforce_eager"
:
""
,
"max_num_batched_tokens"
:
2048
,
"max_num_seqs"
:
256
,
"load_format"
:
"dummy"
},
"client_parameters"
:
{
"model"
:
"meta-llama/Meta-Llama-3.1-8B-Instruct"
,
"backend"
:
"vllm"
,
"dataset_name"
:
"sharegpt"
,
"dataset_path"
:
"./ShareGPT_V3_unfiltered_cleaned_split.json"
,
"max_concurrency"
:
60
,
"num_prompts"
:
200
}
},
{
"test_name"
:
"serving_llama8B_pp1_random_128_128"
,
"qps_list"
:
[
1
,
4
,
16
,
"inf"
],
"server_environment_variables"
:
{
"VLLM_RPC_TIMEOUT"
:
100000
,
"VLLM_ALLOW_LONG_MAX_MODEL_LEN"
:
1
,
"VLLM_ENGINE_ITERATION_TIMEOUT_S"
:
120
,
"VLLM_CPU_SGL_KERNEL"
:
1
,
"VLLM_CPU_KVCACHE_SPACE"
:
40
},
"server_parameters"
:
{
"model"
:
"meta-llama/Meta-Llama-3.1-8B-Instruct"
,
"pipeline_parallel_size"
:
1
,
"dtype"
:
"bfloat16"
,
"distributed_executor_backend"
:
"mp"
,
"block_size"
:
128
,
"trust_remote_code"
:
""
,
"enable_chunked_prefill"
:
""
,
"disable_log_stats"
:
""
,
"disable_log_requests"
:
""
,
"enforce_eager"
:
""
,
"max_num_batched_tokens"
:
2048
,
"max_num_seqs"
:
256
,
"load_format"
:
"dummy"
},
"client_parameters"
:
{
"model"
:
"meta-llama/Meta-Llama-3.1-8B-Instruct"
,
"backend"
:
"vllm"
,
"dataset_name"
:
"random"
,
"random-input-len"
:
128
,
"random-output-len"
:
128
,
"ignore-eos"
:
""
,
"max_concurrency"
:
1000
,
"num_prompts"
:
1000
}
},
{
"test_name"
:
"serving_llama8B_pp3_random_128_128"
,
"qps_list"
:
[
1
,
4
,
16
,
"inf"
],
"server_environment_variables"
:
{
"VLLM_RPC_TIMEOUT"
:
100000
,
"VLLM_ALLOW_LONG_MAX_MODEL_LEN"
:
1
,
"VLLM_ENGINE_ITERATION_TIMEOUT_S"
:
120
,
"VLLM_CPU_SGL_KERNEL"
:
1
,
"VLLM_CPU_KVCACHE_SPACE"
:
40
},
"server_parameters"
:
{
"model"
:
"meta-llama/Meta-Llama-3.1-8B-Instruct"
,
"pipeline_parallel_size"
:
3
,
"dtype"
:
"bfloat16"
,
"distributed_executor_backend"
:
"mp"
,
"block_size"
:
128
,
"trust_remote_code"
:
""
,
"enable_chunked_prefill"
:
""
,
"disable_log_stats"
:
""
,
"disable_log_requests"
:
""
,
"enforce_eager"
:
""
,
"max_num_batched_tokens"
:
2048
,
"max_num_seqs"
:
256
,
"load_format"
:
"dummy"
},
"client_parameters"
:
{
"model"
:
"meta-llama/Meta-Llama-3.1-8B-Instruct"
,
"backend"
:
"vllm"
,
"dataset_name"
:
"random"
,
"random-input-len"
:
128
,
"random-output-len"
:
128
,
"ignore-eos"
:
""
,
"max_concurrency"
:
1000
,
"num_prompts"
:
1000
}
},
{
"test_name"
:
"serving_llama8B_tp2pp3_random_128_128"
,
"qps_list"
:
[
1
,
4
,
16
,
"inf"
],
"server_environment_variables"
:
{
"VLLM_RPC_TIMEOUT"
:
100000
,
"VLLM_ALLOW_LONG_MAX_MODEL_LEN"
:
1
,
"VLLM_ENGINE_ITERATION_TIMEOUT_S"
:
120
,
"VLLM_CPU_SGL_KERNEL"
:
1
,
"VLLM_CPU_KVCACHE_SPACE"
:
40
},
"server_parameters"
:
{
"model"
:
"meta-llama/Meta-Llama-3.1-8B-Instruct"
,
"tensor_parallel_size"
:
2
,
"pipeline_parallel_size"
:
3
,
"dtype"
:
"bfloat16"
,
"distributed_executor_backend"
:
"mp"
,
"block_size"
:
128
,
"trust_remote_code"
:
""
,
"enable_chunked_prefill"
:
""
,
"disable_log_stats"
:
""
,
"disable_log_requests"
:
""
,
"enforce_eager"
:
""
,
"max_num_batched_tokens"
:
2048
,
"max_num_seqs"
:
256
,
"load_format"
:
"dummy"
},
"client_parameters"
:
{
"model"
:
"meta-llama/Meta-Llama-3.1-8B-Instruct"
,
"backend"
:
"vllm"
,
"dataset_name"
:
"random"
,
"random-input-len"
:
128
,
"random-output-len"
:
128
,
"ignore-eos"
:
""
,
"max_concurrency"
:
1000
,
"num_prompts"
:
1000
}
}
]
.buildkite/nightly-benchmarks/tests/serving-tests-cpu.json
View file @
6f8d2618
...
@@ -6,6 +6,7 @@
...
@@ -6,6 +6,7 @@
"VLLM_RPC_TIMEOUT"
:
100000
,
"VLLM_RPC_TIMEOUT"
:
100000
,
"VLLM_ALLOW_LONG_MAX_MODEL_LEN"
:
1
,
"VLLM_ALLOW_LONG_MAX_MODEL_LEN"
:
1
,
"VLLM_ENGINE_ITERATION_TIMEOUT_S"
:
120
,
"VLLM_ENGINE_ITERATION_TIMEOUT_S"
:
120
,
"VLLM_CPU_SGL_KERNEL"
:
1
,
"VLLM_CPU_KVCACHE_SPACE"
:
40
"VLLM_CPU_KVCACHE_SPACE"
:
40
},
},
"server_parameters"
:
{
"server_parameters"
:
{
...
@@ -18,6 +19,8 @@
...
@@ -18,6 +19,8 @@
"disable_log_stats"
:
""
,
"disable_log_stats"
:
""
,
"disable_log_requests"
:
""
,
"disable_log_requests"
:
""
,
"enforce_eager"
:
""
,
"enforce_eager"
:
""
,
"max_num_batched_tokens"
:
2048
,
"max_num_seqs"
:
256
,
"load_format"
:
"dummy"
"load_format"
:
"dummy"
},
},
"client_parameters"
:
{
"client_parameters"
:
{
...
@@ -36,6 +39,7 @@
...
@@ -36,6 +39,7 @@
"VLLM_RPC_TIMEOUT"
:
100000
,
"VLLM_RPC_TIMEOUT"
:
100000
,
"VLLM_ALLOW_LONG_MAX_MODEL_LEN"
:
1
,
"VLLM_ALLOW_LONG_MAX_MODEL_LEN"
:
1
,
"VLLM_ENGINE_ITERATION_TIMEOUT_S"
:
120
,
"VLLM_ENGINE_ITERATION_TIMEOUT_S"
:
120
,
"VLLM_CPU_SGL_KERNEL"
:
1
,
"VLLM_CPU_KVCACHE_SPACE"
:
40
"VLLM_CPU_KVCACHE_SPACE"
:
40
},
},
"server_parameters"
:
{
"server_parameters"
:
{
...
@@ -48,6 +52,8 @@
...
@@ -48,6 +52,8 @@
"disable_log_stats"
:
""
,
"disable_log_stats"
:
""
,
"disable_log_requests"
:
""
,
"disable_log_requests"
:
""
,
"enforce_eager"
:
""
,
"enforce_eager"
:
""
,
"max_num_batched_tokens"
:
2048
,
"max_num_seqs"
:
256
,
"load_format"
:
"dummy"
"load_format"
:
"dummy"
},
},
"client_parameters"
:
{
"client_parameters"
:
{
...
@@ -66,6 +72,7 @@
...
@@ -66,6 +72,7 @@
"VLLM_RPC_TIMEOUT"
:
100000
,
"VLLM_RPC_TIMEOUT"
:
100000
,
"VLLM_ALLOW_LONG_MAX_MODEL_LEN"
:
1
,
"VLLM_ALLOW_LONG_MAX_MODEL_LEN"
:
1
,
"VLLM_ENGINE_ITERATION_TIMEOUT_S"
:
120
,
"VLLM_ENGINE_ITERATION_TIMEOUT_S"
:
120
,
"VLLM_CPU_SGL_KERNEL"
:
1
,
"VLLM_CPU_KVCACHE_SPACE"
:
40
"VLLM_CPU_KVCACHE_SPACE"
:
40
},
},
"server_parameters"
:
{
"server_parameters"
:
{
...
@@ -78,6 +85,8 @@
...
@@ -78,6 +85,8 @@
"disable_log_stats"
:
""
,
"disable_log_stats"
:
""
,
"disable_log_requests"
:
""
,
"disable_log_requests"
:
""
,
"enforce_eager"
:
""
,
"enforce_eager"
:
""
,
"max_num_batched_tokens"
:
2048
,
"max_num_seqs"
:
256
,
"load_format"
:
"dummy"
"load_format"
:
"dummy"
},
},
"client_parameters"
:
{
"client_parameters"
:
{
...
@@ -96,6 +105,7 @@
...
@@ -96,6 +105,7 @@
"VLLM_RPC_TIMEOUT"
:
100000
,
"VLLM_RPC_TIMEOUT"
:
100000
,
"VLLM_ALLOW_LONG_MAX_MODEL_LEN"
:
1
,
"VLLM_ALLOW_LONG_MAX_MODEL_LEN"
:
1
,
"VLLM_ENGINE_ITERATION_TIMEOUT_S"
:
120
,
"VLLM_ENGINE_ITERATION_TIMEOUT_S"
:
120
,
"VLLM_CPU_SGL_KERNEL"
:
1
,
"VLLM_CPU_KVCACHE_SPACE"
:
40
"VLLM_CPU_KVCACHE_SPACE"
:
40
},
},
"server_parameters"
:
{
"server_parameters"
:
{
...
@@ -109,6 +119,8 @@
...
@@ -109,6 +119,8 @@
"disable_log_stats"
:
""
,
"disable_log_stats"
:
""
,
"disable_log_requests"
:
""
,
"disable_log_requests"
:
""
,
"enforce_eager"
:
""
,
"enforce_eager"
:
""
,
"max_num_batched_tokens"
:
2048
,
"max_num_seqs"
:
256
,
"load_format"
:
"dummy"
"load_format"
:
"dummy"
},
},
"client_parameters"
:
{
"client_parameters"
:
{
...
@@ -129,6 +141,7 @@
...
@@ -129,6 +141,7 @@
"VLLM_RPC_TIMEOUT"
:
100000
,
"VLLM_RPC_TIMEOUT"
:
100000
,
"VLLM_ALLOW_LONG_MAX_MODEL_LEN"
:
1
,
"VLLM_ALLOW_LONG_MAX_MODEL_LEN"
:
1
,
"VLLM_ENGINE_ITERATION_TIMEOUT_S"
:
120
,
"VLLM_ENGINE_ITERATION_TIMEOUT_S"
:
120
,
"VLLM_CPU_SGL_KERNEL"
:
1
,
"VLLM_CPU_KVCACHE_SPACE"
:
40
"VLLM_CPU_KVCACHE_SPACE"
:
40
},
},
"server_parameters"
:
{
"server_parameters"
:
{
...
@@ -142,6 +155,8 @@
...
@@ -142,6 +155,8 @@
"disable_log_stats"
:
""
,
"disable_log_stats"
:
""
,
"disable_log_requests"
:
""
,
"disable_log_requests"
:
""
,
"enforce_eager"
:
""
,
"enforce_eager"
:
""
,
"max_num_batched_tokens"
:
2048
,
"max_num_seqs"
:
256
,
"load_format"
:
"dummy"
"load_format"
:
"dummy"
},
},
"client_parameters"
:
{
"client_parameters"
:
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment