Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
4eabe123
Commit
4eabe123
authored
May 28, 2025
by
zhuwenwen
Browse files
Merge remote-tracking branch 'mirror/releases/v0.9.0' into v0.9.0-ori
parents
45840cd2
58738772
Changes
670
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
595 additions
and
252 deletions
+595
-252
tests/tensorizer_loader/test_tensorizer.py
tests/tensorizer_loader/test_tensorizer.py
+47
-115
tests/test_logger.py
tests/test_logger.py
+37
-1
tests/test_outputs.py
tests/test_outputs.py
+14
-0
tests/test_regression.py
tests/test_regression.py
+3
-0
tests/tokenization/test_mistral_tokenizer.py
tests/tokenization/test_mistral_tokenizer.py
+110
-6
tests/tool_use/test_tool_choice_required.py
tests/tool_use/test_tool_choice_required.py
+2
-2
tests/tool_use/utils.py
tests/tool_use/utils.py
+1
-1
tests/v1/core/test_scheduler_e2e.py
tests/v1/core/test_scheduler_e2e.py
+10
-1
tests/v1/engine/test_llm_engine.py
tests/v1/engine/test_llm_engine.py
+65
-0
tests/v1/entrypoints/llm/test_struct_output_generate.py
tests/v1/entrypoints/llm/test_struct_output_generate.py
+1
-1
tests/v1/entrypoints/openai/test_completion.py
tests/v1/entrypoints/openai/test_completion.py
+1
-1
tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh
tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh
+11
-2
tests/v1/kv_connector/unit/test_multi_connector.py
tests/v1/kv_connector/unit/test_multi_connector.py
+8
-0
tests/v1/kv_connector/unit/test_remote_prefill_lifecycle.py
tests/v1/kv_connector/unit/test_remote_prefill_lifecycle.py
+81
-0
tests/v1/sample/test_topk_topp_sampler.py
tests/v1/sample/test_topk_topp_sampler.py
+72
-64
tests/v1/sample/utils.py
tests/v1/sample/utils.py
+2
-1
tests/v1/spec_decode/test_eagle.py
tests/v1/spec_decode/test_eagle.py
+14
-53
tests/v1/test_metrics_reader.py
tests/v1/test_metrics_reader.py
+112
-0
tests/v1/test_oracle.py
tests/v1/test_oracle.py
+1
-1
tests/v1/worker/test_gpu_input_batch.py
tests/v1/worker/test_gpu_input_batch.py
+3
-3
No files found.
tests/tensorizer_loader/test_tensorizer.py
View file @
4eabe123
# SPDX-License-Identifier: Apache-2.0
import
gc
import
json
import
os
import
pathlib
import
subprocess
from
functools
import
partial
from
unittest.mock
import
MagicMock
,
patch
import
openai
import
pytest
import
torch
from
huggingface_hub
import
snapshot_download
from
vllm
import
SamplingParams
from
vllm.engine.arg_utils
import
EngineArgs
...
...
@@ -22,12 +18,11 @@ from vllm.model_executor.model_loader.tensorizer import (TensorizerConfig,
is_vllm_tensorized
,
load_with_tensorizer
,
open_stream
,
serialize_vllm_model
,
tensorize_vllm_model
)
# yapf: enable
from
vllm.utils
import
PlaceholderModule
,
import_from_path
from
vllm.utils
import
PlaceholderModule
from
..utils
import
VLLM_PATH
,
RemoteOpenAIServer
from
..utils
import
VLLM_PATH
try
:
from
tensorizer
import
EncryptionParams
...
...
@@ -103,6 +98,7 @@ def test_can_deserialize_s3(vllm_runner):
@
pytest
.
mark
.
skipif
(
not
is_curl_installed
(),
reason
=
"cURL is not installed"
)
def
test_deserialized_encrypted_vllm_model_has_same_outputs
(
vllm_runner
,
tmp_path
):
args
=
EngineArgs
(
model
=
model_ref
)
with
vllm_runner
(
model_ref
)
as
vllm_model
:
model_path
=
tmp_path
/
(
model_ref
+
".tensors"
)
key_path
=
tmp_path
/
(
model_ref
+
".key"
)
...
...
@@ -110,15 +106,13 @@ def test_deserialized_encrypted_vllm_model_has_same_outputs(
outputs
=
vllm_model
.
generate
(
prompts
,
sampling_params
)
config_for_serializing
=
TensorizerConfig
(
tensorizer_uri
=
model_path
,
encryption_keyfile
=
key_path
)
config_for_serializing
=
TensorizerConfig
(
tensorizer_uri
=
str
(
model_path
)
,
encryption_keyfile
=
str
(
key_path
)
)
vllm_model
.
apply_model
(
partial
(
serialize_vllm_model
,
tensorizer_config
=
config_for_serializing
))
tensorize_vllm_model
(
args
,
config_for_serializing
)
config_for_deserializing
=
TensorizerConfig
(
tensorizer_uri
=
model_path
,
encryption_keyfile
=
key_path
)
config_for_deserializing
=
TensorizerConfig
(
tensorizer_uri
=
str
(
model_path
),
encryption_keyfile
=
str
(
key_path
)
)
with
vllm_runner
(
model_ref
,
load_format
=
"tensorizer"
,
...
...
@@ -154,113 +148,46 @@ def test_deserialized_hf_model_has_same_outputs(hf_runner, vllm_runner,
assert
outputs
==
deserialized_outputs
def
test_vllm_model_can_load_with_lora
(
vllm_runner
,
tmp_path
):
multilora_inference
=
import_from_path
(
"examples.offline_inference.multilora_inference"
,
EXAMPLES_PATH
/
"offline_inference/multilora_inference.py"
,
)
model_ref
=
"meta-llama/Llama-2-7b-hf"
lora_path
=
snapshot_download
(
repo_id
=
"yard1/llama-2-7b-sql-lora-test"
)
test_prompts
=
multilora_inference
.
create_test_prompts
(
lora_path
)
# Serialize model before deserializing and binding LoRA adapters
with
vllm_runner
(
model_ref
)
as
vllm_model
:
model_path
=
tmp_path
/
(
model_ref
+
".tensors"
)
vllm_model
.
apply_model
(
partial
(
serialize_vllm_model
,
tensorizer_config
=
TensorizerConfig
(
tensorizer_uri
=
model_path
)))
with
vllm_runner
(
model_ref
,
load_format
=
"tensorizer"
,
model_loader_extra_config
=
TensorizerConfig
(
tensorizer_uri
=
model_path
,
num_readers
=
1
,
),
enable_lora
=
True
,
max_loras
=
1
,
max_lora_rank
=
8
,
max_cpu_loras
=
2
,
max_num_seqs
=
50
,
max_model_len
=
1000
,
)
as
loaded_vllm_model
:
multilora_inference
.
process_requests
(
loaded_vllm_model
.
model
.
llm_engine
,
test_prompts
)
assert
loaded_vllm_model
def
test_load_without_tensorizer_load_format
(
vllm_runner
):
def
test_load_without_tensorizer_load_format
(
vllm_runner
,
capfd
):
model
=
None
with
pytest
.
raises
(
ValueError
)
:
try
:
model
=
vllm_runner
(
model_ref
,
model_loader_extra_config
=
TensorizerConfig
(
tensorizer_uri
=
"test"
))
except
RuntimeError
:
out
,
err
=
capfd
.
readouterr
()
combined_output
=
out
+
err
assert
(
"ValueError: Model loader extra config "
"is not supported for load "
"format LoadFormat.AUTO"
)
in
combined_output
finally
:
del
model
gc
.
collect
()
torch
.
cuda
.
empty_cache
()
@
pytest
.
mark
.
skipif
(
not
is_curl_installed
(),
reason
=
"cURL is not installed"
)
def
test_openai_apiserver_with_tensorizer
(
vllm_runner
,
tmp_path
):
## Serialize model
with
vllm_runner
(
model_ref
)
as
vllm_model
:
model_path
=
tmp_path
/
(
model_ref
+
".tensors"
)
vllm_model
.
apply_model
(
partial
(
serialize_vllm_model
,
tensorizer_config
=
TensorizerConfig
(
tensorizer_uri
=
model_path
)))
model_loader_extra_config
=
{
"tensorizer_uri"
:
str
(
model_path
),
}
## Start OpenAI API server
openai_args
=
[
"--dtype"
,
"float16"
,
"--load-format"
,
"tensorizer"
,
"--model-loader-extra-config"
,
json
.
dumps
(
model_loader_extra_config
),
]
with
RemoteOpenAIServer
(
model_ref
,
openai_args
)
as
server
:
print
(
"Server ready."
)
client
=
server
.
get_client
()
completion
=
client
.
completions
.
create
(
model
=
model_ref
,
prompt
=
"Hello, my name is"
,
max_tokens
=
5
,
temperature
=
0.0
)
assert
completion
.
id
is
not
None
assert
len
(
completion
.
choices
)
==
1
assert
len
(
completion
.
choices
[
0
].
text
)
>=
5
assert
completion
.
choices
[
0
].
finish_reason
==
"length"
assert
completion
.
usage
==
openai
.
types
.
CompletionUsage
(
completion_tokens
=
5
,
prompt_tokens
=
6
,
total_tokens
=
11
)
def
test_raise_value_error_on_invalid_load_format
(
vllm_runner
):
def
test_raise_value_error_on_invalid_load_format
(
vllm_runner
,
capfd
):
model
=
None
with
pytest
.
raises
(
ValueError
)
:
try
:
model
=
vllm_runner
(
model_ref
,
load_format
=
"safetensors"
,
model_loader_extra_config
=
TensorizerConfig
(
tensorizer_uri
=
"test"
))
except
RuntimeError
:
out
,
err
=
capfd
.
readouterr
()
combined_output
=
out
+
err
assert
(
"ValueError: Model loader extra config is not supported "
"for load format LoadFormat.SAFETENSORS"
)
in
combined_output
finally
:
del
model
gc
.
collect
()
torch
.
cuda
.
empty_cache
()
@
pytest
.
mark
.
skipif
(
torch
.
cuda
.
device_count
()
<
2
,
reason
=
"Requires 2 GPUs"
)
def
test_tensorizer_with_tp_path_without_template
(
vllm_runner
):
with
pytest
.
raises
(
ValueError
)
:
def
test_tensorizer_with_tp_path_without_template
(
vllm_runner
,
capfd
):
try
:
model_ref
=
"EleutherAI/pythia-1.4b"
tensorized_path
=
f
"s3://tensorized/
{
model_ref
}
/fp16/model.tensors"
...
...
@@ -275,6 +202,13 @@ def test_tensorizer_with_tp_path_without_template(vllm_runner):
tensor_parallel_size
=
2
,
disable_custom_all_reduce
=
True
,
)
except
RuntimeError
:
out
,
err
=
capfd
.
readouterr
()
combined_output
=
out
+
err
assert
(
"ValueError: For a sharded model, tensorizer_uri "
"should include a string format template like '%04d' "
"to be formatted with the rank "
"of the shard"
)
in
combined_output
@
pytest
.
mark
.
skipif
(
torch
.
cuda
.
device_count
()
<
2
,
reason
=
"Requires 2 GPUs"
)
...
...
@@ -288,7 +222,6 @@ def test_deserialized_encrypted_vllm_model_with_tp_has_same_outputs(
enforce_eager
=
True
,
)
as
base_model
:
outputs
=
base_model
.
generate
(
prompts
,
sampling_params
)
base_model
.
model
.
llm_engine
.
model_executor
.
shutdown
()
# load model with two shards and serialize with encryption
model_path
=
str
(
tmp_path
/
(
model_ref
+
"-%02d.tensors"
))
...
...
@@ -296,7 +229,7 @@ def test_deserialized_encrypted_vllm_model_with_tp_has_same_outputs(
tensorizer_config
=
TensorizerConfig
(
tensorizer_uri
=
model_path
,
encryption_keyfile
=
key_path
,
encryption_keyfile
=
str
(
key_path
)
,
)
tensorize_vllm_model
(
...
...
@@ -331,13 +264,12 @@ def test_vllm_tensorized_model_has_same_outputs(vllm_runner, tmp_path):
model_ref
=
"facebook/opt-125m"
model_path
=
tmp_path
/
(
model_ref
+
".tensors"
)
config
=
TensorizerConfig
(
tensorizer_uri
=
str
(
model_path
))
args
=
EngineArgs
(
model
=
model_ref
,
device
=
"cuda"
)
with
vllm_runner
(
model_ref
)
as
vllm_model
:
outputs
=
vllm_model
.
generate
(
prompts
,
sampling_params
)
vllm_model
.
apply_model
(
partial
(
serialize_vllm_model
,
tensorizer_config
=
config
))
tensorize_vllm_model
(
args
,
config
)
assert
is_vllm_tensorized
(
config
)
with
vllm_runner
(
model_ref
,
...
...
tests/test_logger.py
View file @
4eabe123
# SPDX-License-Identifier: Apache-2.0
import
enum
import
json
import
logging
import
os
import
sys
import
tempfile
from
dataclasses
import
dataclass
from
json.decoder
import
JSONDecodeError
from
tempfile
import
NamedTemporaryFile
from
typing
import
Any
...
...
@@ -16,6 +17,7 @@ import pytest
from
vllm.logger
import
(
_DATE_FORMAT
,
_FORMAT
,
_configure_vllm_root_logger
,
enable_trace_function_call
,
init_logger
)
from
vllm.logging_utils
import
NewLineFormatter
from
vllm.logging_utils.dump_input
import
prepare_object_to_dump
def
f1
(
x
):
...
...
@@ -216,3 +218,37 @@ def test_custom_logging_config_causes_an_error_if_configure_logging_is_off():
assert
other_logger
.
handlers
!=
root_logger
.
handlers
assert
other_logger
.
level
!=
root_logger
.
level
assert
other_logger
.
propagate
def
test_prepare_object_to_dump
():
str_obj
=
'str'
assert
prepare_object_to_dump
(
str_obj
)
==
"'str'"
list_obj
=
[
1
,
2
,
3
]
assert
prepare_object_to_dump
(
list_obj
)
==
'[1, 2, 3]'
dict_obj
=
{
'a'
:
1
,
'b'
:
'b'
}
assert
prepare_object_to_dump
(
dict_obj
)
in
[
"{a: 1, b: 'b'}"
,
"{b: 'b', a: 1}"
]
set_obj
=
{
1
,
2
,
3
}
assert
prepare_object_to_dump
(
set_obj
)
==
'[1, 2, 3]'
tuple_obj
=
(
'a'
,
'b'
,
'c'
)
assert
prepare_object_to_dump
(
tuple_obj
)
==
"['a', 'b', 'c']"
class
CustomEnum
(
enum
.
Enum
):
A
=
enum
.
auto
()
B
=
enum
.
auto
()
C
=
enum
.
auto
()
assert
prepare_object_to_dump
(
CustomEnum
.
A
)
==
repr
(
CustomEnum
.
A
)
@
dataclass
class
CustomClass
:
a
:
int
b
:
str
assert
(
prepare_object_to_dump
(
CustomClass
(
1
,
'b'
))
==
"CustomClass(a=1, b='b')"
)
tests/test_outputs.py
0 → 100644
View file @
4eabe123
# SPDX-License-Identifier: Apache-2.0
from
vllm.outputs
import
RequestOutput
def
test_request_output_forward_compatible
():
output
=
RequestOutput
(
request_id
=
"test_request_id"
,
prompt
=
"test prompt"
,
prompt_token_ids
=
[
1
,
2
,
3
],
prompt_logprobs
=
None
,
outputs
=
[],
finished
=
False
,
example_arg_added_in_new_version
=
"some_value"
)
assert
output
is
not
None
tests/test_regression.py
View file @
4eabe123
...
...
@@ -60,6 +60,9 @@ def test_model_from_modelscope(monkeypatch: pytest.MonkeyPatch):
# model: https://modelscope.cn/models/qwen/Qwen1.5-0.5B-Chat/summary
with
monkeypatch
.
context
()
as
m
:
m
.
setenv
(
"VLLM_USE_MODELSCOPE"
,
"True"
)
# Don't use HF_TOKEN for ModelScope repos, otherwise it will fail
# with 400 Client Error: Bad Request.
m
.
setenv
(
"HF_TOKEN"
,
""
)
llm
=
LLM
(
model
=
"qwen/Qwen1.5-0.5B-Chat"
)
prompts
=
[
...
...
tests/tokenization/test_mistral_tokenizer.py
View file @
4eabe123
# SPDX-License-Identifier: Apache-2.0
import
pytest
from
mistral_common.protocol.instruct.messages
import
UserMessage
from
mistral_common.protocol.instruct.messages
import
(
AssistantMessage
,
ToolMessage
,
UserMessage
)
from
mistral_common.protocol.instruct.request
import
ChatCompletionRequest
from
mistral_common.protocol.instruct.tool_calls
import
Function
,
Tool
from
mistral_common.protocol.instruct.tool_calls
import
(
Function
,
FunctionCall
,
Tool
,
ToolCall
)
from
vllm.transformers_utils.tokenizers.mistral
import
(
make_mistral_chat_completion_request
)
# yapf: enable
@
pytest
.
mark
.
parametrize
(
"openai_request,expected_mistral_request"
,
[(
...
...
@@ -78,6 +81,107 @@ from vllm.transformers_utils.tokenizers.mistral import (
)
def
test_make_mistral_chat_completion_request
(
openai_request
,
expected_mistral_request
):
assert
(
make_mistral_chat_completion_request
(
openai_request
[
"messages"
],
openai_request
[
"tools"
])
==
expected_mistral_request
)
actual_request
=
make_mistral_chat_completion_request
(
openai_request
[
"messages"
],
openai_request
[
"tools"
])
assert
actual_request
==
expected_mistral_request
# Tool use with list content and reasoning_content
@
pytest
.
mark
.
parametrize
(
"openai_request,expected_mistral_request"
,
[(
{
"messages"
:
[
{
"role"
:
"user"
,
"content"
:
"What's the weather in Paris?"
,
},
{
"role"
:
"assistant"
,
"reasoning_content"
:
None
,
"content"
:
None
,
"tool_calls"
:
[{
"id"
:
"call123"
,
"type"
:
"function"
,
"function"
:
{
"name"
:
"get_weather"
,
"arguments"
:
'{"city": "Paris"}'
,
},
}],
},
{
"role"
:
"tool"
,
"content"
:
[{
"type"
:
"text"
,
"text"
:
"Rainy"
}],
"name"
:
"get_weather"
,
"tool_call_id"
:
"call123"
,
},
],
"tools"
:
[{
"type"
:
"function"
,
"function"
:
{
"name"
:
"get_weather"
,
"description"
:
"Gets the current weather in a city."
,
"parameters"
:
{
"type"
:
"object"
,
"properties"
:
{
"city"
:
{
"type"
:
"string"
,
"description"
:
"The city name"
}
},
"required"
:
[
"city"
],
},
},
}],
},
ChatCompletionRequest
(
messages
=
[
UserMessage
(
content
=
"What's the weather in Paris?"
),
AssistantMessage
(
content
=
None
,
tool_calls
=
[
ToolCall
(
id
=
"call123"
,
function
=
FunctionCall
(
name
=
"get_weather"
,
arguments
=
'{"city": "Paris"}'
,
),
)
],
),
ToolMessage
(
content
=
"Rainy"
,
tool_call_id
=
"call123"
,
name
=
"get_weather"
,
),
],
tools
=
[
Tool
(
type
=
"function"
,
function
=
Function
(
name
=
"get_weather"
,
description
=
"Gets the current weather in a city."
,
parameters
=
{
"type"
:
"object"
,
"properties"
:
{
"city"
:
{
"type"
:
"string"
,
"description"
:
"The city name"
}
},
"required"
:
[
"city"
],
},
),
)
],
),
)])
def
test_make_mistral_chat_completion_request_list_content
(
openai_request
,
expected_mistral_request
):
actual_request
=
make_mistral_chat_completion_request
(
openai_request
[
"messages"
],
openai_request
[
"tools"
])
assert
actual_request
==
expected_mistral_request
tests/tool_use/test_tool_choice_required.py
View file @
4eabe123
# SPDX-License-Identifier: Apache-2.0
import
json
import
re
from
copy
import
deepcopy
from
unittest.mock
import
MagicMock
import
pytest
import
regex
as
re
from
pydantic
import
TypeAdapter
from
vllm.entrypoints.openai.protocol
import
(
ChatCompletionRequest
,
...
...
tests/tool_use/utils.py
View file @
4eabe123
...
...
@@ -88,7 +88,7 @@ CONFIGS: dict[str, ServerConfig] = {
"meta-llama/Llama-4-Scout-17B-16E-Instruct"
,
"arguments"
:
[
"--enforce-eager"
,
"--no-enable-prefix-caching"
,
"--tool-call-parser"
,
"pythonic"
,
"--chat-template"
,
"--tool-call-parser"
,
"
llama4_
pythonic"
,
"--chat-template"
,
str
(
VLLM_PATH
/
"examples/tool_chat_template_llama4_pythonic.jinja"
),
"-tp"
,
"4"
...
...
tests/v1/core/test_scheduler_e2e.py
View file @
4eabe123
...
...
@@ -19,7 +19,8 @@ def model() -> LLM:
enable_prefix_caching
=
True
,
long_prefill_token_threshold
=
2
,
max_num_batched_tokens
=
6
,
max_num_seqs
=
3
)
max_num_seqs
=
3
,
block_size
=
16
)
def
test_concurrent_partial_prefill
(
model
):
...
...
@@ -27,3 +28,11 @@ def test_concurrent_partial_prefill(model):
assert
len
(
outputs
)
==
3
for
output
in
outputs
:
assert
len
(
output
.
outputs
)
==
1
def
test_prefix_cache_stats_is_recorded
(
model
):
# 17 tokens will make sure first 16 tokens are cached in a block
input_tokens
=
{
"prompt_token_ids"
:
[
101
]
*
17
}
_
=
model
.
generate
([
input_tokens
])
outputs
=
model
.
generate
([
input_tokens
])
assert
outputs
[
0
].
num_cached_tokens
==
16
tests/v1/engine/test_llm_engine.py
View file @
4eabe123
...
...
@@ -6,6 +6,7 @@ from typing import Optional
import
pytest
from
vllm
import
LLM
,
SamplingParams
from
vllm.v1.metrics.reader
import
Counter
,
Gauge
,
Histogram
,
Metric
,
Vector
MODEL
=
"facebook/opt-125m"
DTYPE
=
"half"
...
...
@@ -97,3 +98,67 @@ def test_parallel_sampling(vllm_model, example_prompts) -> None:
raise
AssertionError
(
f
"
{
len
(
completion_counts
)
}
unique completions; expected"
f
"
{
n
}
. Repeats:
{
repeats
}
"
)
def
test_engine_metrics
(
vllm_runner
,
monkeypatch
,
example_prompts
):
max_tokens
=
100
# Use spec decoding to test num_accepted_tokens_per_pos
speculative_config
=
{
"method"
:
"ngram"
,
"prompt_lookup_max"
:
5
,
"prompt_lookup_min"
:
3
,
"num_speculative_tokens"
:
5
,
}
monkeypatch
.
setenv
(
"VLLM_USE_V1"
,
"1"
)
with
vllm_runner
(
MODEL
,
speculative_config
=
speculative_config
,
disable_log_stats
=
False
,
)
as
vllm_model
:
model
:
LLM
=
vllm_model
.
model
sampling_params
=
SamplingParams
(
temperature
=
0.0
,
max_tokens
=
max_tokens
)
outputs
=
model
.
generate
(
example_prompts
,
sampling_params
)
n_prompts
=
len
(
example_prompts
)
assert
len
(
outputs
)
==
n_prompts
total_tokens
=
0
for
out
in
outputs
:
assert
len
(
out
.
outputs
)
==
1
total_tokens
+=
len
(
out
.
outputs
[
0
].
token_ids
)
assert
total_tokens
==
max_tokens
*
n_prompts
metrics
=
model
.
get_metrics
()
def
find_metric
(
name
)
->
list
[
Metric
]:
found
=
[]
for
metric
in
metrics
:
if
metric
.
name
==
name
:
found
.
append
(
metric
)
return
found
num_requests_running
=
find_metric
(
"vllm:num_requests_running"
)
assert
len
(
num_requests_running
)
==
1
assert
isinstance
(
num_requests_running
[
0
],
Gauge
)
assert
num_requests_running
[
0
].
value
==
.
0
generation_tokens
=
find_metric
(
"vllm:generation_tokens"
)
assert
len
(
generation_tokens
)
==
1
assert
isinstance
(
generation_tokens
[
0
],
Counter
)
assert
generation_tokens
[
0
].
value
==
total_tokens
request_generation_tokens
=
find_metric
(
"vllm:request_generation_tokens"
)
assert
len
(
request_generation_tokens
)
==
1
assert
isinstance
(
request_generation_tokens
[
0
],
Histogram
)
assert
"+Inf"
in
request_generation_tokens
[
0
].
buckets
assert
request_generation_tokens
[
0
].
buckets
[
"+Inf"
]
==
n_prompts
assert
request_generation_tokens
[
0
].
count
==
n_prompts
assert
request_generation_tokens
[
0
].
sum
==
total_tokens
num_accepted_tokens_per_pos
=
find_metric
(
"vllm:spec_decode_num_accepted_tokens_per_pos"
)
assert
len
(
num_accepted_tokens_per_pos
)
==
1
assert
isinstance
(
num_accepted_tokens_per_pos
[
0
],
Vector
)
assert
len
(
num_accepted_tokens_per_pos
[
0
].
values
)
==
5
tests/v1/entrypoints/llm/test_struct_output_generate.py
View file @
4eabe123
...
...
@@ -4,12 +4,12 @@
from
__future__
import
annotations
import
json
import
re
from
enum
import
Enum
from
typing
import
TYPE_CHECKING
,
Any
import
jsonschema
import
pytest
import
regex
as
re
from
pydantic
import
BaseModel
from
tests.reasoning.utils
import
run_reasoning_extraction
...
...
tests/v1/entrypoints/openai/test_completion.py
View file @
4eabe123
# SPDX-License-Identifier: Apache-2.0
import
re
from
typing
import
Optional
import
openai
# use the official client for correctness check
import
pytest
import
pytest_asyncio
import
regex
as
re
from
openai
import
BadRequestError
from
tests.utils
import
RemoteOpenAIServer
...
...
tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh
View file @
4eabe123
...
...
@@ -13,6 +13,8 @@ NUM_DECODE_INSTANCES=${NUM_DECODE_INSTANCES:-2} # Default to 2
# Find the git repository root directory
GIT_ROOT
=
$(
git rev-parse
--show-toplevel
)
SMI_BIN
=
$(
which nvidia-smi
||
which rocm-smi
)
# Trap the SIGINT signal (triggered by Ctrl+C)
trap
'kill $(jobs -pr)'
SIGINT SIGTERM EXIT
...
...
@@ -44,6 +46,13 @@ get_model_args() {
echo
"
$extra_args
"
}
get_num_gpus
()
{
if
[[
"
$SMI_BIN
"
==
*
"nvidia"
*
]]
;
then
echo
"
$(
$SMI_BIN
--query-gpu
=
name
--format
=
csv,noheader |
wc
-l
)
"
else
echo
"
$(
$SMI_BIN
-l
|
grep
GPU |
wc
-l
)
"
fi
}
# Function to run tests for a specific model
run_tests_for_model
()
{
...
...
@@ -64,7 +73,7 @@ run_tests_for_model() {
# Start prefill instances
for
i
in
$(
seq
0
$((
NUM_PREFILL_INSTANCES-1
))
)
;
do
# Calculate GPU ID - we'll distribute across available GPUs
GPU_ID
=
$((
i
%
$(
nvidia-smi
--query-gpu
=
name
--format
=
csv,noheader |
wc
-l
)
))
GPU_ID
=
$((
i
%
$(
get_num_gpus
)
))
# Calculate port number (base port + instance number)
PORT
=
$((
8100
+
i
))
# Calculate side channel port
...
...
@@ -96,7 +105,7 @@ run_tests_for_model() {
# Start decode instances
for
i
in
$(
seq
0
$((
NUM_DECODE_INSTANCES-1
))
)
;
do
# Calculate GPU ID - we'll distribute across available GPUs, starting from after prefill GPUs
GPU_ID
=
$((
(
i
+
NUM_PREFILL_INSTANCES
)
%
$(
nvidia-smi
--query-gpu
=
name
--format
=
csv,noheader |
wc
-l
)
))
GPU_ID
=
$((
(
i
+
NUM_PREFILL_INSTANCES
)
%
$(
get_num_gpus
)
))
# Calculate port number (base port + instance number)
PORT
=
$((
8200
+
i
))
# Calculate side channel port
...
...
tests/v1/kv_connector/unit/test_multi_connector.py
View file @
4eabe123
...
...
@@ -239,3 +239,11 @@ def get_connector_events() -> dict[str, list[str]]:
print
(
f
"[ERROR] Could not read connector events for
{
name
}
:
{
e
}
"
)
return
connector_events
def
test_engine_id_conflict
():
configs
=
[
KVTransferConfig
()
for
_
in
range
(
2
)]
ids
=
[
config
.
engine_id
for
config
in
configs
]
assert
ids
[
0
]
!=
ids
[
1
],
(
"Engine IDs should be different for different configs. "
f
"Got
{
ids
}
"
)
tests/v1/kv_connector/unit/test_remote_prefill_lifecycle.py
View file @
4eabe123
...
...
@@ -340,3 +340,84 @@ def test_full_block_prompt():
output
=
outputs
[
0
]
assert
output
.
finish_reason
==
FinishReason
.
STOP
assert_scheduler_empty
(
scheduler
)
def
test_cannot_schedule_after_recv
():
"""
Test that we can handle no schedule after recv due to not
enough remaining KV blocks.
"""
# NOTE: the KVCacheManager will use 1 null block.
# So there are 5 total working blocks.
TOTAL_NUM_BLOCKS
=
6
vllm_config
=
create_vllm_config
()
scheduler
=
create_scheduler
(
vllm_config
,
num_blocks
=
TOTAL_NUM_BLOCKS
)
# Prime the KVCache.
NUM_PROMPT_BLOCKS
=
2
BLOCK_SIZE
=
vllm_config
.
cache_config
.
block_size
# Prompt will use 2 blocks + 1 block after we schedule.
NUM_TOKENS_LOCAL
=
int
(
BLOCK_SIZE
*
NUM_PROMPT_BLOCKS
)
NUM_TOKENS_REMOTE
=
int
(
BLOCK_SIZE
*
(
NUM_PROMPT_BLOCKS
+
0.5
))
request_normal
=
create_request
(
request_id
=
1
,
num_tokens
=
NUM_TOKENS_LOCAL
)
request_remote
=
create_request
(
request_id
=
2
,
num_tokens
=
NUM_TOKENS_REMOTE
,
do_remote_prefill
=
True
)
# STEP 1: 3 blocks are in use (2 for prompt, 1 for decode).
scheduler
.
add_request
(
request_normal
)
scheduler_output
=
scheduler
.
schedule
()
model_runner_output
=
create_model_runner_output
(
reqs
=
[
request_normal
])
scheduler
.
update_from_output
(
scheduler_output
,
model_runner_output
)
assert
len
(
scheduler
.
running
)
==
1
assert
len
(
scheduler
.
waiting
)
==
0
# Step 2: 5 blocks are in use (2 new for remote blocks).
scheduler
.
add_request
(
request_remote
)
scheduler_output
=
scheduler
.
schedule
()
model_runner_output
=
create_model_runner_output
(
reqs
=
[
request_normal
])
scheduler
.
update_from_output
(
scheduler_output
,
model_runner_output
)
assert
len
(
scheduler
.
running
)
==
1
assert
len
(
scheduler
.
waiting
)
==
1
# Step 3: finish recving (5 blocks in use)
scheduler_output
=
scheduler
.
schedule
()
model_runner_output
=
create_model_runner_output
(
reqs
=
[
request_normal
],
finished_recving
=
[
request_remote
.
request_id
])
scheduler
.
update_from_output
(
scheduler_output
,
model_runner_output
)
assert
len
(
scheduler
.
running
)
==
1
assert
len
(
scheduler
.
waiting
)
==
1
# Step 4: try to schedule, not enough blocks.
scheduler_output
=
scheduler
.
schedule
()
model_runner_output
=
create_model_runner_output
(
reqs
=
[
request_normal
])
scheduler
.
update_from_output
(
scheduler_output
,
model_runner_output
)
assert
len
(
scheduler
.
running
)
==
1
assert
len
(
scheduler
.
waiting
)
==
1
# Step 5: finish the request, free it.
scheduler_output
=
scheduler
.
schedule
()
model_runner_output
=
create_model_runner_output
(
reqs
=
[
request_normal
],
use_eos
=
True
)
scheduler
.
update_from_output
(
scheduler_output
,
model_runner_output
)
assert
len
(
scheduler
.
running
)
==
0
assert
len
(
scheduler
.
waiting
)
==
1
# Step 6: now we can schedule (with 2 blocks computed).
scheduler_output
=
scheduler
.
schedule
()
model_runner_output
=
create_model_runner_output
(
reqs
=
[
request_remote
])
assert
(
scheduler_output
.
scheduled_new_reqs
[
0
].
num_computed_tokens
==
NUM_PROMPT_BLOCKS
*
BLOCK_SIZE
)
scheduler
.
update_from_output
(
scheduler_output
,
model_runner_output
)
assert
len
(
scheduler
.
running
)
==
1
assert
len
(
scheduler
.
waiting
)
==
0
# Step 7: free everything.
scheduler_output
=
scheduler
.
schedule
()
model_runner_output
=
create_model_runner_output
(
reqs
=
[
request_remote
],
use_eos
=
True
)
scheduler
.
update_from_output
(
scheduler_output
,
model_runner_output
)
_
=
scheduler
.
schedule
()
assert_scheduler_empty
(
scheduler
)
tests/v1/sample/test_topk_topp_sampler.py
View file @
4eabe123
...
...
@@ -16,9 +16,20 @@ VOCAB_SIZE = 128 * 1024
FLASHINFER_ENABLED
=
current_platform
.
is_cuda
()
and
is_flashinfer_available
@
pytest
.
fixture
(
autouse
=
True
)
def
reset_default_device
():
"""
Explicitly set the default device, which can affect subsequent tests.
Adding this fixture helps avoid this problem.
"""
original_device
=
torch
.
get_default_device
()
yield
torch
.
set_default_device
(
original_device
)
def
test_topk_impl_equivalance
():
with
torch
.
device
(
DEVICE
)
:
torch
.
set_default_
device
(
DEVICE
)
generator
=
Generator
(
device
=
DEVICE
).
manual_seed
(
33
)
logits
=
torch
.
rand
((
BATCH_SIZE
,
VOCAB_SIZE
),
generator
=
generator
)
...
...
@@ -28,10 +39,8 @@ def test_topk_impl_equivalance():
# Set k=vocab_size for ~50% of requests in the batch (top-k disabled).
k
.
masked_fill_
(
torch
.
randint
(
0
,
2
,
(
BATCH_SIZE
,
),
generator
=
generator
,
dtype
=
bool
),
VOCAB_SIZE
)
torch
.
randint
(
0
,
2
,
(
BATCH_SIZE
,
),
generator
=
generator
,
dtype
=
bool
),
VOCAB_SIZE
)
# Top-k only implementation
result1
=
apply_top_k_top_p
(
logits
=
logits
.
clone
(),
k
=
k
,
p
=
None
)
...
...
@@ -58,7 +67,7 @@ def test_flashinfer_sampler():
pytest
.
skip
(
"FlashInfer not installed or not available on this platform."
)
with
torch
.
device
(
DEVICE
)
:
torch
.
set_default_
device
(
DEVICE
)
generator
=
Generator
(
device
=
DEVICE
).
manual_seed
(
42
)
# Generate random logits
...
...
@@ -67,8 +76,7 @@ def test_flashinfer_sampler():
# Generate various top-k and top-p values
k_values
=
torch
.
randint
(
1
,
1000
,
(
BATCH_SIZE
,
),
generator
=
generator
)
p_values
=
torch
.
rand
(
(
BATCH_SIZE
,
),
generator
=
generator
)
*
0.5
+
0.5
# range in [0.5, 1.0]
(
BATCH_SIZE
,
),
generator
=
generator
)
*
0.5
+
0.5
# range in [0.5, 1.0]
# Sometimes disable top-k (k=vocab_size)
k_values
.
masked_fill_
(
...
...
tests/v1/sample/utils.py
View file @
4eabe123
# SPDX-License-Identifier: Apache-2.0
import
re
from
enum
import
Enum
from
typing
import
Optional
import
regex
as
re
from
vllm
import
CompletionOutput
...
...
tests/v1/spec_decode/test_eagle.py
View file @
4eabe123
...
...
@@ -100,8 +100,12 @@ def test_prepare_inputs():
dtype
=
torch
.
int32
,
device
=
device
)
# n1 + n2 + n3 - a - b -c
num_tokens
=
cu_target_query_lens
[
-
1
].
item
()
-
num_rejected_tokens
.
sum
(
).
item
()
cu_num_tokens
,
token_indices
=
EagleProposer
.
prepare_inputs
(
cu_target_query_lens
,
num_rejected_tokens
)
cu_target_query_lens
,
num_rejected_tokens
,
num_tokens
)
assert
torch
.
equal
(
cu_num_tokens
,
expected_cu_num_tokens
)
assert
token_indices
.
shape
[
0
]
==
expected_cu_num_tokens
[
-
1
].
item
()
...
...
@@ -117,34 +121,13 @@ def test_prepare_inputs():
])
@
mock
.
patch
(
'vllm.v1.spec_decode.eagle.get_pp_group'
)
@
mock
.
patch
(
'vllm.v1.spec_decode.eagle.get_layers_from_vllm_config'
)
@
mock
.
patch
(
'vllm.v1.spec_decode.eagle.ModelRegistry'
)
@
mock
.
patch
(
'vllm.v1.spec_decode.eagle.get_model_loader'
)
@
mock
.
patch
(
'vllm.v1.spec_decode.eagle.set_default_torch_dtype'
)
@
mock
.
patch
(
'vllm.v1.spec_decode.eagle.set_current_vllm_config'
)
def
test_load_model
(
mock_set_config
,
mock_set_dtype
,
mock_get_loader
,
mock_registry
,
mock_get_layers
,
mock_get_pp_group
,
method
,
@
mock
.
patch
(
'vllm.v1.spec_decode.eagle.get_model'
)
def
test_load_model
(
mock_get_model
,
mock_get_layers
,
mock_get_pp_group
,
method
,
proposer_helper
,
draft_model_dir
,
target_attribute_path
):
# Setup mock for model class
mock_model_cls
=
mock
.
MagicMock
()
mock_registry
.
resolve_model_cls
.
return_value
=
(
mock_model_cls
,
"test_arch"
)
# Create a real context manager for mocks
class
MockContextManager
:
def
__init__
(
self
):
pass
def
__enter__
(
self
):
return
None
def
__exit__
(
self
,
exc_type
,
exc_val
,
exc_tb
):
return
False
# Make the mocks return actual context manager objects
mock_set_dtype
.
return_value
=
MockContextManager
()
mock_set_config
.
return_value
=
MockContextManager
()
# Setup model mock
mock_model
=
mock
.
MagicMock
()
mock_get_model
.
return_value
=
mock_model
# Setup mocks for attention layers
target_attn_layers
=
{
...
...
@@ -164,25 +147,6 @@ def test_load_model(mock_set_config, mock_set_dtype, mock_get_loader,
mock_pp_group
.
world_size
=
2
if
method
==
"eagle"
else
1
mock_get_pp_group
.
return_value
=
mock_pp_group
# Setup model loader mock
mock_loader
=
mock
.
MagicMock
()
mock_get_loader
.
return_value
=
mock_loader
# Setup model mock
mock_model
=
mock
.
MagicMock
()
mock_model_cls
.
return_value
=
mock_model
mock_model
.
to
.
return_value
=
mock_model
# Configure mock to test the attribute sharing path
if
method
==
"eagle"
:
# For eagle, test the lm_head path
mock_model
.
load_weights
.
return_value
=
{
"model.embed_tokens.weight"
:
torch
.
zeros
(
1
)
}
else
:
# For eagle3, test the embed_tokens path
mock_model
.
load_weights
.
return_value
=
{}
# Setup target model with the appropriate attributes
target_model
=
mock
.
MagicMock
()
...
...
@@ -204,13 +168,7 @@ def test_load_model(mock_set_config, mock_set_dtype, mock_get_loader,
proposer
.
load_model
(
target_model
)
# Verify common interactions
mock_get_loader
.
assert_called_once
()
mock_model_cls
.
assert_called_once
()
mock_model
.
to
.
assert_called_once
()
mock_model
.
load_weights
.
assert_called_once
()
# Verify the loader was called with the right config
mock_get_loader
.
assert_called_once_with
(
proposer
.
vllm_config
.
load_config
)
mock_get_model
.
assert_called_once
()
# Verify the specific attribute sharing based on the method
if
method
==
"eagle"
:
...
...
@@ -288,6 +246,9 @@ def test_propose(num_speculative_tokens):
# Assign the mock to the proposer
proposer
.
model
=
model_mock
# Assign draft attn_layer_names since load_model is not invoked
proposer
.
attn_layer_names
=
[
"layer.0"
]
# Create input tensors
cu_num_tokens
=
torch
.
tensor
([
0
,
seq_len_1
,
total_tokens
],
dtype
=
torch
.
int32
,
...
...
tests/v1/test_metrics_reader.py
0 → 100644
View file @
4eabe123
# SPDX-License-Identifier: Apache-2.0
import
prometheus_client
import
pytest
from
vllm.v1.metrics.reader
import
(
Counter
,
Gauge
,
Histogram
,
Vector
,
get_metrics_snapshot
)
@
pytest
.
fixture
(
autouse
=
True
)
def
test_registry
(
monkeypatch
):
# Use a custom registry for tests
test_registry
=
prometheus_client
.
CollectorRegistry
(
auto_describe
=
True
)
monkeypatch
.
setattr
(
"vllm.v1.metrics.reader.REGISTRY"
,
test_registry
)
return
test_registry
@
pytest
.
mark
.
parametrize
(
"num_engines"
,
[
1
,
4
])
def
test_gauge_metric
(
test_registry
,
num_engines
):
g
=
prometheus_client
.
Gauge
(
"vllm:test_gauge"
,
"Test gauge metric"
,
labelnames
=
[
"model"
,
"engine_index"
],
registry
=
test_registry
)
for
i
in
range
(
num_engines
):
g
.
labels
(
model
=
"foo"
,
engine_index
=
str
(
i
)).
set
(
98.5
)
metrics
=
get_metrics_snapshot
()
assert
len
(
metrics
)
==
num_engines
engine_labels
=
[
str
(
i
)
for
i
in
range
(
num_engines
)]
for
m
in
metrics
:
assert
isinstance
(
m
,
Gauge
)
assert
m
.
name
==
"vllm:test_gauge"
assert
m
.
value
==
98.5
assert
m
.
labels
[
"model"
]
==
"foo"
assert
m
.
labels
[
"engine_index"
]
in
engine_labels
engine_labels
.
remove
(
m
.
labels
[
"engine_index"
])
@
pytest
.
mark
.
parametrize
(
"num_engines"
,
[
1
,
4
])
def
test_counter_metric
(
test_registry
,
num_engines
):
c
=
prometheus_client
.
Counter
(
"vllm:test_counter"
,
"Test counter metric"
,
labelnames
=
[
"model"
,
"engine_index"
],
registry
=
test_registry
)
for
i
in
range
(
num_engines
):
c
.
labels
(
model
=
"bar"
,
engine_index
=
str
(
i
)).
inc
(
19
)
metrics
=
get_metrics_snapshot
()
assert
len
(
metrics
)
==
num_engines
engine_labels
=
[
str
(
i
)
for
i
in
range
(
num_engines
)]
for
m
in
metrics
:
assert
isinstance
(
m
,
Counter
)
assert
m
.
name
==
"vllm:test_counter"
assert
m
.
value
==
19
assert
m
.
labels
[
"model"
]
==
"bar"
assert
m
.
labels
[
"engine_index"
]
in
engine_labels
engine_labels
.
remove
(
m
.
labels
[
"engine_index"
])
@
pytest
.
mark
.
parametrize
(
"num_engines"
,
[
1
,
4
])
def
test_histogram_metric
(
test_registry
,
num_engines
):
h
=
prometheus_client
.
Histogram
(
"vllm:test_histogram"
,
"Test histogram metric"
,
labelnames
=
[
"model"
,
"engine_index"
],
buckets
=
[
10
,
20
,
30
,
40
,
50
],
registry
=
test_registry
)
for
i
in
range
(
num_engines
):
hist
=
h
.
labels
(
model
=
"blaa"
,
engine_index
=
str
(
i
))
hist
.
observe
(
42
)
hist
.
observe
(
21
)
hist
.
observe
(
7
)
metrics
=
get_metrics_snapshot
()
assert
len
(
metrics
)
==
num_engines
engine_labels
=
[
str
(
i
)
for
i
in
range
(
num_engines
)]
for
m
in
metrics
:
assert
isinstance
(
m
,
Histogram
)
assert
m
.
name
==
"vllm:test_histogram"
assert
m
.
count
==
3
assert
m
.
sum
==
70
assert
m
.
buckets
[
"10.0"
]
==
1
assert
m
.
buckets
[
"20.0"
]
==
1
assert
m
.
buckets
[
"30.0"
]
==
2
assert
m
.
buckets
[
"40.0"
]
==
2
assert
m
.
buckets
[
"50.0"
]
==
3
assert
m
.
labels
[
"model"
]
==
"blaa"
assert
m
.
labels
[
"engine_index"
]
in
engine_labels
engine_labels
.
remove
(
m
.
labels
[
"engine_index"
])
@
pytest
.
mark
.
parametrize
(
"num_engines"
,
[
1
,
4
])
def
test_vector_metric
(
test_registry
,
num_engines
):
c
=
prometheus_client
.
Counter
(
"vllm:spec_decode_num_accepted_tokens_per_pos"
,
"Vector-like counter metric"
,
labelnames
=
[
"position"
,
"model"
,
"engine_index"
],
registry
=
test_registry
)
for
i
in
range
(
num_engines
):
c
.
labels
(
position
=
"0"
,
model
=
"llama"
,
engine_index
=
str
(
i
)).
inc
(
10
)
c
.
labels
(
position
=
"1"
,
model
=
"llama"
,
engine_index
=
str
(
i
)).
inc
(
5
)
c
.
labels
(
position
=
"2"
,
model
=
"llama"
,
engine_index
=
str
(
i
)).
inc
(
1
)
metrics
=
get_metrics_snapshot
()
assert
len
(
metrics
)
==
num_engines
engine_labels
=
[
str
(
i
)
for
i
in
range
(
num_engines
)]
for
m
in
metrics
:
assert
isinstance
(
m
,
Vector
)
assert
m
.
name
==
"vllm:spec_decode_num_accepted_tokens_per_pos"
assert
m
.
values
==
[
10
,
5
,
1
]
assert
m
.
labels
[
"model"
]
==
"llama"
assert
m
.
labels
[
"engine_index"
]
in
engine_labels
engine_labels
.
remove
(
m
.
labels
[
"engine_index"
])
tests/v1/test_oracle.py
View file @
4eabe123
...
...
@@ -12,7 +12,7 @@ UNSUPPORTED_MODELS_V1 = [
"openai/whisper-large-v3"
,
# transcription
"facebook/bart-large-cnn"
,
# encoder decoder
"mistralai/Mamba-Codestral-7B-v0.1"
,
# mamba
"hmellor/
bamba-
tiny-random"
,
# hybrid
"hmellor/tiny-random
-BambaForCausalLM
"
,
# hybrid
"BAAI/bge-m3"
,
# embedding
]
...
...
tests/v1/worker/test_gpu_input_batch.py
View file @
4eabe123
...
...
@@ -251,7 +251,7 @@ def test_sampling_metadata_in_input_batch(device: str, batch_size: int):
device
=
torch
.
device
(
device
),
pin_memory
=
is_pin_memory_available
(),
vocab_size
=
1024
,
kv_cache_config
=
get_kv_cache_config
()
,
block_size
=
1
,
)
reqs
:
list
[
CachedRequestState
]
=
[]
req_id_reqs
=
{}
...
...
@@ -341,7 +341,7 @@ def test_swap_states_in_input_batch(device: str, batch_size: int,
device
=
torch
.
device
(
device
),
pin_memory
=
is_pin_memory_available
(),
vocab_size
=
1024
,
kv_cache_config
=
get_kv_cache_config
()
,
block_size
=
1
,
)
ref_input_batch
:
InputBatch
=
InputBatch
(
max_num_reqs
=
batch_size
,
...
...
@@ -350,7 +350,7 @@ def test_swap_states_in_input_batch(device: str, batch_size: int,
device
=
torch
.
device
(
device
),
pin_memory
=
is_pin_memory_available
(),
vocab_size
=
1024
,
kv_cache_config
=
get_kv_cache_config
()
,
block_size
=
1
,
)
reqs
:
list
[
CachedRequestState
]
=
[]
...
...
Prev
1
…
17
18
19
20
21
22
23
24
25
…
34
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment