Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
bc8a8ce5
Unverified
Commit
bc8a8ce5
authored
Jul 22, 2025
by
Kebe
Committed by
GitHub
Jul 22, 2025
Browse files
[Misc] Remove deprecated args in v0.10 (#21349)
Signed-off-by:
Kebe
<
mail@kebe7jun.com
>
parent
32142b3c
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
0 additions
and
25 deletions
+0
-25
examples/offline_inference/neuron_speculation.py
examples/offline_inference/neuron_speculation.py
+0
-1
tests/neuron/2_core/test_mistral.py
tests/neuron/2_core/test_mistral.py
+0
-1
tests/neuron/2_core/test_multi_lora.py
tests/neuron/2_core/test_multi_lora.py
+0
-2
vllm/engine/arg_utils.py
vllm/engine/arg_utils.py
+0
-21
No files found.
examples/offline_inference/neuron_speculation.py
View file @
bc8a8ce5
...
@@ -37,7 +37,6 @@ def initialize_llm():
...
@@ -37,7 +37,6 @@ def initialize_llm():
max_num_seqs
=
4
,
max_num_seqs
=
4
,
max_model_len
=
2048
,
max_model_len
=
2048
,
block_size
=
2048
,
block_size
=
2048
,
use_v2_block_manager
=
True
,
device
=
"neuron"
,
device
=
"neuron"
,
tensor_parallel_size
=
32
,
tensor_parallel_size
=
32
,
)
)
...
...
tests/neuron/2_core/test_mistral.py
View file @
bc8a8ce5
...
@@ -9,7 +9,6 @@ def test_mistral():
...
@@ -9,7 +9,6 @@ def test_mistral():
tensor_parallel_size
=
2
,
tensor_parallel_size
=
2
,
max_num_seqs
=
4
,
max_num_seqs
=
4
,
max_model_len
=
128
,
max_model_len
=
128
,
use_v2_block_manager
=
True
,
override_neuron_config
=
{
override_neuron_config
=
{
"sequence_parallel_enabled"
:
False
,
"sequence_parallel_enabled"
:
False
,
"skip_warmup"
:
True
"skip_warmup"
:
True
...
...
tests/neuron/2_core/test_multi_lora.py
View file @
bc8a8ce5
...
@@ -14,7 +14,6 @@ def test_llama_single_lora():
...
@@ -14,7 +14,6 @@ def test_llama_single_lora():
tensor_parallel_size
=
2
,
tensor_parallel_size
=
2
,
max_num_seqs
=
4
,
max_num_seqs
=
4
,
max_model_len
=
512
,
max_model_len
=
512
,
use_v2_block_manager
=
True
,
override_neuron_config
=
{
override_neuron_config
=
{
"sequence_parallel_enabled"
:
False
,
"sequence_parallel_enabled"
:
False
,
"skip_warmup"
:
True
,
"skip_warmup"
:
True
,
...
@@ -57,7 +56,6 @@ def test_llama_multiple_lora():
...
@@ -57,7 +56,6 @@ def test_llama_multiple_lora():
tensor_parallel_size
=
2
,
tensor_parallel_size
=
2
,
max_num_seqs
=
4
,
max_num_seqs
=
4
,
max_model_len
=
512
,
max_model_len
=
512
,
use_v2_block_manager
=
True
,
override_neuron_config
=
{
override_neuron_config
=
{
"sequence_parallel_enabled"
:
"sequence_parallel_enabled"
:
False
,
False
,
...
...
vllm/engine/arg_utils.py
View file @
bc8a8ce5
...
@@ -313,7 +313,6 @@ class EngineArgs:
...
@@ -313,7 +313,6 @@ class EngineArgs:
CacheConfig
.
prefix_caching_hash_algo
CacheConfig
.
prefix_caching_hash_algo
disable_sliding_window
:
bool
=
ModelConfig
.
disable_sliding_window
disable_sliding_window
:
bool
=
ModelConfig
.
disable_sliding_window
disable_cascade_attn
:
bool
=
ModelConfig
.
disable_cascade_attn
disable_cascade_attn
:
bool
=
ModelConfig
.
disable_cascade_attn
use_v2_block_manager
:
bool
=
True
swap_space
:
float
=
CacheConfig
.
swap_space
swap_space
:
float
=
CacheConfig
.
swap_space
cpu_offload_gb
:
float
=
CacheConfig
.
cpu_offload_gb
cpu_offload_gb
:
float
=
CacheConfig
.
cpu_offload_gb
gpu_memory_utilization
:
float
=
CacheConfig
.
gpu_memory_utilization
gpu_memory_utilization
:
float
=
CacheConfig
.
gpu_memory_utilization
...
@@ -364,7 +363,6 @@ class EngineArgs:
...
@@ -364,7 +363,6 @@ class EngineArgs:
max_prompt_adapter_token
:
int
=
\
max_prompt_adapter_token
:
int
=
\
PromptAdapterConfig
.
max_prompt_adapter_token
PromptAdapterConfig
.
max_prompt_adapter_token
device
:
Device
=
DeviceConfig
.
device
num_scheduler_steps
:
int
=
SchedulerConfig
.
num_scheduler_steps
num_scheduler_steps
:
int
=
SchedulerConfig
.
num_scheduler_steps
multi_step_stream_outputs
:
bool
=
SchedulerConfig
.
multi_step_stream_outputs
multi_step_stream_outputs
:
bool
=
SchedulerConfig
.
multi_step_stream_outputs
ray_workers_use_nsight
:
bool
=
ParallelConfig
.
ray_workers_use_nsight
ray_workers_use_nsight
:
bool
=
ParallelConfig
.
ray_workers_use_nsight
...
@@ -745,16 +743,6 @@ class EngineArgs:
...
@@ -745,16 +743,6 @@ class EngineArgs:
"--max-prompt-adapter-token"
,
"--max-prompt-adapter-token"
,
**
prompt_adapter_kwargs
[
"max_prompt_adapter_token"
])
**
prompt_adapter_kwargs
[
"max_prompt_adapter_token"
])
# Device arguments
device_kwargs
=
get_kwargs
(
DeviceConfig
)
device_group
=
parser
.
add_argument_group
(
title
=
"DeviceConfig"
,
description
=
DeviceConfig
.
__doc__
,
)
device_group
.
add_argument
(
"--device"
,
**
device_kwargs
[
"device"
],
deprecated
=
True
)
# Speculative arguments
# Speculative arguments
speculative_group
=
parser
.
add_argument_group
(
speculative_group
=
parser
.
add_argument_group
(
title
=
"SpeculativeConfig"
,
title
=
"SpeculativeConfig"
,
...
@@ -856,15 +844,6 @@ class EngineArgs:
...
@@ -856,15 +844,6 @@ class EngineArgs:
**
vllm_kwargs
[
"additional_config"
])
**
vllm_kwargs
[
"additional_config"
])
# Other arguments
# Other arguments
parser
.
add_argument
(
'--use-v2-block-manager'
,
action
=
'store_true'
,
default
=
True
,
deprecated
=
True
,
help
=
'[DEPRECATED] block manager v1 has been '
'removed and SelfAttnBlockSpaceManager (i.e. '
'block manager v2) is now the default. '
'Setting this flag to True or False'
' has no effect on vLLM behavior.'
)
parser
.
add_argument
(
'--disable-log-stats'
,
parser
.
add_argument
(
'--disable-log-stats'
,
action
=
'store_true'
,
action
=
'store_true'
,
help
=
'Disable logging statistics.'
)
help
=
'Disable logging statistics.'
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment