Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
24834f48
Unverified
Commit
24834f48
authored
Apr 09, 2025
by
ajayvohra2005
Committed by
GitHub
Apr 09, 2025
Browse files
update neuron config (#16289)
Signed-off-by:
Ajay Vohra
<
ajayvohr@amazon.com
>
parent
ec7da6fc
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
32 additions
and
1 deletion
+32
-1
vllm/model_executor/model_loader/neuron.py
vllm/model_executor/model_loader/neuron.py
+32
-1
No files found.
vllm/model_executor/model_loader/neuron.py
View file @
24834f48
...
@@ -174,8 +174,39 @@ def _is_neuron_on_device_sampling_disabled(model_config: ModelConfig) -> bool:
...
@@ -174,8 +174,39 @@ def _is_neuron_on_device_sampling_disabled(model_config: ModelConfig) -> bool:
def _get_neuron_config_after_override(default_neuron_config,
                                      overridden_neuron_config):
    """Merge user overrides into the default settings and build a NeuronConfig.

    The override entries ``sparse_attn``, ``kv_cache_quant``,
    ``continuous_batching``, ``quant`` and ``on_device_generation`` are
    given as plain mappings of keyword arguments. Each one that is present
    and non-empty is converted into its ``transformers_neuronx.config``
    class before merging. An empty or ``None`` value for one of these keys
    is dropped, so the corresponding default (if any) stays in effect.

    Args:
        default_neuron_config: dict of default ``NeuronConfig`` keyword
            arguments. It is updated in place with the processed overrides.
        overridden_neuron_config: dict of user overrides, or a falsy value
            (``None`` / ``{}``) for "no overrides". It is never modified.

    Returns:
        ``NeuronConfig(**default_neuron_config)`` after the overrides have
        been applied.
    """
    from collections.abc import Mapping

    from transformers_neuronx.config import (ContinuousBatchingConfig,
                                             GenerationConfig,
                                             KVCacheQuantizationConfig,
                                             NeuronConfig, QuantizationConfig,
                                             SparseAttnConfig)

    # Override key -> config class built from that key's keyword arguments.
    nested_config_types = {
        "sparse_attn": SparseAttnConfig,
        "kv_cache_quant": KVCacheQuantizationConfig,
        "continuous_batching": ContinuousBatchingConfig,
        "quant": QuantizationConfig,
        "on_device_generation": GenerationConfig,
    }

    # Work on a shallow copy: popping from / writing into the caller's dict
    # would leave config objects behind in it, and a second call with the
    # same dict would then fail on ``Config(**<config object>)``.
    overrides = dict(overridden_neuron_config or {})

    for key, config_cls in nested_config_types.items():
        value = overrides.pop(key, None)
        if not value:
            # Absent or empty: leave any default for this key untouched.
            continue
        # Only mappings are expanded into keyword arguments; any other
        # truthy value (e.g. an already-built config object) is passed on.
        overrides[key] = (config_cls(**value)
                          if isinstance(value, Mapping) else value)

    default_neuron_config.update(overrides)
    return NeuronConfig(**default_neuron_config)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment