Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
e0cbad4e
Unverified
Commit
e0cbad4e
authored
May 27, 2025
by
Satyajith Chilappagari
Committed by
GitHub
May 27, 2025
Browse files
[Neuron] Support quantization on neuron (#18283)
Signed-off-by:
Satyajith Chilappagari
<
satchill@amazon.com
>
parent
b48d5cca
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
20 additions
and
2 deletions
+20
-2
tests/neuron/1_core/test_neuron_quant.py
tests/neuron/1_core/test_neuron_quant.py
+11
-0
vllm/model_executor/layers/quantization/neuron_quant.py
vllm/model_executor/layers/quantization/neuron_quant.py
+8
-1
vllm/platforms/neuron.py
vllm/platforms/neuron.py
+1
-1
No files found.
tests/neuron/1_core/test_neuron_quant.py
0 → 100644
View file @
e0cbad4e
# SPDX-License-Identifier: Apache-2.0
from
vllm.model_executor.layers.quantization.neuron_quant
import
(
NeuronQuantConfig
)
def test_get_supported_act_dtypes():
    """Neuron quant config must report any activation dtype as supported."""
    supported = NeuronQuantConfig().get_supported_act_dtypes()

    # The names are deliberately made up: membership must hold for each one.
    for candidate in ("any_dtype1", "any_dtype2"):
        assert candidate in supported
vllm/model_executor/layers/quantization/neuron_quant.py
View file @
e0cbad4e
...
...
@@ -13,6 +13,12 @@ from vllm.model_executor.layers.quantization.base_config import (
# Names of the quantized dtypes this module lists as supported.
SUPPORTED_QUANT_DTYPE_LIST: list[str] = [
    's8',
    'f8e4m3fn',
]
class AlwaysSupportedDtypes(list):
    """List subclass whose membership test succeeds for every value.

    Only ``in`` / ``not in`` checks are overridden; every other list
    operation (length, iteration, indexing) keeps the built-in behaviour.
    """

    def __contains__(self, item: object) -> bool:
        # Report membership unconditionally, whatever ``item`` is.
        return True
class
NeuronQuantConfig
(
QuantizationConfig
):
"""Int8 Quantization Config class for Neuron Backend."""
...
...
@@ -35,7 +41,8 @@ class NeuronQuantConfig(QuantizationConfig):
return
"neuron_quant"
def get_supported_act_dtypes(self) -> list[str]:
    """Return the activation dtypes accepted by this quantization config.

    Returns:
        An ``AlwaysSupportedDtypes`` instance, so an ``in`` check against
        the result succeeds for any dtype.
    """
    # Neuron implements custom handling logic for quantization support
    return AlwaysSupportedDtypes()
@
classmethod
def
get_min_capability
(
cls
)
->
int
:
...
...
vllm/platforms/neuron.py
View file @
e0cbad4e
...
...
@@ -28,7 +28,7 @@ class NeuronPlatform(Platform):
device_name: str = "neuron"
device_type: str = "neuron"
ray_device_key: str = "neuron_cores"
# Quantization method names listed as supported for this platform.
supported_quantization: list[str] = ["neuron_quant", "fbgemm_fp8"]
device_control_env_var: str = "NEURON_RT_VISIBLE_CORES"
@
classmethod
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment