Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
dc1440cf
Unverified
Commit
dc1440cf
authored
May 19, 2025
by
Satyajith Chilappagari
Committed by
GitHub
May 19, 2025
Browse files
Neuron up mistral (#18222)
Signed-off-by:
Satyajith Chilappagari
<
satchill@amazon.com
>
parent
81712218
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
36 additions
and
2 deletions
+36
-2
tests/neuron/2_core/test_mistral.py
tests/neuron/2_core/test_mistral.py
+32
-0
vllm/model_executor/model_loader/neuronx_distributed.py
vllm/model_executor/model_loader/neuronx_distributed.py
+3
-0
vllm/platforms/neuron.py
vllm/platforms/neuron.py
+1
-2
No files found.
tests/neuron/2_core/test_mistral.py
0 → 100644
View file @
dc1440cf
# SPDX-License-Identifier: Apache-2.0
from
vllm
import
LLM
,
SamplingParams
def test_mistral():
    """Compare Mistral-7B completions on the Neuron device to fixed references.

    Builds an ``LLM`` for ``mistralai/Mistral-7B-v0.1`` with the keyword
    arguments below, generates with ``SamplingParams(top_k=1)`` for two
    prompts, and asserts that the text of the first completion of each
    result equals its hard-coded reference string.
    """
    neuron_overrides = {
        "sequence_parallel_enabled": False,
        "skip_warmup": True,
    }
    engine = LLM(
        model="mistralai/Mistral-7B-v0.1",
        tensor_parallel_size=2,
        max_num_seqs=4,
        max_model_len=512,
        use_v2_block_manager=True,
        override_neuron_config=neuron_overrides,
        device="neuron",
    )

    # Each prompt is kept next to the exact completion it must produce.
    cases = [
        (
            "The president of the United States is",
            " the most powerful person in the world. He is the head of state "
            "and head",
        ),
        (
            "The capital of France is",
            " a city of many faces. It is a city of history, culture, art",
        ),
    ]
    prompt_batch = [prompt for prompt, _ in cases]
    sampling = SamplingParams(top_k=1)

    results = engine.generate(prompt_batch, sampling)

    for (_, reference), result in zip(cases, results):
        completion = result.outputs[0].text
        assert reference == completion
vllm/model_executor/model_loader/neuronx_distributed.py
View file @
dc1440cf
...
@@ -48,6 +48,9 @@ TORCH_DTYPE_TO_NEURON_AMP = {
...
@@ -48,6 +48,9 @@ TORCH_DTYPE_TO_NEURON_AMP = {
# Models supported by Neuronx distributed for inference.
# Models supported by Neuronx distributed for inference.
_NEURON_SUPPORTED_MODELS
:
dict
[
str
,
tuple
[
str
,
str
]]
=
{
_NEURON_SUPPORTED_MODELS
:
dict
[
str
,
tuple
[
str
,
str
]]
=
{
"LlamaForCausalLM"
:
"LlamaForCausalLM"
:
(
"neuronx_distributed_inference.models.llama.modeling_llama"
,
"NeuronLlamaForCausalLM"
),
"MistralForCausalLM"
:
(
"neuronx_distributed_inference.models.llama.modeling_llama"
,
(
"neuronx_distributed_inference.models.llama.modeling_llama"
,
"NeuronLlamaForCausalLM"
),
"NeuronLlamaForCausalLM"
),
"DbrxForCausalLM"
:
"DbrxForCausalLM"
:
...
...
vllm/platforms/neuron.py
View file @
dc1440cf
...
@@ -51,8 +51,7 @@ class NeuronPlatform(Platform):
...
@@ -51,8 +51,7 @@ class NeuronPlatform(Platform):
assert
(
vllm_config
.
lora_config
assert
(
vllm_config
.
lora_config
is
None
),
"LoRA is not supported for Neuron backend."
is
None
),
"LoRA is not supported for Neuron backend."
cache_config
=
vllm_config
.
cache_config
if
vllm_config
.
cache_config
and
vllm_config
.
model_config
:
if
cache_config
:
# neuron needs block_size = max_model_len
# neuron needs block_size = max_model_len
vllm_config
.
cache_config
.
block_size
=
\
vllm_config
.
cache_config
.
block_size
=
\
vllm_config
.
model_config
.
max_model_len
# type: ignore
vllm_config
.
model_config
.
max_model_len
# type: ignore
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment