Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
66b809cc
Commit
66b809cc
authored
Feb 08, 2025
by
zhuwenwen
Browse files
Merge tag 'v0.7.2' into v0.7.2-dev
parents
37b63c24
0408efc6
Changes
1000
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
156 additions
and
0 deletions
+156
-0
tests/lora/test_punica_ops_sizes.py
tests/lora/test_punica_ops_sizes.py
+1
-0
tests/lora/test_punica_ops_variation.py
tests/lora/test_punica_ops_variation.py
+1
-0
tests/lora/test_quant_model.py
tests/lora/test_quant_model.py
+2
-0
tests/lora/test_qwen2vl.py
tests/lora/test_qwen2vl.py
+2
-0
tests/lora/test_tokenizer_group.py
tests/lora/test_tokenizer_group.py
+2
-0
tests/lora/test_ultravox.py
tests/lora/test_ultravox.py
+123
-0
tests/lora/test_utils.py
tests/lora/test_utils.py
+2
-0
tests/lora/test_worker.py
tests/lora/test_worker.py
+2
-0
tests/lora/utils.py
tests/lora/utils.py
+2
-0
tests/metrics/test_metrics.py
tests/metrics/test_metrics.py
+2
-0
tests/model_executor/conftest.py
tests/model_executor/conftest.py
+2
-0
tests/model_executor/test_enabled_custom_ops.py
tests/model_executor/test_enabled_custom_ops.py
+2
-0
tests/model_executor/test_guided_processors.py
tests/model_executor/test_guided_processors.py
+2
-0
tests/model_executor/test_model_load_with_params.py
tests/model_executor/test_model_load_with_params.py
+2
-0
tests/model_executor/weight_utils.py
tests/model_executor/weight_utils.py
+2
-0
tests/models/decoder_only/audio_language/test_ultravox.py
tests/models/decoder_only/audio_language/test_ultravox.py
+2
-0
tests/models/decoder_only/language/test_aqlm.py
tests/models/decoder_only/language/test_aqlm.py
+1
-0
tests/models/decoder_only/language/test_fp8.py
tests/models/decoder_only/language/test_fp8.py
+2
-0
tests/models/decoder_only/language/test_gguf.py
tests/models/decoder_only/language/test_gguf.py
+1
-0
tests/models/decoder_only/language/test_gptq_marlin.py
tests/models/decoder_only/language/test_gptq_marlin.py
+1
-0
No files found.
Too many changes to show.
To preserve performance only
1000 of 1000+
files are displayed.
Plain diff
Email patch
tests/lora/test_punica_ops_sizes.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""
This script is mainly used to tests various hidden_sizes. We have collected the
hidden_sizes included in the LoRA models currently supported by vLLM. It tests
...
...
tests/lora/test_punica_ops_variation.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""
This script is mainly used to test whether trtion kernels can run normally
under different conditions, including various batches, numbers of LoRA , and
...
...
tests/lora/test_quant_model.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
# Adapted from
# https://github.com/fmmoret/vllm/blob/fm-support-lora-on-quantized-models/tests/lora/test_llama.py
from
dataclasses
import
dataclass
...
...
tests/lora/test_qwen2vl.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
from
typing
import
List
import
pytest
...
...
tests/lora/test_tokenizer_group.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
pytest
from
transformers
import
AutoTokenizer
,
PreTrainedTokenizerBase
...
...
tests/lora/test_ultravox.py
0 → 100644
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
shutil
from
os
import
path
from
tempfile
import
TemporaryDirectory
from
typing
import
List
,
Tuple
import
torch
from
huggingface_hub
import
snapshot_download
from
safetensors.torch
import
load_file
,
save_file
from
transformers
import
AutoTokenizer
from
vllm.lora.request
import
LoRARequest
from
..models.utils
import
check_outputs_equal
ULTRAVOX_MODEL_NAME
=
"fixie-ai/ultravox-v0_3"
LLMA_MODEL_NAME
=
"meta-llama/Llama-3.1-8B-Instruct"
VLLM_PLACEHOLDER
=
"<|reserved_special_token_0|>"
PROMPT
=
"Tell me about a Fool's mate move in 20 words. Provide the moves!"
def
llama3_1_8b_chess_lora_path
():
return
snapshot_download
(
repo_id
=
"mkopecki/chess-lora-adapter-llama-3.1-8b"
)
# can't use llama lora adapter without module name transformation
# because ultravox nest language model
def
transform_module_names_for_ultravox
(
state_dict
):
transformed_state_dict
=
{}
for
key
,
value
in
state_dict
.
items
():
new_key
=
key
.
replace
(
"base_model.model"
,
"base_model.model.language_model"
)
transformed_state_dict
[
new_key
]
=
value
return
transformed_state_dict
def
mk_llama3_1_8b_ultravox_chess_lora
(
source_repo
,
target_path
):
tensor_file
=
"adapter_model.safetensors"
state_dict
=
load_file
(
path
.
join
(
source_repo
,
tensor_file
))
transformed_state_dict
=
transform_module_names_for_ultravox
(
state_dict
)
save_file
(
transformed_state_dict
,
path
.
join
(
target_path
,
tensor_file
))
config_file
=
"adapter_config.json"
shutil
.
copyfile
(
path
.
join
(
source_repo
,
config_file
),
path
.
join
(
target_path
,
config_file
))
return
target_path
def
_get_prompt
(
audio_count
,
question
,
placeholder
,
model_name
)
->
str
:
tokenizer
=
AutoTokenizer
.
from_pretrained
(
model_name
)
placeholder
=
f
"
{
placeholder
}
\n
"
*
audio_count
return
tokenizer
.
apply_chat_template
([{
'role'
:
'user'
,
'content'
:
f
"
{
placeholder
}{
question
}
"
}],
tokenize
=
False
,
add_generation_prompt
=
True
)
def
test_ultravox_lora
(
vllm_runner
):
"""
TODO: Train an Ultravox LoRA instead of using a Llama LoRA.
"""
# Workaround to prevent device mismatch in Whisper.
# Can be removed when it is fixed upstream in transformer
# https://github.com/huggingface/transformers/pull/35866
torch
.
set_default_device
(
"cpu"
)
llama3_1_8b_chess_lora
=
llama3_1_8b_chess_lora_path
()
with
TemporaryDirectory
()
as
temp_ultravox_lora_dir
:
llama3_1_8b_ultravox_chess_lora
=
mk_llama3_1_8b_ultravox_chess_lora
(
llama3_1_8b_chess_lora
,
temp_ultravox_lora_dir
)
with
vllm_runner
(
ULTRAVOX_MODEL_NAME
,
enforce_eager
=
True
,
max_num_seqs
=
2
,
enable_lora
=
True
,
max_loras
=
1
,
max_lora_rank
=
128
,
dtype
=
"bfloat16"
,
max_model_len
=
1024
,
)
as
vllm_model
:
ultravox_outputs
:
List
[
Tuple
[
List
[
int
],
str
]]
=
vllm_model
.
generate_greedy
(
[
_get_prompt
(
0
,
PROMPT
,
VLLM_PLACEHOLDER
,
ULTRAVOX_MODEL_NAME
)
],
256
,
lora_request
=
LoRARequest
(
str
(
1
),
1
,
llama3_1_8b_ultravox_chess_lora
),
)
# run llama with and without lora to compare outputs with above
with
vllm_runner
(
LLMA_MODEL_NAME
,
enforce_eager
=
True
,
max_num_seqs
=
2
,
enable_lora
=
True
,
max_loras
=
1
,
max_lora_rank
=
128
,
dtype
=
"bfloat16"
,
max_model_len
=
1024
,
)
as
vllm_model
:
llama_outputs
:
List
[
Tuple
[
List
[
int
],
str
]]
=
(
vllm_model
.
generate_greedy
(
[
_get_prompt
(
0
,
PROMPT
,
VLLM_PLACEHOLDER
,
LLMA_MODEL_NAME
)],
256
,
lora_request
=
LoRARequest
(
str
(
1
),
1
,
llama3_1_8b_chess_lora
),
))
check_outputs_equal
(
outputs_0_lst
=
ultravox_outputs
,
outputs_1_lst
=
llama_outputs
,
name_0
=
"ultravox"
,
name_1
=
"llama"
,
)
tests/lora/test_utils.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
from
collections
import
OrderedDict
from
unittest.mock
import
patch
...
...
tests/lora/test_worker.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
os
import
random
import
tempfile
...
...
tests/lora/utils.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
from
typing
import
Dict
,
List
,
Optional
import
torch
...
...
tests/metrics/test_metrics.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
os
import
time
from
typing
import
List
...
...
tests/model_executor/conftest.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
pytest
...
...
tests/model_executor/test_enabled_custom_ops.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
from
typing
import
List
import
pytest
...
...
tests/model_executor/test_guided_processors.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
pickle
import
pytest
...
...
tests/model_executor/test_model_load_with_params.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
os
import
pytest
...
...
tests/model_executor/weight_utils.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
os
import
tempfile
...
...
tests/models/decoder_only/audio_language/test_ultravox.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
from
typing
import
List
,
Optional
,
Tuple
,
Type
import
numpy
as
np
...
...
tests/models/decoder_only/language/test_aqlm.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""Compare the outputs of a AQLM model between vLLM and HF Transformers
Run `pytest tests/models/test_aqlm.py`.
...
...
tests/models/decoder_only/language/test_fp8.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
# flake8: noqa
"""Tests fp8 models against ground truth generation
Note: these tests will only pass on L4 GPU.
...
...
tests/models/decoder_only/language/test_gguf.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""
Tests gguf models against unquantized models generations
Note: To pass the test, quantization higher than Q4 should be used
...
...
tests/models/decoder_only/language/test_gptq_marlin.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""Compares the outputs of gptq vs gptq_marlin
Note: GPTQ and Marlin do not have bitwise correctness.
As a result, in this test, we just confirm that the top selected tokens of the
...
...
Prev
1
…
12
13
14
15
16
17
18
19
20
…
50
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment