Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
2c5e637b
Unverified
Commit
2c5e637b
authored
Feb 22, 2025
by
Kevin H. Luu
Committed by
GitHub
Feb 22, 2025
Browse files
[ci] Use env var to control whether to use S3 bucket in CI (#13634)
parent
322d2a27
Changes
30
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
155 additions
and
31 deletions
+155
-31
tests/mq_llm_engine/test_load.py
tests/mq_llm_engine/test_load.py
+2
-4
tests/multimodal/test_processing.py
tests/multimodal/test_processing.py
+2
-4
tests/prefix_caching/test_prefix_caching.py
tests/prefix_caching/test_prefix_caching.py
+1
-1
tests/test_config.py
tests/test_config.py
+4
-10
tests/test_regression.py
tests/test_regression.py
+3
-10
tests/worker/test_swap.py
tests/worker/test_swap.py
+1
-1
vllm/engine/arg_utils.py
vllm/engine/arg_utils.py
+9
-0
vllm/envs.py
vllm/envs.py
+4
-0
vllm/model_executor/model_loader/loader.py
vllm/model_executor/model_loader/loader.py
+0
-1
vllm/test_utils.py
vllm/test_utils.py
+129
-0
No files found.
tests/mq_llm_engine/test_load.py
View file @
2c5e637b
...
@@ -10,14 +10,12 @@ import pytest
...
@@ -10,14 +10,12 @@ import pytest
from
tests.mq_llm_engine.utils
import
RemoteMQLLMEngine
,
generate
from
tests.mq_llm_engine.utils
import
RemoteMQLLMEngine
,
generate
from
vllm.engine.arg_utils
import
AsyncEngineArgs
from
vllm.engine.arg_utils
import
AsyncEngineArgs
MODEL
=
"
s3://vllm-ci-model-weights
/gemma-1.1-2b-it"
MODEL
=
"
google
/gemma-1.1-2b-it"
NUM_EXPECTED_TOKENS
=
10
NUM_EXPECTED_TOKENS
=
10
NUM_REQUESTS
=
10000
NUM_REQUESTS
=
10000
# Scenarios to test for num generated token.
# Scenarios to test for num generated token.
ENGINE_ARGS
=
AsyncEngineArgs
(
model
=
MODEL
,
ENGINE_ARGS
=
AsyncEngineArgs
(
model
=
MODEL
,
disable_log_requests
=
True
)
load_format
=
"runai_streamer"
,
disable_log_requests
=
True
)
@
pytest
.
fixture
(
scope
=
"function"
)
@
pytest
.
fixture
(
scope
=
"function"
)
...
...
tests/multimodal/test_processing.py
View file @
2c5e637b
...
@@ -553,8 +553,7 @@ def test_find_mm_placeholders(
...
@@ -553,8 +553,7 @@ def test_find_mm_placeholders(
assert
result
==
expected
assert
result
==
expected
@
pytest
.
mark
.
parametrize
(
@
pytest
.
mark
.
parametrize
(
"model_id"
,
[
"llava-hf/llava-v1.6-mistral-7b-hf"
])
"model_id"
,
[
"s3://vllm-ci-model-weights/llava-v1.6-mistral-7b-hf"
])
@
pytest
.
mark
.
parametrize
(
@
pytest
.
mark
.
parametrize
(
(
"limit"
,
"num_supported"
,
"is_valid"
),
(
"limit"
,
"num_supported"
,
"is_valid"
),
[(
0
,
0
,
True
),
(
0
,
1
,
True
),
(
1
,
0
,
False
),
(
1
,
1
,
True
),
(
1
,
2
,
True
),
[(
0
,
0
,
True
),
(
0
,
1
,
True
),
(
1
,
0
,
False
),
(
1
,
1
,
True
),
(
1
,
2
,
True
),
...
@@ -593,8 +592,7 @@ def test_limit_mm_per_prompt_dummy(model_id, limit, num_supported, is_valid):
...
@@ -593,8 +592,7 @@ def test_limit_mm_per_prompt_dummy(model_id, limit, num_supported, is_valid):
profiler
.
get_dummy_data
(
model_config
.
max_model_len
)
profiler
.
get_dummy_data
(
model_config
.
max_model_len
)
@
pytest
.
mark
.
parametrize
(
@
pytest
.
mark
.
parametrize
(
"model_id"
,
[
"llava-hf/llava-v1.6-mistral-7b-hf"
])
"model_id"
,
[
"s3://vllm-ci-model-weights/llava-v1.6-mistral-7b-hf"
])
@
pytest
.
mark
.
parametrize
(
@
pytest
.
mark
.
parametrize
(
(
"num_images"
,
"limit"
,
"is_valid"
),
(
"num_images"
,
"limit"
,
"is_valid"
),
[(
0
,
0
,
True
),
(
0
,
1
,
True
),
(
1
,
0
,
False
),
(
1
,
1
,
True
),
(
1
,
2
,
True
),
[(
0
,
0
,
True
),
(
0
,
1
,
True
),
(
1
,
0
,
False
),
(
1
,
1
,
True
),
(
1
,
2
,
True
),
...
...
tests/prefix_caching/test_prefix_caching.py
View file @
2c5e637b
...
@@ -16,7 +16,7 @@ from vllm.engine.llm_engine import LLMEngine
...
@@ -16,7 +16,7 @@ from vllm.engine.llm_engine import LLMEngine
from
..models.utils
import
check_outputs_equal
from
..models.utils
import
check_outputs_equal
MODELS
=
[
MODELS
=
[
"
facebook/opt-125m
"
,
"
distilbert/distilgpt2
"
,
]
]
UNSTABLE_PROMPT_SEQUENCE
=
[
UNSTABLE_PROMPT_SEQUENCE
=
[
...
...
tests/test_config.py
View file @
2c5e637b
...
@@ -8,20 +8,14 @@ from vllm.config import ModelConfig, PoolerConfig
...
@@ -8,20 +8,14 @@ from vllm.config import ModelConfig, PoolerConfig
from
vllm.model_executor.layers.pooler
import
PoolingType
from
vllm.model_executor.layers.pooler
import
PoolingType
from
vllm.platforms
import
current_platform
from
vllm.platforms
import
current_platform
from
.conftest
import
MODEL_WEIGHTS_S3_BUCKET
@
pytest
.
mark
.
parametrize
(
@
pytest
.
mark
.
parametrize
(
(
"model_id"
,
"expected_runner_type"
,
"expected_task"
),
(
"model_id"
,
"expected_runner_type"
,
"expected_task"
),
[
[
(
f
"
{
MODEL_WEIGHTS_S3_BUCKET
}
/distilbert/distilgpt2"
,
"generate"
,
(
"distilbert/distilgpt2"
,
"generate"
,
"generate"
),
"generate"
),
(
"intfloat/e5-mistral-7b-instruct"
,
"pooling"
,
"embed"
),
(
f
"
{
MODEL_WEIGHTS_S3_BUCKET
}
/intfloat/e5-mistral-7b-instruct"
,
(
"jason9693/Qwen2.5-1.5B-apeach"
,
"pooling"
,
"classify"
),
"pooling"
,
"embed"
),
(
"cross-encoder/ms-marco-MiniLM-L-6-v2"
,
"pooling"
,
"score"
),
(
f
"
{
MODEL_WEIGHTS_S3_BUCKET
}
/jason9693/Qwen2.5-1.5B-apeach"
,
"pooling"
,
"classify"
),
(
f
"
{
MODEL_WEIGHTS_S3_BUCKET
}
/cross-encoder/ms-marco-MiniLM-L-6-v2"
,
"pooling"
,
"score"
),
(
"Qwen/Qwen2.5-Math-RM-72B"
,
"pooling"
,
"reward"
),
(
"Qwen/Qwen2.5-Math-RM-72B"
,
"pooling"
,
"reward"
),
(
"openai/whisper-small"
,
"transcription"
,
"transcription"
),
(
"openai/whisper-small"
,
"transcription"
,
"transcription"
),
],
],
...
...
tests/test_regression.py
View file @
2c5e637b
...
@@ -10,9 +10,6 @@ import gc
...
@@ -10,9 +10,6 @@ import gc
import
torch
import
torch
from
vllm
import
LLM
,
SamplingParams
from
vllm
import
LLM
,
SamplingParams
from
vllm.config
import
LoadFormat
from
.conftest
import
MODEL_WEIGHTS_S3_BUCKET
def
test_duplicated_ignored_sequence_group
():
def
test_duplicated_ignored_sequence_group
():
...
@@ -21,8 +18,7 @@ def test_duplicated_ignored_sequence_group():
...
@@ -21,8 +18,7 @@ def test_duplicated_ignored_sequence_group():
sampling_params
=
SamplingParams
(
temperature
=
0.01
,
sampling_params
=
SamplingParams
(
temperature
=
0.01
,
top_p
=
0.1
,
top_p
=
0.1
,
max_tokens
=
256
)
max_tokens
=
256
)
llm
=
LLM
(
model
=
f
"
{
MODEL_WEIGHTS_S3_BUCKET
}
/distilbert/distilgpt2"
,
llm
=
LLM
(
model
=
"distilbert/distilgpt2"
,
load_format
=
LoadFormat
.
RUNAI_STREAMER
,
max_num_batched_tokens
=
4096
,
max_num_batched_tokens
=
4096
,
tensor_parallel_size
=
1
)
tensor_parallel_size
=
1
)
prompts
=
[
"This is a short prompt"
,
"This is a very long prompt "
*
1000
]
prompts
=
[
"This is a short prompt"
,
"This is a very long prompt "
*
1000
]
...
@@ -35,8 +31,7 @@ def test_max_tokens_none():
...
@@ -35,8 +31,7 @@ def test_max_tokens_none():
sampling_params
=
SamplingParams
(
temperature
=
0.01
,
sampling_params
=
SamplingParams
(
temperature
=
0.01
,
top_p
=
0.1
,
top_p
=
0.1
,
max_tokens
=
None
)
max_tokens
=
None
)
llm
=
LLM
(
model
=
f
"
{
MODEL_WEIGHTS_S3_BUCKET
}
/distilbert/distilgpt2"
,
llm
=
LLM
(
model
=
"distilbert/distilgpt2"
,
load_format
=
LoadFormat
.
RUNAI_STREAMER
,
max_num_batched_tokens
=
4096
,
max_num_batched_tokens
=
4096
,
tensor_parallel_size
=
1
)
tensor_parallel_size
=
1
)
prompts
=
[
"Just say hello!"
]
prompts
=
[
"Just say hello!"
]
...
@@ -46,9 +41,7 @@ def test_max_tokens_none():
...
@@ -46,9 +41,7 @@ def test_max_tokens_none():
def
test_gc
():
def
test_gc
():
llm
=
LLM
(
model
=
f
"
{
MODEL_WEIGHTS_S3_BUCKET
}
/distilbert/distilgpt2"
,
llm
=
LLM
(
model
=
"distilbert/distilgpt2"
,
enforce_eager
=
True
)
load_format
=
LoadFormat
.
RUNAI_STREAMER
,
enforce_eager
=
True
)
del
llm
del
llm
gc
.
collect
()
gc
.
collect
()
...
...
tests/worker/test_swap.py
View file @
2c5e637b
...
@@ -10,7 +10,7 @@ from vllm.worker.worker import Worker
...
@@ -10,7 +10,7 @@ from vllm.worker.worker import Worker
def
test_swap
()
->
None
:
def
test_swap
()
->
None
:
# Configure the engine.
# Configure the engine.
engine_args
=
EngineArgs
(
model
=
"
s3://vllm-ci-model-weights
/distilgpt2"
,
engine_args
=
EngineArgs
(
model
=
"
distilbert
/distilgpt2"
,
dtype
=
"half"
,
dtype
=
"half"
,
load_format
=
"dummy"
)
load_format
=
"dummy"
)
engine_config
=
engine_args
.
create_engine_config
()
engine_config
=
engine_args
.
create_engine_config
()
...
...
vllm/engine/arg_utils.py
View file @
2c5e637b
...
@@ -22,6 +22,7 @@ from vllm.executor.executor_base import ExecutorBase
...
@@ -22,6 +22,7 @@ from vllm.executor.executor_base import ExecutorBase
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.quantization
import
QUANTIZATION_METHODS
from
vllm.model_executor.layers.quantization
import
QUANTIZATION_METHODS
from
vllm.plugins
import
load_general_plugins
from
vllm.plugins
import
load_general_plugins
from
vllm.test_utils
import
MODEL_WEIGHTS_S3_BUCKET
,
MODELS_ON_S3
from
vllm.transformers_utils.utils
import
check_gguf_file
from
vllm.transformers_utils.utils
import
check_gguf_file
from
vllm.usage.usage_lib
import
UsageContext
from
vllm.usage.usage_lib
import
UsageContext
from
vllm.utils
import
FlexibleArgumentParser
,
StoreBoolean
from
vllm.utils
import
FlexibleArgumentParser
,
StoreBoolean
...
@@ -1141,6 +1142,14 @@ class EngineArgs:
...
@@ -1141,6 +1142,14 @@ class EngineArgs:
f
", but got
{
self
.
cpu_offload_gb
}
"
)
f
", but got
{
self
.
cpu_offload_gb
}
"
)
device_config
=
DeviceConfig
(
device
=
self
.
device
)
device_config
=
DeviceConfig
(
device
=
self
.
device
)
# NOTE: This is to allow model loading from S3 in CI
if
(
not
isinstance
(
self
,
AsyncEngineArgs
)
and
envs
.
VLLM_CI_USE_S3
and
self
.
model
in
MODELS_ON_S3
and
self
.
load_format
==
LoadFormat
.
AUTO
):
# noqa: E501
self
.
model
=
f
"
{
MODEL_WEIGHTS_S3_BUCKET
}
/
{
self
.
model
}
"
self
.
load_format
=
LoadFormat
.
RUNAI_STREAMER
model_config
=
self
.
create_model_config
()
model_config
=
self
.
create_model_config
()
if
(
model_config
.
is_multimodal_model
and
not
envs
.
VLLM_USE_V1
if
(
model_config
.
is_multimodal_model
and
not
envs
.
VLLM_USE_V1
...
...
vllm/envs.py
View file @
2c5e637b
...
@@ -618,6 +618,10 @@ environment_variables: Dict[str, Callable[[], Any]] = {
...
@@ -618,6 +618,10 @@ environment_variables: Dict[str, Callable[[], Any]] = {
# Port of the master node in the data parallel setting
# Port of the master node in the data parallel setting
"VLLM_DP_MASTER_PORT"
:
"VLLM_DP_MASTER_PORT"
:
lambda
:
int
(
os
.
getenv
(
"VLLM_DP_MASTER_PORT"
,
"0"
)),
lambda
:
int
(
os
.
getenv
(
"VLLM_DP_MASTER_PORT"
,
"0"
)),
# Whether to use S3 path for model loading in CI via RunAI Streamer
"VLLM_CI_USE_S3"
:
lambda
:
os
.
environ
.
get
(
"VLLM_CI_USE_S3"
,
"0"
)
==
"1"
,
}
}
# end-env-vars-definition
# end-env-vars-definition
...
...
vllm/model_executor/model_loader/loader.py
View file @
2c5e637b
...
@@ -1394,7 +1394,6 @@ class RunaiModelStreamerLoader(BaseModelLoader):
...
@@ -1394,7 +1394,6 @@ class RunaiModelStreamerLoader(BaseModelLoader):
def
get_model_loader
(
load_config
:
LoadConfig
)
->
BaseModelLoader
:
def
get_model_loader
(
load_config
:
LoadConfig
)
->
BaseModelLoader
:
"""Get a model loader based on the load format."""
"""Get a model loader based on the load format."""
if
isinstance
(
load_config
.
load_format
,
type
):
if
isinstance
(
load_config
.
load_format
,
type
):
return
load_config
.
load_format
(
load_config
)
return
load_config
.
load_format
(
load_config
)
...
...
vllm/test_utils.py
0 → 100644
View file @
2c5e637b
# SPDX-License-Identifier: Apache-2.0
MODELS_ON_S3
=
[
"adept/fuyu-8b"
,
"ai21labs/AI21-Jamba-1.5-Mini"
,
"ai21labs/Jamba-tiny-random"
,
"ai21labs/Jamba-tiny-reward-dev"
,
"allenai/Molmo-7B-D-0924"
,
"allenai/OLMo-1B-hf"
,
"allenai/OLMoE-1B-7B-0924-Instruct"
,
"amd/Llama-3.1-8B-Instruct-FP8-KV-Quark-test"
,
"AMead10/Llama-3.2-1B-Instruct-AWQ"
,
"ArthurZ/Ilama-3.2-1B"
,
"BAAI/bge-base-en-v1.5"
,
"BAAI/bge-multilingual-gemma2"
,
"BAAI/bge-reranker-v2-m3"
,
"bigcode/starcoder2-3b"
,
"cross-encoder/ms-marco-MiniLM-L-6-v2"
,
"cross-encoder/quora-roberta-base"
,
"deepseek-ai/deepseek-vl2-tiny"
,
"distilbert/distilgpt2"
,
"facebook/bart-base"
,
"facebook/bart-large-cnn"
,
# "fixie-ai/ultravox-v0_5-llama-3_2-1b",
"google/gemma-1.1-2b-it"
,
"google/gemma-2-2b-it"
,
"google/paligemma-3b-pt-224"
,
"h2oai/h2ovl-mississippi-800m"
,
"HuggingFaceM4/Idefics3-8B-Llama3"
,
"internlm/internlm2-1_8b-reward"
,
"intfloat/e5-mistral-7b-instruct"
,
"intfloat/multilingual-e5-large"
,
"jason9693/Qwen2.5-1.5B-apeach"
,
"llava-hf/llava-1.5-7b-hf"
,
"llava-hf/llava-onevision-qwen2-0.5b-ov-hf"
,
"llava-hf/llava-v1.6-mistral-7b-hf"
,
"llava-hf/LLaVA-NeXT-Video-7B-hf"
,
# "meta-llama/Llama-2-7b-hf",
"meta-llama/Llama-3.2-11B-Vision-Instruct"
,
"meta-llama/Llama-3.2-1B"
,
"meta-llama/Llama-3.2-1B-Instruct"
,
"meta-llama/Meta-Llama-3-8B"
,
"microsoft/phi-2"
,
"microsoft/Phi-3-mini-4k-instruct"
,
"microsoft/Phi-3-small-8k-instruct"
,
"microsoft/Phi-3-vision-128k-instruct"
,
"microsoft/Phi-3.5-MoE-instruct"
,
"microsoft/Phi-3.5-vision-instruct"
,
# "mistralai/Mistral-7B-Instruct-v0.1",
"mistralai/Mixtral-8x7B-Instruct-v0.1"
,
"mistralai/Pixtral-12B-2409"
,
"mistral-community/Mixtral-8x22B-v0.1-AWQ"
,
"ModelCloud/Qwen1.5-1.8B-Chat-GPTQ-4bits-dynamic-cfg-with-lm_head"
,
"ModelCloud/Qwen1.5-1.8B-Chat-GPTQ-4bits-dynamic-cfg-with-lm_head-symFalse"
,
"ModelCloud/Qwen1.5-1.8B-Chat-GPTQ-4bits-dynamic-cfg-with-lm_head-symTrue"
,
"ModelCloud/TinyLlama-1.1B-Chat-v1.0-GPTQ-4bit-10-25-2024"
,
"neuralmagic/Llama-3.2-1B-quantized.w8a8"
,
"neuralmagic/Meta-Llama-3-8B-Instruct-FP8"
,
"neuralmagic/Meta-Llama-3-8B-Instruct-FP8-KV"
,
"nm-testing/asym-w8w8-int8-static-per-tensor-tiny-llama"
,
"nm-testing/llama2.c-stories42M-pruned2.4-compressed"
,
"nm-testing/llama7b-one-shot-2_4-w4a16-marlin24-t"
,
"nm-testing/Meta-Llama-3-8B-FP8-compressed-tensors-test"
,
"nm-testing/Meta-Llama-3-8B-Instruct-FP8-Dynamic-2of4-testing"
,
"nm-testing/Meta-Llama-3-8B-Instruct-FP8-Dynamic-IA-Per-Tensor-Weight-testing"
,
"nm-testing/Meta-Llama-3-8B-Instruct-FP8-Static-Per-Tensor-testing"
,
"nm-testing/Meta-Llama-3-8B-Instruct-FP8-Static-testing"
,
"nm-testing/Meta-Llama-3-8B-Instruct-W8A8-Dynamic-Asym"
,
"nm-testing/Meta-Llama-3-8B-Instruct-W8A8-Static-Per-Tensor-Asym"
,
"nm-testing/Meta-Llama-3-8B-Instruct-W8A8-Static-Per-Tensor-Sym"
,
"nm-testing/Phi-3-mini-128k-instruct-FP8"
,
"nm-testing/Qwen2-0.5B-Instruct-FP8-SkipQKV"
,
"nm-testing/Qwen2-1.5B-Instruct-FP8-K-V"
,
"nm-testing/tinyllama-oneshot-w4a16-channel-v2"
,
"nm-testing/tinyllama-oneshot-w4a16-group128-v2"
,
"nm-testing/tinyllama-oneshot-w8-channel-a8-tensor"
,
"nm-testing/tinyllama-oneshot-w8a16-per-channel"
,
"nm-testing/tinyllama-oneshot-w8a8-channel-dynamic-token-v2"
,
"nm-testing/tinyllama-oneshot-w8a8-channel-dynamic-token-v2-asym"
,
"nm-testing/tinyllama-oneshot-w8a8-dynamic-token-v2"
,
"nm-testing/tinyllama-oneshot-w8a8-dynamic-token-v2-asym"
,
"nm-testing/tinyllama-oneshot-w8w8-test-static-shape-change"
,
"nm-testing/TinyLlama-1.1B-Chat-v1.0-2of4-Sparse-Dense-Compressor"
,
"nm-testing/TinyLlama-1.1B-Chat-v1.0-gsm8k-pruned.2of4-chnl_wts_per_tok_dyn_act_fp8-BitM"
,
"nm-testing/TinyLlama-1.1B-Chat-v1.0-gsm8k-pruned.2of4-chnl_wts_per_tok_dyn_act_int8-BitM"
,
"nm-testing/TinyLlama-1.1B-Chat-v1.0-gsm8k-pruned.2of4-chnl_wts_tensor_act_fp8-BitM"
,
"nm-testing/TinyLlama-1.1B-Chat-v1.0-gsm8k-pruned.2of4-chnl_wts_tensor_act_int8-BitM"
,
"nm-testing/TinyLlama-1.1B-Chat-v1.0-gsm8k-pruned.2of4-tensor_wts_per_tok_dyn_act_fp8-BitM"
,
"nm-testing/TinyLlama-1.1B-Chat-v1.0-gsm8k-pruned.2of4-tensor_wts_per_tok_dyn_act_int8-BitM"
,
"nm-testing/TinyLlama-1.1B-Chat-v1.0-gsm8k-pruned.2of4-tensor_wts_tensor_act_fp8-BitM"
,
"nm-testing/TinyLlama-1.1B-Chat-v1.0-gsm8k-pruned.2of4-tensor_wts_tensor_act_int8-BitM"
,
"nm-testing/TinyLlama-1.1B-Chat-v1.0-INT8-Dynamic-IA-Per-Channel-Weight-testing"
,
"nm-testing/TinyLlama-1.1B-Chat-v1.0-INT8-Dynamic-IA-Per-Tensor-Weight-testing"
,
"nm-testing/TinyLlama-1.1B-Chat-v1.0-INT8-Static-testing"
,
"nm-testing/TinyLlama-1.1B-compressed-tensors-kv-cache-scheme"
,
"nvidia/NVLM-D-72B"
,
"openai-community/gpt2"
,
# "openai/whisper-large-v3",
"openbmb/MiniCPM-o-2_6"
,
"openbmb/MiniCPM-V-2_6"
,
"OpenGVLab/InternVL2-1B"
,
"parasail-ai/GritLM-7B-vllm"
,
"Qwen/Qwen1.5-MoE-A2.7B-Chat"
,
"Qwen/Qwen2-7B-Instruct"
,
"Qwen/Qwen2-Audio-7B-Instruct"
,
"Qwen/Qwen2-VL-2B-Instruct"
,
"Qwen/Qwen2.5-1.5B-Instruct"
,
"Qwen/Qwen2.5-Math-PRM-7B"
,
"Qwen/Qwen2.5-Math-RM-72B"
,
"Qwen/Qwen2.5-VL-3B-Instruct"
,
"royokong/e5-v"
,
"sentence-transformers/all-roberta-large-v1"
,
"sentence-transformers/stsb-roberta-base-v2"
,
"shanearora/OLMo-7B-1124-hf"
,
"shuyuej/Llama-3.2-1B-Instruct-GPTQ"
,
"ssmits/Qwen2-7B-Instruct-embed-base"
,
"stabilityai/stablelm-3b-4e1t"
,
"stabilityai/stablelm-zephyr-3b"
,
"state-spaces/mamba-130m-hf"
,
"TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ"
,
"THUDM/glm-4v-9b"
,
"TIGER-Lab/Mantis-8B-siglip-llama3"
,
"TIGER-Lab/VLM2Vec-Full"
,
"tiiuae/falcon-40b"
,
"tiiuae/falcon-mamba-7b-instruct"
,
"TinyLlama/TinyLlama-1.1B-Chat-v1.0"
,
"upstage/solar-pro-preview-instruct"
,
]
MODEL_WEIGHTS_S3_BUCKET
=
"s3://vllm-ci-model-weights"
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment