Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
64ea24d0
Unverified
Commit
64ea24d0
authored Jan 21, 2025 by Kevin H. Luu
Committed by GitHub Jan 22, 2025
Browse files
[ci/lint] Add back default arg for pre-commit (#12279)
Signed-off-by: kevin <kevin@anyscale.com>
parent
df76e5af
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing 6 changed files with 26 additions and 29 deletions
+26
-29
.github/workflows/pre-commit.yml
.github/workflows/pre-commit.yml
+1
-1
tests/models/decoder_only/language/test_gguf.py
tests/models/decoder_only/language/test_gguf.py
+7
-10
vllm/model_executor/models/paligemma.py
vllm/model_executor/models/paligemma.py
+1
-1
vllm/model_executor/models/siglip.py
vllm/model_executor/models/siglip.py
+3
-5
vllm/platforms/__init__.py
vllm/platforms/__init__.py
+1
-1
vllm/v1/stats/common.py
vllm/v1/stats/common.py
+13
-11
No files found.
.github/workflows/pre-commit.yml
View file @
64ea24d0
...
@@ -16,4 +16,4 @@ jobs:
...
@@ -16,4 +16,4 @@ jobs:
-
run
:
echo "::add-matcher::.github/workflows/matchers/actionlint.json"
-
run
:
echo "::add-matcher::.github/workflows/matchers/actionlint.json"
-
uses
:
pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd
# v3.0.1
-
uses
:
pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd
# v3.0.1
with
:
with
:
extra_args
:
--hook-stage manual
extra_args
:
--all-files
--hook-stage manual
tests/models/decoder_only/language/test_gguf.py
View file @
64ea24d0
...
@@ -74,11 +74,7 @@ DOLPHIN_CONFIG = GGUFTestConfig(
...
@@ -74,11 +74,7 @@ DOLPHIN_CONFIG = GGUFTestConfig(
)
)
MODELS
=
[
MODELS
=
[
LLAMA_CONFIG
,
LLAMA_CONFIG
,
QWEN2_CONFIG
,
PHI3_CONFIG
,
GPT2_CONFIG
,
STABLELM_CONFIG
,
QWEN2_CONFIG
,
PHI3_CONFIG
,
GPT2_CONFIG
,
STABLELM_CONFIG
,
DOLPHIN_CONFIG
DOLPHIN_CONFIG
# STARCODER_CONFIG, # broken
# STARCODER_CONFIG, # broken
]
]
...
@@ -114,11 +110,12 @@ def test_models(
...
@@ -114,11 +110,12 @@ def test_models(
messages
,
tokenize
=
False
,
add_generation_prompt
=
True
)
messages
,
tokenize
=
False
,
add_generation_prompt
=
True
)
# Run unquantized model.
# Run unquantized model.
with
vllm_runner
(
model_name
=
model
.
original_model
,
with
vllm_runner
(
enforce_eager
=
True
,
# faster tests
model_name
=
model
.
original_model
,
dtype
=
dtype
,
enforce_eager
=
True
,
# faster tests
max_model_len
=
MAX_MODEL_LEN
,
dtype
=
dtype
,
tensor_parallel_size
=
tp_size
)
as
original_model
:
max_model_len
=
MAX_MODEL_LEN
,
tensor_parallel_size
=
tp_size
)
as
original_model
:
original_outputs
=
original_model
.
generate_greedy_logprobs
(
original_outputs
=
original_model
.
generate_greedy_logprobs
(
example_prompts
[:
-
1
],
max_tokens
,
num_logprobs
)
example_prompts
[:
-
1
],
max_tokens
,
num_logprobs
)
...
...
vllm/model_executor/models/paligemma.py
View file @
64ea24d0
...
@@ -147,7 +147,7 @@ class PaliGemmaForConditionalGeneration(nn.Module, SupportsMultiModal,
...
@@ -147,7 +147,7 @@ class PaliGemmaForConditionalGeneration(nn.Module, SupportsMultiModal,
"up_proj"
,
"up_proj"
,
],
],
}
}
def
__init__
(
self
,
*
,
vllm_config
:
VllmConfig
,
prefix
:
str
=
""
):
def
__init__
(
self
,
*
,
vllm_config
:
VllmConfig
,
prefix
:
str
=
""
):
super
().
__init__
()
super
().
__init__
()
config
=
vllm_config
.
model_config
.
hf_config
config
=
vllm_config
.
model_config
.
hf_config
...
...
vllm/model_executor/models/siglip.py
View file @
64ea24d0
...
@@ -348,12 +348,10 @@ class SiglipMLP(nn.Module):
...
@@ -348,12 +348,10 @@ class SiglipMLP(nn.Module):
if
quant_config
and
quant_config
.
get_name
()
==
"bitsandbytes"
:
if
quant_config
and
quant_config
.
get_name
()
==
"bitsandbytes"
:
quantizable
=
True
quantizable
=
True
else
:
else
:
# For other quantization, we require the hidden size to be a
# For other quantization, we require the hidden size to be a
# multiple of 64
# multiple of 64
quantizable
=
(
quantizable
=
(
config
.
hidden_size
%
64
==
0
config
.
hidden_size
%
64
==
0
and
config
.
intermediate_size
%
64
==
0
)
and
config
.
intermediate_size
%
64
==
0
)
self
.
fc1
=
ColumnParallelLinear
(
self
.
fc1
=
ColumnParallelLinear
(
config
.
hidden_size
,
config
.
hidden_size
,
config
.
intermediate_size
,
config
.
intermediate_size
,
...
...
vllm/platforms/__init__.py
View file @
64ea24d0
...
@@ -101,7 +101,7 @@ def cpu_platform_plugin() -> Optional[str]:
...
@@ -101,7 +101,7 @@ def cpu_platform_plugin() -> Optional[str]:
try
:
try
:
from
importlib.metadata
import
version
from
importlib.metadata
import
version
is_cpu
=
"cpu"
in
version
(
"vllm"
)
is_cpu
=
"cpu"
in
version
(
"vllm"
)
if
is_cpu
==
False
:
if
not
is_cpu
:
import
platform
import
platform
is_cpu
=
platform
.
machine
().
lower
().
startswith
(
"arm"
)
is_cpu
=
platform
.
machine
().
lower
().
startswith
(
"arm"
)
...
...
vllm/v1/stats/common.py
View file @
64ea24d0
...
@@ -10,10 +10,11 @@ from msgspec import field as msgspec_field
...
@@ -10,10 +10,11 @@ from msgspec import field as msgspec_field
from
vllm.sampling_params
import
SamplingParams
from
vllm.sampling_params
import
SamplingParams
class
RequestStatsUpdate
(
msgspec
.
Struct
,
class
RequestStatsUpdate
(
array_like
=
True
,
msgspec
.
Struct
,
# type: ignore
omit_defaults
=
True
,
array_like
=
True
,
gc
=
False
):
omit_defaults
=
True
,
gc
=
False
):
"""
"""
An update to the request stats.
An update to the request stats.
...
@@ -341,8 +342,8 @@ class RequestStats:
...
@@ -341,8 +342,8 @@ class RequestStats:
self
.
queued_ts_s
=
ts
self
.
queued_ts_s
=
ts
elif
update
.
type
==
RequestStatsUpdate
.
Type
.
PREFILLING
:
elif
update
.
type
==
RequestStatsUpdate
.
Type
.
PREFILLING
:
self
.
prefill_start_ts_s_lst
.
append
(
ts
)
self
.
prefill_start_ts_s_lst
.
append
(
ts
)
self
.
num_cached_tokens
=
update
.
num_cached_tokens
self
.
num_cached_tokens
=
update
.
num_cached_tokens
or
0
self
.
num_computed_tokens
=
update
.
num_computed_tokens
self
.
num_computed_tokens
=
update
.
num_computed_tokens
or
0
elif
update
.
type
==
RequestStatsUpdate
.
Type
.
PREEMPTED
:
elif
update
.
type
==
RequestStatsUpdate
.
Type
.
PREEMPTED
:
self
.
_reset_for_preemption
(
ts
)
self
.
_reset_for_preemption
(
ts
)
elif
update
.
type
==
RequestStatsUpdate
.
Type
.
DECODING
:
elif
update
.
type
==
RequestStatsUpdate
.
Type
.
DECODING
:
...
@@ -350,7 +351,7 @@ class RequestStats:
...
@@ -350,7 +351,7 @@ class RequestStats:
elif
update
.
type
==
RequestStatsUpdate
.
Type
.
DETOKENIZED
:
elif
update
.
type
==
RequestStatsUpdate
.
Type
.
DETOKENIZED
:
self
.
_record_detokenized_output
(
self
.
_record_detokenized_output
(
ts
,
ts
,
update
.
num_new_tokens
,
update
.
num_new_tokens
or
0
,
)
)
elif
update
.
type
==
RequestStatsUpdate
.
Type
.
FINISHED
:
elif
update
.
type
==
RequestStatsUpdate
.
Type
.
FINISHED
:
self
.
finished_ts_s
=
ts
self
.
finished_ts_s
=
ts
...
@@ -425,10 +426,11 @@ class EngineCoreProcessStats:
...
@@ -425,10 +426,11 @@ class EngineCoreProcessStats:
output_queue_size
:
Optional
[
int
]
=
None
output_queue_size
:
Optional
[
int
]
=
None
class
EngineCoreStatsSnapshot
(
msgspec
.
Struct
,
class
EngineCoreStatsSnapshot
(
array_like
=
True
,
msgspec
.
Struct
,
# type: ignore
omit_defaults
=
True
,
array_like
=
True
,
gc
=
False
):
omit_defaults
=
True
,
gc
=
False
):
"""
"""
A snapshot of the EngineCore's current stats over a period of time.
A snapshot of the EngineCore's current stats over a period of time.
"""
"""
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment