Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
64ea24d0
Unverified
Commit
64ea24d0
authored
Jan 21, 2025
by
Kevin H. Luu
Committed by
GitHub
Jan 22, 2025
Browse files
[ci/lint] Add back default arg for pre-commit (#12279)
Signed-off-by:
kevin
<
kevin@anyscale.com
>
parent
df76e5af
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
26 additions
and
29 deletions
+26
-29
.github/workflows/pre-commit.yml
.github/workflows/pre-commit.yml
+1
-1
tests/models/decoder_only/language/test_gguf.py
tests/models/decoder_only/language/test_gguf.py
+7
-10
vllm/model_executor/models/paligemma.py
vllm/model_executor/models/paligemma.py
+1
-1
vllm/model_executor/models/siglip.py
vllm/model_executor/models/siglip.py
+3
-5
vllm/platforms/__init__.py
vllm/platforms/__init__.py
+1
-1
vllm/v1/stats/common.py
vllm/v1/stats/common.py
+13
-11
No files found.
.github/workflows/pre-commit.yml
View file @
64ea24d0
...
@@ -16,4 +16,4 @@ jobs:
...
@@ -16,4 +16,4 @@ jobs:
-
run
:
echo "::add-matcher::.github/workflows/matchers/actionlint.json"
-
run
:
echo "::add-matcher::.github/workflows/matchers/actionlint.json"
-
uses
:
pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd
# v3.0.1
-
uses
:
pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd
# v3.0.1
with
:
with
:
extra_args
:
--hook-stage manual
extra_args
:
--all-files
--hook-stage manual
tests/models/decoder_only/language/test_gguf.py
View file @
64ea24d0
...
@@ -74,11 +74,7 @@ DOLPHIN_CONFIG = GGUFTestConfig(
...
@@ -74,11 +74,7 @@ DOLPHIN_CONFIG = GGUFTestConfig(
)
)
MODELS
=
[
MODELS
=
[
LLAMA_CONFIG
,
LLAMA_CONFIG
,
QWEN2_CONFIG
,
PHI3_CONFIG
,
GPT2_CONFIG
,
STABLELM_CONFIG
,
QWEN2_CONFIG
,
PHI3_CONFIG
,
GPT2_CONFIG
,
STABLELM_CONFIG
,
DOLPHIN_CONFIG
DOLPHIN_CONFIG
# STARCODER_CONFIG, # broken
# STARCODER_CONFIG, # broken
]
]
...
@@ -114,7 +110,8 @@ def test_models(
...
@@ -114,7 +110,8 @@ def test_models(
messages
,
tokenize
=
False
,
add_generation_prompt
=
True
)
messages
,
tokenize
=
False
,
add_generation_prompt
=
True
)
# Run unquantized model.
# Run unquantized model.
with
vllm_runner
(
model_name
=
model
.
original_model
,
with
vllm_runner
(
model_name
=
model
.
original_model
,
enforce_eager
=
True
,
# faster tests
enforce_eager
=
True
,
# faster tests
dtype
=
dtype
,
dtype
=
dtype
,
max_model_len
=
MAX_MODEL_LEN
,
max_model_len
=
MAX_MODEL_LEN
,
...
...
vllm/model_executor/models/paligemma.py
View file @
64ea24d0
vllm/model_executor/models/siglip.py
View file @
64ea24d0
...
@@ -350,10 +350,8 @@ class SiglipMLP(nn.Module):
...
@@ -350,10 +350,8 @@ class SiglipMLP(nn.Module):
else
:
else
:
# For other quantization, we require the hidden size to be a
# For other quantization, we require the hidden size to be a
# multiple of 64
# multiple of 64
quantizable
=
(
quantizable
=
(
config
.
hidden_size
%
64
==
0
config
.
hidden_size
%
64
==
0
and
config
.
intermediate_size
%
64
==
0
)
and
config
.
intermediate_size
%
64
==
0
)
self
.
fc1
=
ColumnParallelLinear
(
self
.
fc1
=
ColumnParallelLinear
(
config
.
hidden_size
,
config
.
hidden_size
,
config
.
intermediate_size
,
config
.
intermediate_size
,
...
...
vllm/platforms/__init__.py
View file @
64ea24d0
...
@@ -101,7 +101,7 @@ def cpu_platform_plugin() -> Optional[str]:
...
@@ -101,7 +101,7 @@ def cpu_platform_plugin() -> Optional[str]:
try
:
try
:
from
importlib.metadata
import
version
from
importlib.metadata
import
version
is_cpu
=
"cpu"
in
version
(
"vllm"
)
is_cpu
=
"cpu"
in
version
(
"vllm"
)
if
is_cpu
==
False
:
if
not
is_cpu
:
import
platform
import
platform
is_cpu
=
platform
.
machine
().
lower
().
startswith
(
"arm"
)
is_cpu
=
platform
.
machine
().
lower
().
startswith
(
"arm"
)
...
...
vllm/v1/stats/common.py
View file @
64ea24d0
...
@@ -10,7 +10,8 @@ from msgspec import field as msgspec_field
...
@@ -10,7 +10,8 @@ from msgspec import field as msgspec_field
from
vllm.sampling_params
import
SamplingParams
from
vllm.sampling_params
import
SamplingParams
class
RequestStatsUpdate
(
msgspec
.
Struct
,
class
RequestStatsUpdate
(
msgspec
.
Struct
,
# type: ignore
array_like
=
True
,
array_like
=
True
,
omit_defaults
=
True
,
omit_defaults
=
True
,
gc
=
False
):
gc
=
False
):
...
@@ -341,8 +342,8 @@ class RequestStats:
...
@@ -341,8 +342,8 @@ class RequestStats:
self
.
queued_ts_s
=
ts
self
.
queued_ts_s
=
ts
elif
update
.
type
==
RequestStatsUpdate
.
Type
.
PREFILLING
:
elif
update
.
type
==
RequestStatsUpdate
.
Type
.
PREFILLING
:
self
.
prefill_start_ts_s_lst
.
append
(
ts
)
self
.
prefill_start_ts_s_lst
.
append
(
ts
)
self
.
num_cached_tokens
=
update
.
num_cached_tokens
self
.
num_cached_tokens
=
update
.
num_cached_tokens
or
0
self
.
num_computed_tokens
=
update
.
num_computed_tokens
self
.
num_computed_tokens
=
update
.
num_computed_tokens
or
0
elif
update
.
type
==
RequestStatsUpdate
.
Type
.
PREEMPTED
:
elif
update
.
type
==
RequestStatsUpdate
.
Type
.
PREEMPTED
:
self
.
_reset_for_preemption
(
ts
)
self
.
_reset_for_preemption
(
ts
)
elif
update
.
type
==
RequestStatsUpdate
.
Type
.
DECODING
:
elif
update
.
type
==
RequestStatsUpdate
.
Type
.
DECODING
:
...
@@ -350,7 +351,7 @@ class RequestStats:
...
@@ -350,7 +351,7 @@ class RequestStats:
elif
update
.
type
==
RequestStatsUpdate
.
Type
.
DETOKENIZED
:
elif
update
.
type
==
RequestStatsUpdate
.
Type
.
DETOKENIZED
:
self
.
_record_detokenized_output
(
self
.
_record_detokenized_output
(
ts
,
ts
,
update
.
num_new_tokens
,
update
.
num_new_tokens
or
0
,
)
)
elif
update
.
type
==
RequestStatsUpdate
.
Type
.
FINISHED
:
elif
update
.
type
==
RequestStatsUpdate
.
Type
.
FINISHED
:
self
.
finished_ts_s
=
ts
self
.
finished_ts_s
=
ts
...
@@ -425,7 +426,8 @@ class EngineCoreProcessStats:
...
@@ -425,7 +426,8 @@ class EngineCoreProcessStats:
output_queue_size
:
Optional
[
int
]
=
None
output_queue_size
:
Optional
[
int
]
=
None
class
EngineCoreStatsSnapshot
(
msgspec
.
Struct
,
class
EngineCoreStatsSnapshot
(
msgspec
.
Struct
,
# type: ignore
array_like
=
True
,
array_like
=
True
,
omit_defaults
=
True
,
omit_defaults
=
True
,
gc
=
False
):
gc
=
False
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment