Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
79a268c4
Unverified
Commit
79a268c4
authored
Apr 23, 2024
by
Robert Shaw
Committed by
GitHub
Apr 23, 2024
Browse files
[BUG] fixed fp8 conflict with aqlm (#4307)
Fixes the fp8 interface, which broke in the AQLM merge.
parent
eace8bf0
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
18 additions
and
4 deletions
+18
-4
.buildkite/test-pipeline.yaml
.buildkite/test-pipeline.yaml
+3
-0
vllm/model_executor/layers/linear.py
vllm/model_executor/layers/linear.py
+13
-3
vllm/model_executor/layers/quantization/fp8.py
vllm/model_executor/layers/quantization/fp8.py
+2
-1
No files found.
.buildkite/test-pipeline.yaml
View file @
79a268c4
...
@@ -96,6 +96,9 @@ steps:
...
@@ -96,6 +96,9 @@ steps:
-
label
:
Metrics Test
-
label
:
Metrics Test
command
:
pytest -v -s metrics
command
:
pytest -v -s metrics
-
label
:
Quantization Test
command
:
pytest -v -s quantization
-
label
:
Benchmarks
-
label
:
Benchmarks
working_dir
:
"
/vllm-workspace/.buildkite"
working_dir
:
"
/vllm-workspace/.buildkite"
commands
:
commands
:
...
...
vllm/model_executor/layers/linear.py
View file @
79a268c4
...
@@ -34,9 +34,19 @@ class LinearMethodBase(ABC):
...
@@ -34,9 +34,19 @@ class LinearMethodBase(ABC):
output_partition_sizes
:
List
[
int
],
input_size
:
int
,
output_partition_sizes
:
List
[
int
],
input_size
:
int
,
output_size
:
int
,
params_dtype
:
torch
.
dtype
,
output_size
:
int
,
params_dtype
:
torch
.
dtype
,
**
extra_weight_attrs
):
**
extra_weight_attrs
):
"""Create weights for a linear layer.
"""Create weights for a linear layer.
The weights will be set as attributes of the layer.
The weights will be set as attributes of the layer."""
Args:
layer: The layer that is using the LinearMethodBase factory.
input_size_per_partition: Size of the weight input dim on rank X.
output_partition_sizes: Sizes of the output dim of each logical
weight on rank X. E.g., output_partition_sizes for QKVLinear
is a list containing the widths of Wq, Wk, Wv on rank X.
input_size: Size of the input dim of the weight across all ranks.
output_size: Size of the output dim of the weight across all ranks.
params_dtype: Datatype of the parameters.
"""
raise
NotImplementedError
raise
NotImplementedError
@
abstractmethod
@
abstractmethod
...
...
vllm/model_executor/layers/quantization/fp8.py
View file @
79a268c4
...
@@ -64,12 +64,13 @@ class Fp8LinearMethod(LinearMethodBase):
...
@@ -64,12 +64,13 @@ class Fp8LinearMethod(LinearMethodBase):
self
,
self
,
layer
:
torch
.
nn
.
Module
,
layer
:
torch
.
nn
.
Module
,
input_size_per_partition
:
int
,
input_size_per_partition
:
int
,
output_
size_per_partition
:
int
,
output_
partition_sizes
:
List
[
int
]
,
input_size
:
int
,
input_size
:
int
,
output_size
:
int
,
output_size
:
int
,
params_dtype
:
torch
.
dtype
,
params_dtype
:
torch
.
dtype
,
**
extra_weight_attrs
,
**
extra_weight_attrs
,
):
):
output_size_per_partition
=
sum
(
output_partition_sizes
)
weight
=
Parameter
(
torch
.
empty
(
output_size_per_partition
,
weight
=
Parameter
(
torch
.
empty
(
output_size_per_partition
,
input_size_per_partition
,
input_size_per_partition
,
dtype
=
params_dtype
),
dtype
=
params_dtype
),
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment