Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
145787ae
Commit
145787ae
authored
May 25, 2024
by
zhuwenwen
Browse files
fix merge
parent
408f0a79
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
6 additions
and
29 deletions
+6
-29
tests/kernels/test_attention.py
tests/kernels/test_attention.py
+1
-5
tests/kernels/test_cache.py
tests/kernels/test_cache.py
+1
-5
vllm/model_executor/layers/linear.py
vllm/model_executor/layers/linear.py
+1
-10
vllm/model_executor/model_loader/loader.py
vllm/model_executor/model_loader/loader.py
+3
-9
No files found.
tests/kernels/test_attention.py
View file @
145787ae
...
@@ -33,11 +33,7 @@ HEAD_SIZES = [64, 80, 96, 112, 128, 256
...
@@ -33,11 +33,7 @@ HEAD_SIZES = [64, 80, 96, 112, 128, 256
BLOCK_SIZES
=
[
16
,
32
]
BLOCK_SIZES
=
[
16
,
32
]
USE_ALIBI
=
[
False
,
True
]
USE_ALIBI
=
[
False
,
True
]
<<<<<<<
HEAD
KV_CACHE_DTYPE
=
[
"auto"
,
"fp8"
]
if
not
is_hip
()
else
[
"auto"
]
KV_CACHE_DTYPE
=
[
"auto"
,
"fp8_e5m2"
]
if
not
is_hip
()
else
[
"auto"
]
=======
KV_CACHE_DTYPE
=
[
"auto"
,
"fp8"
]
>>>>>>>
v0
.
4.1
SEEDS
=
[
0
]
SEEDS
=
[
0
]
CUDA_DEVICES
=
[
CUDA_DEVICES
=
[
f
"cuda:
{
i
}
"
for
i
in
range
(
1
if
torch
.
cuda
.
device_count
()
==
1
else
2
)
f
"cuda:
{
i
}
"
for
i
in
range
(
1
if
torch
.
cuda
.
device_count
()
==
1
else
2
)
...
...
tests/kernels/test_cache.py
View file @
145787ae
...
@@ -25,11 +25,7 @@ SEEDS = [0]
...
@@ -25,11 +25,7 @@ SEEDS = [0]
CUDA_DEVICES
=
[
CUDA_DEVICES
=
[
f
"cuda:
{
i
}
"
for
i
in
range
(
1
if
torch
.
cuda
.
device_count
()
==
1
else
2
)
f
"cuda:
{
i
}
"
for
i
in
range
(
1
if
torch
.
cuda
.
device_count
()
==
1
else
2
)
]
]
<<<<<<<
HEAD
KV_CACHE_DTYPE
=
[
"auto"
,
"fp8"
]
if
not
is_hip
()
else
[
"auto"
]
KV_CACHE_DTYPE
=
[
"auto"
,
"fp8_e5m2"
]
if
not
is_hip
()
else
[
"auto"
]
=======
KV_CACHE_DTYPE
=
[
"auto"
,
"fp8"
]
>>>>>>>
v0
.
4.1
@
pytest
.
mark
.
parametrize
(
"num_mappings"
,
NUM_MAPPINGS
)
@
pytest
.
mark
.
parametrize
(
"num_mappings"
,
NUM_MAPPINGS
)
...
...
vllm/model_executor/layers/linear.py
View file @
145787ae
...
@@ -346,13 +346,8 @@ class MergedColumnParallelLinear(ColumnParallelLinear):
...
@@ -346,13 +346,8 @@ class MergedColumnParallelLinear(ColumnParallelLinear):
self
.
output_sizes
=
output_sizes
self
.
output_sizes
=
output_sizes
tp_size
=
get_tensor_model_parallel_world_size
()
tp_size
=
get_tensor_model_parallel_world_size
()
assert
all
(
output_size
%
tp_size
==
0
for
output_size
in
output_sizes
)
assert
all
(
output_size
%
tp_size
==
0
for
output_size
in
output_sizes
)
<<<<<<<
HEAD
super
().
__init__
(
input_size
,
sum
(
output_sizes
),
bias
,
gather_output
,
skip_bias_add
,
params_dtype
,
linear_method
,
=======
super
().
__init__
(
input_size
,
sum
(
output_sizes
),
bias
,
gather_output
,
super
().
__init__
(
input_size
,
sum
(
output_sizes
),
bias
,
gather_output
,
skip_bias_add
,
params_dtype
,
quant_config
,
skip_bias_add
,
params_dtype
,
quant_config
,
>>>>>>>
v0
.
4.2
self
.
output_sizes
)
self
.
output_sizes
)
self
.
use_llama_nn
=
os
.
environ
.
get
(
'LLAMA_NN'
)
==
'1'
self
.
use_llama_nn
=
os
.
environ
.
get
(
'LLAMA_NN'
)
==
'1'
...
@@ -514,12 +509,8 @@ class QKVParallelLinear(ColumnParallelLinear):
...
@@ -514,12 +509,8 @@ class QKVParallelLinear(ColumnParallelLinear):
]
]
super
().
__init__
(
input_size
,
output_size
,
bias
,
False
,
skip_bias_add
,
super
().
__init__
(
input_size
,
output_size
,
bias
,
False
,
skip_bias_add
,
<<<<<<<
HEAD
params_dtype
,
linear_method
,
output_sizes
)
self
.
use_llama_nn
=
os
.
environ
.
get
(
'LLAMA_NN'
)
==
'1'
=======
params_dtype
,
quant_config
,
output_sizes
)
params_dtype
,
quant_config
,
output_sizes
)
>>>>>>>
v0
.
4.2
self
.
use_llama_nn
=
os
.
environ
.
get
(
'LLAMA_NN'
)
==
'1'
def
weight_loader
(
self
,
def
weight_loader
(
self
,
param
:
Parameter
,
param
:
Parameter
,
...
...
vllm/model_executor/model_loader/loader.py
View file @
145787ae
...
@@ -54,18 +54,12 @@ def _get_quantization_config(
...
@@ -54,18 +54,12 @@ def _get_quantization_config(
f
"
{
model_config
.
dtype
}
is not supported for quantization "
f
"
{
model_config
.
dtype
}
is not supported for quantization "
f
"method
{
model_config
.
quantization
}
. Supported dtypes: "
f
"method
{
model_config
.
quantization
}
. Supported dtypes: "
f
"
{
supported_dtypes
}
"
)
f
"
{
supported_dtypes
}
"
)
<<<<<<<
HEAD
return
quant_config
linear_method
=
quant_config
.
get_linear_method
()
if
quant_config
!=
None
:
if
linear_method
!=
None
:
os
.
environ
[
'LLAMA_NN'
]
=
'0'
os
.
environ
[
'LLAMA_NN'
]
=
'0'
return
linear_method
=======
return
quant_config
return
None
return
None
>>>>>>>
v0
.
4.2
def
_get_model_initialization_kwargs
(
def
_get_model_initialization_kwargs
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment