Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
f80ab352
Unverified
Commit
f80ab352
authored
Aug 05, 2024
by
Jee Jee Li
Committed by
GitHub
Aug 04, 2024
Browse files
Clean up remaining Punica C information (#7027)
parent
16a1cc9b
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
3 additions
and
15 deletions
+3
-15
.github/workflows/clang-format.yml
.github/workflows/clang-format.yml
+0
-6
cmake/utils.cmake
cmake/utils.cmake
+1
-1
format.sh
format.sh
+0
-6
vllm/config.py
vllm/config.py
+1
-1
vllm/lora/layers.py
vllm/lora/layers.py
+1
-1
No files found.
.github/workflows/clang-format.yml
View file @
f80ab352
...
@@ -30,12 +30,6 @@ jobs:
...
@@ -30,12 +30,6 @@ jobs:
run
:
|
run
:
|
EXCLUDES=(
EXCLUDES=(
'csrc/moe/topk_softmax_kernels.cu'
'csrc/moe/topk_softmax_kernels.cu'
'csrc/punica/bgmv/bgmv_bf16_bf16_bf16.cu'
'csrc/punica/bgmv/bgmv_config.h'
'csrc/punica/bgmv/bgmv_impl.cuh'
'csrc/punica/bgmv/vec_dtypes.cuh'
'csrc/punica/punica_ops.cu'
'csrc/punica/type_convert.h'
)
)
find csrc/ \( -name '*.h' -o -name '*.cpp' -o -name '*.cu' -o -name '*.cuh' \) -print \
find csrc/ \( -name '*.h' -o -name '*.cpp' -o -name '*.cu' -o -name '*.cuh' \) -print \
| grep -vFf <(printf "%s\n" "${EXCLUDES[@]}") \
| grep -vFf <(printf "%s\n" "${EXCLUDES[@]}") \
...
...
cmake/utils.cmake
View file @
f80ab352
...
@@ -181,7 +181,7 @@ macro(override_gpu_arches GPU_ARCHES GPU_LANG GPU_SUPPORTED_ARCHES)
...
@@ -181,7 +181,7 @@ macro(override_gpu_arches GPU_ARCHES GPU_LANG GPU_SUPPORTED_ARCHES)
#
#
# The torch cmake setup hardcodes the detected architecture flags in
# The torch cmake setup hardcodes the detected architecture flags in
# `CMAKE_CUDA_FLAGS`. Since `CMAKE_CUDA_FLAGS` is a "global" variable, it
# `CMAKE_CUDA_FLAGS`. Since `CMAKE_CUDA_FLAGS` is a "global" variable, it
# can't be modified on a per-target basis
, e.g. for the `punica` extension
.
# can't be modified on a per-target basis.
# So, all the `-gencode` flags need to be extracted and removed from
# So, all the `-gencode` flags need to be extracted and removed from
# `CMAKE_CUDA_FLAGS` for processing so they can be passed by another method.
# `CMAKE_CUDA_FLAGS` for processing so they can be passed by another method.
# Since it's not possible to use `target_compiler_options` for adding target
# Since it's not possible to use `target_compiler_options` for adding target
...
...
format.sh
View file @
f80ab352
...
@@ -242,12 +242,6 @@ echo 'vLLM isort: Done'
...
@@ -242,12 +242,6 @@ echo 'vLLM isort: Done'
# NOTE: Keep up to date with .github/workflows/clang-format.yml
# NOTE: Keep up to date with .github/workflows/clang-format.yml
CLANG_FORMAT_EXCLUDES
=(
CLANG_FORMAT_EXCLUDES
=(
'csrc/moe/topk_softmax_kernels.cu'
'csrc/moe/topk_softmax_kernels.cu'
'csrc/punica/bgmv/bgmv_bf16_bf16_bf16.cu'
'csrc/punica/bgmv/bgmv_config.h'
'csrc/punica/bgmv/bgmv_impl.cuh'
'csrc/punica/bgmv/vec_dtypes.cuh'
'csrc/punica/punica_ops.cu'
'csrc/punica/type_convert.h'
)
)
# Format specified files with clang-format
# Format specified files with clang-format
...
...
vllm/config.py
View file @
f80ab352
...
@@ -1304,7 +1304,7 @@ class LoRAConfig:
...
@@ -1304,7 +1304,7 @@ class LoRAConfig:
long_lora_scaling_factors
:
Optional
[
Tuple
[
float
]]
=
None
long_lora_scaling_factors
:
Optional
[
Tuple
[
float
]]
=
None
def
__post_init__
(
self
):
def
__post_init__
(
self
):
#
Keep this in sync with csrc/punica/bgmv/bgmv_config.h
#
TODO: Increase the range of rank
possible_max_ranks
=
(
8
,
16
,
32
,
64
)
possible_max_ranks
=
(
8
,
16
,
32
,
64
)
possible_lora_extra_vocab_size
=
(
0
,
256
,
512
)
possible_lora_extra_vocab_size
=
(
0
,
256
,
512
)
if
self
.
max_lora_rank
not
in
possible_max_ranks
:
if
self
.
max_lora_rank
not
in
possible_max_ranks
:
...
...
vllm/lora/layers.py
View file @
f80ab352
...
@@ -1073,7 +1073,7 @@ class LogitsProcessorWithLoRA(BaseLayerWithLoRA):
...
@@ -1073,7 +1073,7 @@ class LogitsProcessorWithLoRA(BaseLayerWithLoRA):
lora_config
:
LoRAConfig
,
lora_config
:
LoRAConfig
,
model_config
:
Optional
[
PretrainedConfig
]
=
None
,
model_config
:
Optional
[
PretrainedConfig
]
=
None
,
)
->
None
:
)
->
None
:
#
Keep this in sync with csrc/punica/bgmv/bgmv_config.h
#
TODO: Verify if this condition can be relaxed
if
32000
<
self
.
base_layer
.
vocab_size
>
128512
:
if
32000
<
self
.
base_layer
.
vocab_size
>
128512
:
raise
ValueError
(
"When using LoRA, vocab size must be "
raise
ValueError
(
"When using LoRA, vocab size must be "
"32000 >= vocab_size <= 128512"
)
"32000 >= vocab_size <= 128512"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment