Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
82e40fb7
Commit
82e40fb7
authored
Jan 27, 2026
by
zhuwenwen
Browse files
Merge tag 'v0.15.0rc1' into v0.15.0rc1-ori
parents
30a1922e
58996f35
Changes
314
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
95 additions
and
28 deletions
+95
-28
vllm/model_executor/layers/quantization/utils/quant_utils.py
vllm/model_executor/layers/quantization/utils/quant_utils.py
+67
-0
vllm/model_executor/models/afmoe.py
vllm/model_executor/models/afmoe.py
+2
-2
vllm/model_executor/models/apertus.py
vllm/model_executor/models/apertus.py
+1
-1
vllm/model_executor/models/arcee.py
vllm/model_executor/models/arcee.py
+1
-1
vllm/model_executor/models/arctic.py
vllm/model_executor/models/arctic.py
+2
-2
vllm/model_executor/models/aria.py
vllm/model_executor/models/aria.py
+1
-1
vllm/model_executor/models/audioflamingo3.py
vllm/model_executor/models/audioflamingo3.py
+1
-1
vllm/model_executor/models/aya_vision.py
vllm/model_executor/models/aya_vision.py
+1
-1
vllm/model_executor/models/bagel.py
vllm/model_executor/models/bagel.py
+1
-1
vllm/model_executor/models/baichuan.py
vllm/model_executor/models/baichuan.py
+2
-2
vllm/model_executor/models/bailing_moe.py
vllm/model_executor/models/bailing_moe.py
+2
-2
vllm/model_executor/models/bamba.py
vllm/model_executor/models/bamba.py
+2
-2
vllm/model_executor/models/bert_with_rope.py
vllm/model_executor/models/bert_with_rope.py
+1
-1
vllm/model_executor/models/blip2.py
vllm/model_executor/models/blip2.py
+1
-1
vllm/model_executor/models/bloom.py
vllm/model_executor/models/bloom.py
+2
-2
vllm/model_executor/models/chameleon.py
vllm/model_executor/models/chameleon.py
+1
-1
vllm/model_executor/models/chatglm.py
vllm/model_executor/models/chatglm.py
+2
-2
vllm/model_executor/models/cohere2_vision.py
vllm/model_executor/models/cohere2_vision.py
+1
-1
vllm/model_executor/models/commandr.py
vllm/model_executor/models/commandr.py
+2
-2
vllm/model_executor/models/dbrx.py
vllm/model_executor/models/dbrx.py
+2
-2
No files found.
vllm/model_executor/layers/quantization/utils/quant_utils.py
View file @
82e40fb7
...
@@ -868,3 +868,70 @@ def convert_packed_uint4b8_to_signed_int4_inplace(t: torch.Tensor) -> torch.Tens
...
@@ -868,3 +868,70 @@ def convert_packed_uint4b8_to_signed_int4_inplace(t: torch.Tensor) -> torch.Tens
t
|=
((
nib
-
8
)
&
0xF
)
<<
shift
t
|=
((
nib
-
8
)
&
0xF
)
<<
shift
return
t
return
t
def round_up(x: int, m: int) -> int:
    """Return the smallest multiple of ``m`` that is >= ``x`` (for positive ``m``)."""
    # Bias x by (m - 1) so that floor division behaves like a ceiling.
    biased = x + m - 1
    multiples = biased // m
    return multiples * m
def pad_nvfp4_weight_for_cutlass(
    weight: torch.Tensor,
    alignment: int = 32,
) -> tuple[torch.Tensor, int]:
    """
    Pad packed NVFP4 weights so that both N (rows) and K (columns) satisfy
    the alignment constraints required by CUTLASS / FlashInfer FP4 kernels.

    CUTLASS FP4 kernel requires both K and N matrix dimensions to be divisible
    by 32 for aligned memory access and efficient tensor core operations.

    Args:
        weight: Packed weight tensor. Rows are read from ``shape[0]`` and
            packed bytes (2 FP4 values per byte) from ``shape[1]``, so a
            2-D tensor is expected.
        alignment: Required multiple, in FP4 elements, for both N and K.
            Must be a positive integer.

    Returns:
        A tuple ``(weight, weights_padding_bytes)``. ``weight`` is the
        zero-padded, contiguous tensor, or the input tensor itself when no
        padding was needed. ``weights_padding_bytes`` is the number of bytes
        appended to each row along K (0 if K was already aligned).

    Raises:
        ValueError: If ``alignment`` is not a positive integer.
    """
    if alignment <= 0:
        raise ValueError(f"alignment must be a positive integer, got {alignment}")

    num_rows = weight.shape[0]
    num_col_bytes = weight.shape[1]

    # N dimension: distance from the row count to the next multiple of alignment.
    pad_rows = (-num_rows) % alignment

    # K dimension: 2 FP4 items are packed per byte, so the element count is
    # always even. Work in bytes to avoid any truncating division: an even
    # alignment of A elements is A // 2 bytes; an odd alignment A can only be
    # met by an even element count at multiples of 2 * A elements (= A bytes).
    if alignment % 2 == 0:
        byte_alignment = alignment // 2
    else:
        byte_alignment = alignment
    pad_bytes = (-num_col_bytes) % byte_alignment

    if pad_rows or pad_bytes:
        # Single pad call covers both dims: (left, right) for the last dim,
        # then (top, bottom) for the second-to-last dim.
        weight = torch.nn.functional.pad(
            weight, (0, pad_bytes, 0, pad_rows)
        ).contiguous()

    return weight, pad_bytes
def pad_nvfp4_activation_for_cutlass(
    x_fp4: torch.Tensor,
    weights_padding_bytes: int,
) -> torch.Tensor:
    """
    Extend packed FP4 activations along the last dimension so they line up
    with weights whose K dimension was padded.

    ``weights_padding_bytes`` counts bytes (tensor elements of the packed
    tensor), not individual FP4 values. When it is not positive the input
    tensor is returned untouched.
    """
    # Nothing was added to the weights, so the activations stay as they are.
    if not weights_padding_bytes > 0:
        return x_fp4

    # Append zeros on the right-hand side of the last dimension only.
    right_padded = torch.nn.functional.pad(x_fp4, (0, weights_padding_bytes))
    return right_padded.contiguous()
def slice_nvfp4_output(
    out: torch.Tensor,
    output_size: int,
) -> torch.Tensor:
    """
    Trim the last dimension of ``out`` to ``output_size`` entries, dropping
    columns that exist only because the weight was padded along N.

    The input tensor is returned as-is when its last dimension already
    equals ``output_size``.
    """
    last_dim = out.shape[-1]
    if last_dim == output_size:
        return out

    # Keep the leading output_size entries and materialize a contiguous result.
    trimmed = out[..., :output_size]
    return trimmed.contiguous()
vllm/model_executor/models/afmoe.py
View file @
82e40fb7
...
@@ -425,7 +425,7 @@ class AfmoeModel(nn.Module):
...
@@ -425,7 +425,7 @@ class AfmoeModel(nn.Module):
def
forward
(
def
forward
(
self
,
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
@@ -675,7 +675,7 @@ class AfmoeForCausalLM(nn.Module, SupportsPP, SupportsLoRA):
...
@@ -675,7 +675,7 @@ class AfmoeForCausalLM(nn.Module, SupportsPP, SupportsLoRA):
def
forward
(
def
forward
(
self
,
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
vllm/model_executor/models/apertus.py
View file @
82e40fb7
...
@@ -542,7 +542,7 @@ class ApertusForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
...
@@ -542,7 +542,7 @@ class ApertusForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
def
forward
(
def
forward
(
self
,
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
vllm/model_executor/models/arcee.py
View file @
82e40fb7
...
@@ -394,7 +394,7 @@ class ArceeForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
...
@@ -394,7 +394,7 @@ class ArceeForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
def
forward
(
def
forward
(
self
,
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
vllm/model_executor/models/arctic.py
View file @
82e40fb7
...
@@ -406,7 +406,7 @@ class ArcticModel(nn.Module):
...
@@ -406,7 +406,7 @@ class ArcticModel(nn.Module):
def
forward
(
def
forward
(
self
,
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
,
intermediate_tensors
:
IntermediateTensors
|
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
@@ -460,7 +460,7 @@ class ArcticForCausalLM(nn.Module, SupportsPP, SupportsQuant):
...
@@ -460,7 +460,7 @@ class ArcticForCausalLM(nn.Module, SupportsPP, SupportsQuant):
def
forward
(
def
forward
(
self
,
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
vllm/model_executor/models/aria.py
View file @
82e40fb7
...
@@ -629,7 +629,7 @@ class AriaForConditionalGeneration(nn.Module, SupportsMultiModal):
...
@@ -629,7 +629,7 @@ class AriaForConditionalGeneration(nn.Module, SupportsMultiModal):
def
forward
(
def
forward
(
self
,
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
vllm/model_executor/models/audioflamingo3.py
View file @
82e40fb7
...
@@ -609,7 +609,7 @@ class AudioFlamingo3ForConditionalGeneration(
...
@@ -609,7 +609,7 @@ class AudioFlamingo3ForConditionalGeneration(
def
forward
(
def
forward
(
self
,
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
vllm/model_executor/models/aya_vision.py
View file @
82e40fb7
...
@@ -420,7 +420,7 @@ class AyaVisionForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsP
...
@@ -420,7 +420,7 @@ class AyaVisionForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsP
def
forward
(
def
forward
(
self
,
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
vllm/model_executor/models/bagel.py
View file @
82e40fb7
...
@@ -507,7 +507,7 @@ class BagelForConditionalGeneration(
...
@@ -507,7 +507,7 @@ class BagelForConditionalGeneration(
def
forward
(
def
forward
(
self
,
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
vllm/model_executor/models/baichuan.py
View file @
82e40fb7
...
@@ -311,7 +311,7 @@ class BaiChuanModel(nn.Module):
...
@@ -311,7 +311,7 @@ class BaiChuanModel(nn.Module):
def
forward
(
def
forward
(
self
,
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
,
intermediate_tensors
:
IntermediateTensors
|
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
@@ -428,7 +428,7 @@ class BaiChuanBaseForCausalLM(nn.Module, SupportsLoRA, SupportsPP, SupportsQuant
...
@@ -428,7 +428,7 @@ class BaiChuanBaseForCausalLM(nn.Module, SupportsLoRA, SupportsPP, SupportsQuant
def
forward
(
def
forward
(
self
,
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
vllm/model_executor/models/bailing_moe.py
View file @
82e40fb7
...
@@ -440,7 +440,7 @@ class BailingMoeModel(nn.Module):
...
@@ -440,7 +440,7 @@ class BailingMoeModel(nn.Module):
def
forward
(
def
forward
(
self
,
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
position_ids
:
torch
.
Tensor
,
position_ids
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
,
intermediate_tensors
:
IntermediateTensors
|
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
@@ -611,7 +611,7 @@ class BailingMoeForCausalLM(nn.Module, SupportsPP, SupportsLoRA):
...
@@ -611,7 +611,7 @@ class BailingMoeForCausalLM(nn.Module, SupportsPP, SupportsLoRA):
def
forward
(
def
forward
(
self
,
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
vllm/model_executor/models/bamba.py
View file @
82e40fb7
...
@@ -311,7 +311,7 @@ class BambaModel(nn.Module):
...
@@ -311,7 +311,7 @@ class BambaModel(nn.Module):
def
forward
(
def
forward
(
self
,
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
@@ -493,7 +493,7 @@ class BambaForCausalLM(
...
@@ -493,7 +493,7 @@ class BambaForCausalLM(
def
forward
(
def
forward
(
self
,
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
vllm/model_executor/models/bert_with_rope.py
View file @
82e40fb7
...
@@ -475,7 +475,7 @@ class BertWithRope(nn.Module, SupportsQuant):
...
@@ -475,7 +475,7 @@ class BertWithRope(nn.Module, SupportsQuant):
def
forward
(
def
forward
(
self
,
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
vllm/model_executor/models/blip2.py
View file @
82e40fb7
...
@@ -641,7 +641,7 @@ class Blip2ForConditionalGeneration(
...
@@ -641,7 +641,7 @@ class Blip2ForConditionalGeneration(
def
forward
(
def
forward
(
self
,
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
vllm/model_executor/models/bloom.py
View file @
82e40fb7
...
@@ -276,7 +276,7 @@ class BloomModel(nn.Module):
...
@@ -276,7 +276,7 @@ class BloomModel(nn.Module):
def
forward
(
def
forward
(
self
,
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
position_ids
:
torch
.
Tensor
,
position_ids
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
,
intermediate_tensors
:
IntermediateTensors
|
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
@@ -358,7 +358,7 @@ class BloomForCausalLM(nn.Module, SupportsPP, SupportsQuant):
...
@@ -358,7 +358,7 @@ class BloomForCausalLM(nn.Module, SupportsPP, SupportsQuant):
def
forward
(
def
forward
(
self
,
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
vllm/model_executor/models/chameleon.py
View file @
82e40fb7
...
@@ -994,7 +994,7 @@ class ChameleonForConditionalGeneration(
...
@@ -994,7 +994,7 @@ class ChameleonForConditionalGeneration(
def
forward
(
def
forward
(
self
,
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
vllm/model_executor/models/chatglm.py
View file @
82e40fb7
...
@@ -362,7 +362,7 @@ class ChatGLMModel(nn.Module, SupportsQuant):
...
@@ -362,7 +362,7 @@ class ChatGLMModel(nn.Module, SupportsQuant):
def
forward
(
def
forward
(
self
,
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
@@ -491,7 +491,7 @@ class ChatGLMForCausalLM(ChatGLMBaseModel, SupportsLoRA, SupportsPP, SupportsQua
...
@@ -491,7 +491,7 @@ class ChatGLMForCausalLM(ChatGLMBaseModel, SupportsLoRA, SupportsPP, SupportsQua
def
forward
(
def
forward
(
self
,
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
vllm/model_executor/models/cohere2_vision.py
View file @
82e40fb7
...
@@ -446,7 +446,7 @@ class Cohere2VisionForConditionalGeneration(nn.Module, SupportsMultiModal, Suppo
...
@@ -446,7 +446,7 @@ class Cohere2VisionForConditionalGeneration(nn.Module, SupportsMultiModal, Suppo
def
forward
(
def
forward
(
self
,
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
vllm/model_executor/models/commandr.py
View file @
82e40fb7
...
@@ -312,7 +312,7 @@ class CohereModel(nn.Module):
...
@@ -312,7 +312,7 @@ class CohereModel(nn.Module):
def
forward
(
def
forward
(
self
,
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
,
intermediate_tensors
:
IntermediateTensors
|
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
@@ -438,7 +438,7 @@ class CohereForCausalLM(nn.Module, SupportsLoRA, SupportsPP, SupportsQuant):
...
@@ -438,7 +438,7 @@ class CohereForCausalLM(nn.Module, SupportsLoRA, SupportsPP, SupportsQuant):
@
torch
.
no_grad
()
@
torch
.
no_grad
()
def
forward
(
def
forward
(
self
,
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
vllm/model_executor/models/dbrx.py
View file @
82e40fb7
...
@@ -361,7 +361,7 @@ class DbrxModel(nn.Module):
...
@@ -361,7 +361,7 @@ class DbrxModel(nn.Module):
def
forward
(
def
forward
(
self
,
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
position_ids
:
torch
.
Tensor
,
position_ids
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
,
intermediate_tensors
:
IntermediateTensors
|
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
@@ -462,7 +462,7 @@ class DbrxForCausalLM(nn.Module, SupportsPP):
...
@@ -462,7 +462,7 @@ class DbrxForCausalLM(nn.Module, SupportsPP):
def
forward
(
def
forward
(
self
,
self
,
input_ids
:
torch
.
Tensor
,
input_ids
:
torch
.
Tensor
|
None
,
positions
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
intermediate_tensors
:
IntermediateTensors
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
inputs_embeds
:
torch
.
Tensor
|
None
=
None
,
...
...
Prev
1
…
3
4
5
6
7
8
9
10
11
…
16
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment