Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
8b6c6b95
Unverified
Commit
8b6c6b95
authored
Mar 20, 2026
by
Ilya Boytsov
Committed by
GitHub
Mar 20, 2026
Browse files
[Model] Add LFM2-ColBERT-350M support (#37528)
Signed-off-by:
Ilya Boytsov
<
ilyaboytsov1805@gmail.com
>
parent
9f6d9dd3
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
125 additions
and
1 deletion
+125
-1
docs/models/pooling_models/specific_models.md
docs/models/pooling_models/specific_models.md
+5
-0
docs/models/pooling_models/token_embed.md
docs/models/pooling_models/token_embed.md
+1
-0
tests/models/language/pooling/test_colbert.py
tests/models/language/pooling/test_colbert.py
+16
-0
tests/models/registry.py
tests/models/registry.py
+5
-0
vllm/model_executor/models/colbert.py
vllm/model_executor/models/colbert.py
+97
-1
vllm/model_executor/models/registry.py
vllm/model_executor/models/registry.py
+1
-0
No files found.
docs/models/pooling_models/specific_models.md
View file @
8b6c6b95
...
@@ -11,6 +11,7 @@ vLLM supports ColBERT models with multiple encoder backbones:
...
@@ -11,6 +11,7 @@ vLLM supports ColBERT models with multiple encoder backbones:
|
`HF_ColBERT`
| BERT |
`answerdotai/answerai-colbert-small-v1`
,
`colbert-ir/colbertv2.0`
|
|
`HF_ColBERT`
| BERT |
`answerdotai/answerai-colbert-small-v1`
,
`colbert-ir/colbertv2.0`
|
|
`ColBERTModernBertModel`
| ModernBERT |
`lightonai/GTE-ModernColBERT-v1`
|
|
`ColBERTModernBertModel`
| ModernBERT |
`lightonai/GTE-ModernColBERT-v1`
|
|
`ColBERTJinaRobertaModel`
| Jina XLM-RoBERTa |
`jinaai/jina-colbert-v2`
|
|
`ColBERTJinaRobertaModel`
| Jina XLM-RoBERTa |
`jinaai/jina-colbert-v2`
|
|
`ColBERTLfm2Model`
| LFM2 |
`LiquidAI/LFM2-ColBERT-350M`
|
**BERT-based ColBERT**
models work out of the box:
**BERT-based ColBERT**
models work out of the box:
...
@@ -29,6 +30,10 @@ vllm serve lightonai/GTE-ModernColBERT-v1 \
...
@@ -29,6 +30,10 @@ vllm serve lightonai/GTE-ModernColBERT-v1 \
vllm serve jinaai/jina-colbert-v2
\
vllm serve jinaai/jina-colbert-v2
\
--hf-overrides
'{"architectures": ["ColBERTJinaRobertaModel"]}'
\
--hf-overrides
'{"architectures": ["ColBERTJinaRobertaModel"]}'
\
--trust-remote-code
--trust-remote-code
# LFM2 backbone
vllm serve LiquidAI/LFM2-ColBERT-350M
\
--hf-overrides
'{"architectures": ["ColBERTLfm2Model"]}'
```
```
Then you can use the rerank API:
Then you can use the rerank API:
...
...
docs/models/pooling_models/token_embed.md
View file @
8b6c6b95
...
@@ -39,6 +39,7 @@ Models of any architecture can be converted into embedding models using `--conve
...
@@ -39,6 +39,7 @@ Models of any architecture can be converted into embedding models using `--conve
| Architecture | Models | Example HF Models |
[
LoRA
](
../../features/lora.md
)
|
[
PP
](
../../serving/parallelism_scaling.md
)
|
| Architecture | Models | Example HF Models |
[
LoRA
](
../../features/lora.md
)
|
[
PP
](
../../serving/parallelism_scaling.md
)
|
| ------------ | ------ | ----------------- | -------------------- | ------------------------- |
| ------------ | ------ | ----------------- | -------------------- | ------------------------- |
|
`ColBERTLfm2Model`
| LFM2 |
`LiquidAI/LFM2-ColBERT-350M`
| | |
|
`ColBERTModernBertModel`
| ModernBERT |
`lightonai/GTE-ModernColBERT-v1`
| | |
|
`ColBERTModernBertModel`
| ModernBERT |
`lightonai/GTE-ModernColBERT-v1`
| | |
|
`ColBERTJinaRobertaModel`
| Jina XLM-RoBERTa |
`jinaai/jina-colbert-v2`
| | |
|
`ColBERTJinaRobertaModel`
| Jina XLM-RoBERTa |
`jinaai/jina-colbert-v2`
| | |
|
`HF_ColBERT`
| BERT |
`answerdotai/answerai-colbert-small-v1`
,
`colbert-ir/colbertv2.0`
| | |
|
`HF_ColBERT`
| BERT |
`answerdotai/answerai-colbert-small-v1`
,
`colbert-ir/colbertv2.0`
| | |
...
...
tests/models/language/pooling/test_colbert.py
View file @
8b6c6b95
...
@@ -59,6 +59,22 @@ COLBERT_MODELS = {
...
@@ -59,6 +59,22 @@ COLBERT_MODELS = {
"model_cls"
:
"AutoModel"
,
"model_cls"
:
"AutoModel"
,
},
},
},
},
"lfm2"
:
{
"model"
:
"LiquidAI/LFM2-ColBERT-350M"
,
"colbert_dim"
:
128
,
"max_model_len"
:
511
,
"extra_kwargs"
:
{
"hf_overrides"
:
{
"architectures"
:
[
"ColBERTLfm2Model"
],
},
},
"hf_comparison"
:
{
"weights_file"
:
"1_Dense/model.safetensors"
,
"weights_key"
:
"linear.weight"
,
"trust_remote_code"
:
False
,
"model_cls"
:
"AutoModel"
,
},
},
}
}
...
...
tests/models/registry.py
View file @
8b6c6b95
...
@@ -628,6 +628,11 @@ _LATE_INTERACTION_EXAMPLE_MODELS = {
...
@@ -628,6 +628,11 @@ _LATE_INTERACTION_EXAMPLE_MODELS = {
trust_remote_code
=
True
,
trust_remote_code
=
True
,
hf_overrides
=
{
"architectures"
:
[
"ColBERTJinaRobertaModel"
]},
hf_overrides
=
{
"architectures"
:
[
"ColBERTJinaRobertaModel"
]},
),
),
"ColBERTLfm2Model"
:
_HfExamplesInfo
(
"LiquidAI/LFM2-ColBERT-350M"
,
trust_remote_code
=
True
,
hf_overrides
=
{
"architectures"
:
[
"ColBERTLfm2Model"
]},
),
# [Multimodal]
# [Multimodal]
"ColModernVBertForRetrieval"
:
_HfExamplesInfo
(
"ColModernVBertForRetrieval"
:
_HfExamplesInfo
(
"ModernVBERT/colmodernvbert-merged"
,
"ModernVBERT/colmodernvbert-merged"
,
...
...
vllm/model_executor/models/colbert.py
View file @
8b6c6b95
...
@@ -27,8 +27,9 @@ from vllm.model_executor.layers.pooler import Pooler
...
@@ -27,8 +27,9 @@ from vllm.model_executor.layers.pooler import Pooler
from
vllm.model_executor.layers.pooler.tokwise
import
pooler_for_token_embed
from
vllm.model_executor.layers.pooler.tokwise
import
pooler_for_token_embed
from
.bert
import
BertEmbeddingModel
,
BertModel
from
.bert
import
BertEmbeddingModel
,
BertModel
from
.interfaces
import
SupportsLateInteraction
from
.interfaces
import
HasInnerState
,
IsHybrid
,
SupportsLateInteraction
from
.interfaces_base
import
default_pooling_type
from
.interfaces_base
import
default_pooling_type
from
.lfm2
import
Lfm2ForCausalLM
,
Lfm2Model
class
ColBERTMixin
(
nn
.
Module
,
SupportsLateInteraction
):
class
ColBERTMixin
(
nn
.
Module
,
SupportsLateInteraction
):
...
@@ -414,3 +415,98 @@ class ColBERTJinaRobertaModel(ColBERTMixin, nn.Module):
...
@@ -414,3 +415,98 @@ class ColBERTJinaRobertaModel(ColBERTMixin, nn.Module):
loaded
.
update
(
colbert_loaded
)
loaded
.
update
(
colbert_loaded
)
return
loaded
return
loaded
# -----------------------------------------------------------------------
# Concrete model: ColBERT + LFM2 backbone
# -----------------------------------------------------------------------
@default_pooling_type(seq_pooling_type="CLS", tok_pooling_type="ALL")
class ColBERTLfm2Model(ColBERTMixin, nn.Module, HasInnerState, IsHybrid):
    """Late-interaction ColBERT model built on top of an LFM2 backbone.

    Intended for ``LiquidAI/LFM2-ColBERT-350M`` and similarly structured
    checkpoints. When the main checkpoint does not contain the projection,
    it is auto-loaded from the sentence-transformers ``1_Dense/`` directory.
    """

    is_pooling_model = True

    # LFM2 is a hybrid backbone (attention + SSM layers). Advertising that
    # through these two flags makes
    # HybridAttentionMambaModelConfig.verify_and_update_config run, which
    # initialises mamba_block_size and the related cache settings.
    is_hybrid = True
    has_inner_state = True

    @classmethod
    def get_mamba_state_shape_from_config(cls, vllm_config: VllmConfig):
        """Return the mamba state shape, as computed by ``Lfm2ForCausalLM``."""
        return Lfm2ForCausalLM.get_mamba_state_shape_from_config(vllm_config)

    @classmethod
    def get_mamba_state_dtype_from_config(cls, vllm_config: VllmConfig):
        """Return the mamba state dtype, as computed by ``Lfm2ForCausalLM``."""
        return Lfm2ForCausalLM.get_mamba_state_dtype_from_config(vllm_config)

    @classmethod
    def get_mamba_state_copy_func(cls):
        """Return the mamba state copy function of ``Lfm2ForCausalLM``."""
        return Lfm2ForCausalLM.get_mamba_state_copy_func()

    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
        """Build the ColBERT components, the LFM2 backbone and the pooler.

        Args:
            vllm_config: Engine configuration; its ``model_config`` supplies
                the HF config, the head dtype and the pooler config.
            prefix: Prefix forwarded unchanged to ``Lfm2Model``.
        """
        super().__init__()

        model_config = vllm_config.model_config
        hf_config = model_config.hf_config

        self._init_colbert_components(
            hidden_size=hf_config.hidden_size,
            colbert_dim=self.get_colbert_dim_from_config(hf_config),
            head_dtype=model_config.head_dtype,
        )

        self.model = Lfm2Model(vllm_config=vllm_config, prefix=prefix)

        pooler_config = vllm_config.model_config.pooler_config
        assert pooler_config is not None
        self.pooler = self._build_colbert_pooler(pooler_config)

    def embed_input_ids(self, input_ids: torch.Tensor) -> torch.Tensor:
        """Embed ``input_ids`` using the backbone's embedding method."""
        return self.model.embed_input_ids(input_ids)

    def forward(
        self,
        input_ids: torch.Tensor,
        positions: torch.Tensor,
        intermediate_tensors=None,
        inputs_embeds: torch.Tensor | None = None,
    ) -> torch.Tensor:
        """Run the LFM2 backbone and return its output unchanged."""
        backbone_kwargs = dict(
            input_ids=input_ids,
            positions=positions,
            inputs_embeds=inputs_embeds,
            intermediate_tensors=intermediate_tensors,
        )
        return self.model(**backbone_kwargs)

    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]):
        """Load ColBERT and backbone weights; return the loaded names.

        The ColBERT-specific weights are split off first by
        ``_load_colbert_weights``; whatever remains is handed to the
        backbone. Backbone names are reported back with a ``model.`` prefix.
        """
        remaining, colbert_loaded = self._load_colbert_weights(weights)

        # Strip the "model." prefix added by the embedding adapter.
        backbone_weights = [
            (weight_name.removeprefix("model."), tensor)
            for weight_name, tensor in remaining
        ]
        backbone_loaded = self.model.load_weights(backbone_weights)
        prefixed_backbone = {f"model.{name}" for name in backbone_loaded}
        loaded = prefixed_backbone | colbert_loaded

        # If the ST projector was auto-loaded during init rather than read
        # from the main checkpoint, report its parameters as loaded so the
        # weight validator does not complain about them.
        head = getattr(self.pooler, "head", None)
        projector = getattr(head, "projector", None)
        if isinstance(projector, nn.Module):
            loaded.update(
                f"pooler.head.projector.{name}"
                for name, _ in projector.named_parameters()
            )

        return loaded
vllm/model_executor/models/registry.py
View file @
8b6c6b95
...
@@ -269,6 +269,7 @@ _LATE_INTERACTION_MODELS = {
...
@@ -269,6 +269,7 @@ _LATE_INTERACTION_MODELS = {
"HF_ColBERT"
:
(
"colbert"
,
"ColBERTModel"
),
"HF_ColBERT"
:
(
"colbert"
,
"ColBERTModel"
),
"ColBERTModernBertModel"
:
(
"colbert"
,
"ColBERTModernBertModel"
),
"ColBERTModernBertModel"
:
(
"colbert"
,
"ColBERTModernBertModel"
),
"ColBERTJinaRobertaModel"
:
(
"colbert"
,
"ColBERTJinaRobertaModel"
),
"ColBERTJinaRobertaModel"
:
(
"colbert"
,
"ColBERTJinaRobertaModel"
),
"ColBERTLfm2Model"
:
(
"colbert"
,
"ColBERTLfm2Model"
),
# [Multimodal]
# [Multimodal]
"ColModernVBertForRetrieval"
:
(
"colmodernvbert"
,
"ColModernVBertForRetrieval"
),
"ColModernVBertForRetrieval"
:
(
"colmodernvbert"
,
"ColModernVBertForRetrieval"
),
"ColPaliForRetrieval"
:
(
"colpali"
,
"ColPaliModel"
),
"ColPaliForRetrieval"
:
(
"colpali"
,
"ColPaliModel"
),
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment