Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
ec8f943d
Unverified
Commit
ec8f943d
authored
Feb 26, 2026
by
hujia177
Committed by
GitHub
Feb 26, 2026
Browse files
Add GlmOcrConfig for GLM-OCR model type recognition (#34982)
parent
f2ad952f
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
96 additions
and
0 deletions
+96
-0
vllm/transformers_utils/config.py
vllm/transformers_utils/config.py
+1
-0
vllm/transformers_utils/configs/__init__.py
vllm/transformers_utils/configs/__init__.py
+4
-0
vllm/transformers_utils/configs/glm_ocr.py
vllm/transformers_utils/configs/glm_ocr.py
+91
-0
No files found.
vllm/transformers_utils/config.py
View file @
ec8f943d
...
...
@@ -82,6 +82,7 @@ _CONFIG_REGISTRY: dict[str, type[PretrainedConfig]] = LazyConfigDict(
deepseek_v32
=
"DeepseekV3Config"
,
flex_olmo
=
"FlexOlmoConfig"
,
funaudiochat
=
"FunAudioChatConfig"
,
glm_ocr
=
"GlmOcrConfig"
,
hunyuan_vl
=
"HunYuanVLConfig"
,
isaac
=
"IsaacConfig"
,
kimi_linear
=
"KimiLinearConfig"
,
...
...
vllm/transformers_utils/configs/__init__.py
View file @
ec8f943d
...
...
@@ -28,6 +28,8 @@ _CLASS_TO_MODULE: dict[str, str] = {
"FlexOlmoConfig"
:
"vllm.transformers_utils.configs.flex_olmo"
,
"FunAudioChatConfig"
:
"vllm.transformers_utils.configs.funaudiochat"
,
"FunAudioChatAudioEncoderConfig"
:
"vllm.transformers_utils.configs.funaudiochat"
,
"GlmOcrConfig"
:
"vllm.transformers_utils.configs.glm_ocr"
,
"GlmOcrVisionConfig"
:
"vllm.transformers_utils.configs.glm_ocr"
,
"HunYuanVLConfig"
:
"vllm.transformers_utils.configs.hunyuan_vl"
,
"HunYuanVLTextConfig"
:
"vllm.transformers_utils.configs.hunyuan_vl"
,
"HunYuanVLVisionConfig"
:
"vllm.transformers_utils.configs.hunyuan_vl"
,
...
...
@@ -83,6 +85,8 @@ __all__ = [
"FlexOlmoConfig"
,
"FunAudioChatConfig"
,
"FunAudioChatAudioEncoderConfig"
,
"GlmOcrConfig"
,
"GlmOcrVisionConfig"
,
"HunYuanVLConfig"
,
"HunYuanVLTextConfig"
,
"HunYuanVLVisionConfig"
,
...
...
vllm/transformers_utils/configs/glm_ocr.py
0 → 100644
View file @
ec8f943d
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from
__future__
import
annotations
from
typing
import
Any
from
transformers.configuration_utils
import
PretrainedConfig
class GlmOcrVisionConfig(PretrainedConfig):
    """Settings container for the vision side of GLM-OCR.

    Registered under ``model_type == "glm_ocr_vision"``. Every named
    constructor argument is stored unchanged on the instance under the same
    attribute name; any additional keyword arguments are forwarded to
    ``PretrainedConfig.__init__``.
    """

    model_type = "glm_ocr_vision"

    def __init__(
        self,
        hidden_size: int = 1024,
        depth: int = 24,
        num_heads: int = 16,
        attention_bias: bool = True,
        intermediate_size: int = 4096,
        hidden_act: str = "silu",
        hidden_dropout_prob: float = 0.0,
        initializer_range: float = 0.02,
        image_size: int = 336,
        in_channels: int = 3,
        patch_size: int = 14,
        out_hidden_size: int = 1536,
        rms_norm_eps: float = 1e-5,
        spatial_merge_size: int = 2,
        temporal_patch_size: int = 2,
        **kwargs: Any,
    ) -> None:
        # Base-class initialisation runs first and consumes the generic
        # keyword arguments; the model-specific fields are attached afterwards.
        super().__init__(**kwargs)

        # Encoder stack settings.
        self.hidden_size = hidden_size
        self.depth = depth
        self.num_heads = num_heads
        self.attention_bias = attention_bias
        self.intermediate_size = intermediate_size
        self.hidden_act = hidden_act
        self.hidden_dropout_prob = hidden_dropout_prob
        self.initializer_range = initializer_range

        # Input image / patch settings.
        self.image_size = image_size
        self.in_channels = in_channels
        self.patch_size = patch_size

        # Output projection, normalisation and patch-merging settings.
        self.out_hidden_size = out_hidden_size
        self.rms_norm_eps = rms_norm_eps
        self.spatial_merge_size = spatial_merge_size
        self.temporal_patch_size = temporal_patch_size
class GlmOcrConfig(PretrainedConfig):
    """Top-level configuration for GLM-OCR (``model_type == "glm_ocr"``).

    Holds a text sub-config, a :class:`GlmOcrVisionConfig`, and the special
    token ids used to mark image and video content.

    Args:
        text_config: Either a dict of keyword arguments for the text config
            (its optional ``"model_type"`` entry selects the config class,
            defaulting to ``"chatglm"``), an already-built config object
            (stored as-is), or ``None`` to build a default ``"chatglm"``
            config.
        vision_config: Either a mapping of keyword arguments for
            :class:`GlmOcrVisionConfig`, an already-built config object
            (stored as-is), or ``None`` for the defaults.
        image_start_token_id: Stored unchanged on the instance.
        image_end_token_id: Stored unchanged on the instance.
        video_start_token_id: Stored unchanged on the instance.
        video_end_token_id: Stored unchanged on the instance.
        image_token_id: Stored unchanged on the instance.
        video_token_id: Stored unchanged on the instance.
        **kwargs: Forwarded to ``PretrainedConfig.__init__``.
    """

    model_type = "glm_ocr"

    def __init__(
        self,
        text_config: dict | PretrainedConfig | None = None,
        vision_config: dict | PretrainedConfig | None = None,
        image_start_token_id: int = 59256,
        image_end_token_id: int = 59257,
        video_start_token_id: int = 59258,
        video_end_token_id: int = 59259,
        image_token_id: int = 59280,
        video_token_id: int = 59281,
        **kwargs: Any,
    ):
        super().__init__(**kwargs)

        self.image_start_token_id = image_start_token_id
        self.image_end_token_id = image_end_token_id
        self.video_start_token_id = video_start_token_id
        self.video_end_token_id = video_end_token_id
        self.image_token_id = image_token_id
        self.video_token_id = video_token_id

        if isinstance(vision_config, PretrainedConfig):
            # A ready-made config object cannot be splatted with ``**``;
            # keep it as-is, mirroring how ``text_config`` objects are handled.
            self.vision_config = vision_config
        else:
            self.vision_config = GlmOcrVisionConfig(**(vision_config or {}))

        if text_config is None or isinstance(text_config, dict):
            # Import kept function-local, as in the original implementation.
            from transformers import AutoConfig

            # Work on a copy so the caller's dict is never mutated, and remove
            # "model_type" from the keyword arguments: it is passed
            # positionally below, and supplying it twice raises TypeError.
            text_kwargs = dict(text_config or {})
            text_model_type = text_kwargs.pop("model_type", "chatglm")
            self.text_config = AutoConfig.for_model(text_model_type, **text_kwargs)
        else:
            # Anything else is assumed to be an already-built config object.
            self.text_config = text_config

    def get_text_config(self, *args: Any, **kwargs: Any) -> PretrainedConfig:
        """Return the text sub-config held by this configuration.

        Extra positional/keyword arguments are accepted and ignored so that
        callers using the base-class signature (which, in recent transformers
        releases, takes optional selector arguments — see its documentation)
        do not fail; this class only ever has one text sub-config to return.
        """
        return self.text_config

    def save_pretrained(self, save_directory, **kwargs):
        """Save the configuration to ``save_directory``.

        ``_auto_class`` is reset to ``None`` before delegating to the base
        implementation.
        NOTE(review): presumably this prevents the base class from treating
        the config as a custom auto-class with code to export — confirm.
        """
        self._auto_class = None
        super().save_pretrained(save_directory, **kwargs)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment