Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
ac4f685b
Commit
ac4f685b
authored
Jan 06, 2026
by
zhuwenwen
Browse files
remove qiyuan-8b-v2 and FM9GForCausalLM
parent
05e8b083
Changes
11
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
10 additions
and
1309 deletions
+10
-1309
vllm/config/model.py
vllm/config/model.py
+1
-1
vllm/entrypoints/openai/serving_engine.py
vllm/entrypoints/openai/serving_engine.py
+3
-10
vllm/inputs/preprocess.py
vllm/inputs/preprocess.py
+1
-4
vllm/model_executor/models/fm9g.py
vllm/model_executor/models/fm9g.py
+0
-592
vllm/model_executor/models/registry.py
vllm/model_executor/models/registry.py
+0
-1
vllm/tokenizers/detokenizer_utils.py
vllm/tokenizers/detokenizer_utils.py
+5
-19
vllm/transformers_utils/configs/__init__.py
vllm/transformers_utils/configs/__init__.py
+0
-2
vllm/transformers_utils/configs/fm9g.py
vllm/transformers_utils/configs/fm9g.py
+0
-187
vllm/transformers_utils/tokenizers/__init__.py
vllm/transformers_utils/tokenizers/__init__.py
+0
-8
vllm/transformers_utils/tokenizers/cpm_9g.py
vllm/transformers_utils/tokenizers/cpm_9g.py
+0
-483
vllm/v1/engine/detokenizer.py
vllm/v1/engine/detokenizer.py
+0
-2
No files found.
vllm/config/model.py
View file @
ac4f685b
...
...
@@ -74,7 +74,7 @@ logger = init_logger(__name__)
RunnerOption
=
Literal
[
"auto"
,
RunnerType
]
ConvertType
=
Literal
[
"none"
,
"embed"
,
"classify"
,
"reward"
]
ConvertOption
=
Literal
[
"auto"
,
ConvertType
]
TokenizerMode
=
Literal
[
"auto"
,
"hf"
,
"slow"
,
"mistral"
,
"deepseek_v32"
,
"cpm"
]
TokenizerMode
=
Literal
[
"auto"
,
"hf"
,
"slow"
,
"mistral"
,
"deepseek_v32"
]
ModelDType
=
Literal
[
"auto"
,
"half"
,
"float16"
,
"bfloat16"
,
"float"
,
"float32"
]
LogprobsMode
=
Literal
[
"raw_logits"
,
"raw_logprobs"
,
"processed_logits"
,
"processed_logprobs"
...
...
vllm/entrypoints/openai/serving_engine.py
View file @
ac4f685b
...
...
@@ -118,7 +118,6 @@ from vllm.utils.async_utils import (
)
from
vllm.utils.collection_utils
import
is_list_of
from
vllm.v1.engine
import
EngineCoreRequest
from
vllm.transformers_utils.tokenizers
import
CPM9GTokenizer
class
GenerationError
(
Exception
):
...
...
@@ -260,9 +259,6 @@ class OpenAIServing:
self
.
io_processor
=
self
.
models
.
io_processor
self
.
model_config
=
self
.
models
.
model_config
self
.
max_model_len
=
self
.
model_config
.
max_model_len
self
.
tokenizer_mode
=
self
.
models
.
model_config
.
tokenizer_mode
if
self
.
models
.
model_config
.
tokenizer_mode
==
"cpm"
:
self
.
tokenizer
=
CPM9GTokenizer
(
self
.
models
.
model_config
.
model
,
trust_remote_code
=
True
)
def
_get_tool_parser
(
self
,
tool_parser_name
:
str
|
None
=
None
,
enable_auto_tools
:
bool
=
False
...
...
@@ -937,11 +933,8 @@ class OpenAIServing:
max_length
=
truncate_prompt_tokens
,
)
if
self
.
tokenizer_mode
==
"cpm"
:
input_ids
=
[
self
.
tokenizer
.
bos_id
]
+
self
.
tokenizer
.
encode
(
prompt
)
else
:
input_ids
=
encoded
.
input_ids
input_ids
=
encoded
.
input_ids
input_text
=
prompt
return
self
.
_validate_input
(
request
,
input_ids
,
input_text
)
...
...
@@ -965,7 +958,7 @@ class OpenAIServing:
input_text
=
""
else
:
async_tokenizer
=
self
.
_get_async_tokenizer
(
tokenizer
)
input_text
=
await
async_tokenizer
.
decode
(
input_ids
)
if
self
.
tokenizer_mode
!=
"cpm"
else
await
self
.
tokenizer
.
decode_all
(
input_ids
)
input_text
=
await
async_tokenizer
.
decode
(
input_ids
)
return
self
.
_validate_input
(
request
,
input_ids
,
input_text
)
...
...
vllm/inputs/preprocess.py
View file @
ac4f685b
...
...
@@ -226,9 +226,6 @@ class InputPreprocessor:
if
encoder_config
and
encoder_config
.
get
(
"do_lower_case"
,
False
):
prompt
=
prompt
.
lower
()
if
self
.
model_config
.
tokenizer_mode
==
"cpm"
:
return
[
tokenizer
.
bos_id
]
+
tokenizer
.
encode
(
prompt
)
else
:
return
tokenizer
.
encode
(
prompt
,
**
tokenization_kwargs
)
def
_get_mm_processor
(
self
)
->
BaseMultiModalProcessor
:
...
...
vllm/model_executor/models/fm9g.py
deleted
100644 → 0
View file @
05e8b083
This diff is collapsed.
Click to expand it.
vllm/model_executor/models/registry.py
View file @
ac4f685b
...
...
@@ -95,7 +95,6 @@ _TEXT_GENERATION_MODELS = {
"Ernie4_5_MoeForCausalLM"
:
(
"ernie45_moe"
,
"Ernie4_5_MoeForCausalLM"
),
"ExaoneForCausalLM"
:
(
"exaone"
,
"ExaoneForCausalLM"
),
"Exaone4ForCausalLM"
:
(
"exaone4"
,
"Exaone4ForCausalLM"
),
"FM9GForCausalLM"
:
(
"fm9g"
,
"FM9GForCausalLM"
),
"Fairseq2LlamaForCausalLM"
:
(
"fairseq2_llama"
,
"Fairseq2LlamaForCausalLM"
),
"FalconForCausalLM"
:
(
"falcon"
,
"FalconForCausalLM"
),
"FalconMambaForCausalLM"
:
(
"mamba"
,
"MambaForCausalLM"
),
...
...
vllm/tokenizers/detokenizer_utils.py
View file @
ac4f685b
...
...
@@ -16,7 +16,6 @@ def _convert_tokens_to_string_with_added_encoders(
output_tokens
:
list
[
str
],
skip_special_tokens
:
bool
,
spaces_between_special_tokens
:
bool
,
mode
:
str
,
)
->
str
:
# Adapted from
# https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/tokenization_utils.py#L921
...
...
@@ -30,12 +29,9 @@ def _convert_tokens_to_string_with_added_encoders(
current_sub_text
:
list
[
str
]
=
[]
convert_tokens_to_string
=
tokenizer
.
convert_tokens_to_string
added_vocab_set
=
set
(
tokenizer
.
get_added_vocab
())
if
mode
!=
"cpm"
:
all_special_tokens
=
(
set
(
tokenizer
.
all_special_tokens
)
if
skip_special_tokens
else
()
)
else
:
all_special_tokens
=
tokenizer
.
_special_token_set
for
token
in
output_tokens
:
# Use precomputed set for skip-special check
...
...
@@ -49,10 +45,7 @@ def _convert_tokens_to_string_with_added_encoders(
else
:
current_sub_text
.
append
(
token
)
if
current_sub_text
:
if
mode
!=
"cpm"
:
sub_texts
.
append
(
convert_tokens_to_string
(
current_sub_text
))
else
:
sub_texts
=
tokenizer
.
decode
(
current_sub_text
)
if
spaces_between_special_tokens
:
return
" "
.
join
(
sub_texts
)
return
""
.
join
(
sub_texts
)
...
...
@@ -122,7 +115,6 @@ def detokenize_incrementally(
read_offset
:
int
,
skip_special_tokens
:
bool
=
False
,
spaces_between_special_tokens
:
bool
=
True
,
mode
:
str
=
"cpm"
,
)
->
tuple
[
list
[
str
],
str
,
int
,
int
]:
"""Detokenizes the input ids incrementally and returns the new tokens
and the new text.
...
...
@@ -158,11 +150,7 @@ def detokenize_incrementally(
assert
prev_tokens
is
not
None
# If the new token id is out of bounds, return an empty string.
if
mode
==
"cpm"
:
vocab_size
=
tokenizer
.
vocab_size
else
:
vocab_size
=
len
(
tokenizer
)
if
0
<=
new_token_id
<
vocab_size
:
if
0
<=
new_token_id
<
len
(
tokenizer
):
# Put new_token_id in a list so skip_special_tokens is respected
new_tokens
=
tokenizer
.
convert_ids_to_tokens
(
[
new_token_id
],
skip_special_tokens
=
skip_special_tokens
...
...
@@ -191,14 +179,12 @@ def detokenize_incrementally(
output_tokens
[
prefix_offset
:
read_offset
],
skip_special_tokens
=
skip_special_tokens
,
spaces_between_special_tokens
=
spaces_between_special_tokens
,
mode
=
mode
,
)
new_text
=
_convert_tokens_to_string_with_added_encoders
(
tokenizer
,
output_tokens
[
prefix_offset
:],
skip_special_tokens
=
skip_special_tokens
,
spaces_between_special_tokens
=
spaces_between_special_tokens
,
mode
=
mode
,
)
if
len
(
new_text
)
<=
len
(
prefix_text
)
or
new_text
.
endswith
(
"�"
):
...
...
vllm/transformers_utils/configs/__init__.py
View file @
ac4f685b
...
...
@@ -26,7 +26,6 @@ _CLASS_TO_MODULE: dict[str, str] = {
"HunYuanVLConfig"
:
"vllm.transformers_utils.configs.hunyuan_vl"
,
"HunYuanVLTextConfig"
:
"vllm.transformers_utils.configs.hunyuan_vl"
,
"HunYuanVLVisionConfig"
:
"vllm.transformers_utils.configs.hunyuan_vl"
,
"FM9GConfig"
:
"vllm.transformers_utils.configs.fm9g"
,
# RWConfig is for the original tiiuae/falcon-40b(-instruct) and
# tiiuae/falcon-7b(-instruct) models. Newer Falcon models will use the
# `FalconConfig` class from the official HuggingFace transformers library.
...
...
@@ -63,7 +62,6 @@ __all__ = [
"DeepseekV3Config"
,
"DotsOCRConfig"
,
"EAGLEConfig"
,
"FM9GConfig"
,
"FlexOlmoConfig"
,
"HunYuanVLConfig"
,
"HunYuanVLTextConfig"
,
...
...
vllm/transformers_utils/configs/fm9g.py
deleted
100644 → 0
View file @
05e8b083
# coding=utf-8
# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
#
# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
# and OPT implementations in this library. It has been modified from its
# original forms to accommodate minor architectural differences compared
# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""FM9G model configuration"""
from
transformers.configuration_utils
import
PretrainedConfig
from
transformers.utils
import
logging
logger
=
logging
.
get_logger
(
__name__
)
FM9G_PRETRAINED_CONFIG_ARCHIVE_MAP
=
{}
class
FM9GConfig
(
PretrainedConfig
):
r
"""
This is the configuration class to store the configuration of a [`FM9GModel`]. It is used to instantiate an FM9G
model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
defaults will yield a similar configuration to that of the FM9G-7B.
Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
documentation from [`PretrainedConfig`] for more information.
Args:
vocab_size (`int`, *optional*, defaults to 32000):
Vocabulary size of the FM9G model. Defines the number of different tokens that can be represented by the
`inputs_ids` passed when calling [`FM9GModel`]
hidden_size (`int`, *optional*, defaults to 4096):
Dimension of the hidden representations.
intermediate_size (`int`, *optional*, defaults to 11008):
Dimension of the MLP representations.
num_hidden_layers (`int`, *optional*, defaults to 32):
Number of hidden layers in the Transformer decoder.
num_attention_heads (`int`, *optional*, defaults to 32):
Number of attention heads for each attention layer in the Transformer decoder.
num_key_value_heads (`int`, *optional*):
This is the number of key_value heads that should be used to implement Grouped Query Attention. If
`num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
`num_key_value_heads=1` the model will use Multi Query Attention (MQA), otherwise GQA is used. When
converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
by meanpooling all the original heads within that group. For more details checkout [this
paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to
`num_attention_heads`.
hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
The non-linear activation function (function or string) in the decoder.
max_position_embeddings (`int`, *optional*, defaults to 2048):
The maximum sequence length that this model might ever be used with.
initializer_range (`float`, *optional*, defaults to 0.02):
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
rms_norm_eps (`float`, *optional*, defaults to 1e-06):
The epsilon used by the rms normalization layers.
use_cache (`bool`, *optional*, defaults to `True`):
Whether or not the model should return the last key/values attentions (not used by all models). Only
relevant if `config.is_decoder=True`.
pad_token_id (`int`, *optional*):
Padding token id.
bos_token_id (`int`, *optional*, defaults to 1):
Beginning of stream token id.
eos_token_id (`int`, *optional*, defaults to 2):
End of stream token id.
pretraining_tp (`int`, *optional*, defaults to 1):
Experimental feature. Tensor parallelism rank used during pretraining. Please refer to [this
document](https://huggingface.co/docs/transformers/parallelism) to understand more about it. This value is
necessary to ensure exact reproducibility of the pretraining results. Please refer to [this
issue](https://github.com/pytorch/pytorch/issues/76232).
tie_word_embeddings (`bool`, *optional*, defaults to `True`):
Whether to tie weight embeddings
rope_theta (`float`, *optional*, defaults to 10000.0):
The base period of the RoPE embeddings.
rope_scaling (`Dict`, *optional*):
Dictionary containing the scaling configuration for the RoPE embeddings. Currently supports two scaling
strategies: linear and dynamic. Their scaling factor must be a float greater than 1. The expected format is
`{"type": strategy name, "factor": scaling factor}`. When using this flag, don't update
`max_position_embeddings` to the expected new maximum.
attention_bias (`bool`, *optional*, defaults to `False`):
Whether to use a bias in the query, key, value and output projection layers during self-attention.
attention_dropout (`float`, *optional*, defaults to 0.0):
The dropout ratio for the attention probabilities.
"""
model_type
=
"fm9g"
keys_to_ignore_at_inference
=
[
"past_key_values"
]
def __init__(
    self,
    vocab_size=32000,
    hidden_size=4096,
    intermediate_size=11008,
    num_hidden_layers=32,
    num_attention_heads=32,
    num_key_value_heads=None,
    hidden_act="silu",
    max_position_embeddings=2048,
    initializer_range=0.02,
    rms_norm_eps=1e-6,
    use_cache=True,
    pad_token_id=None,
    bos_token_id=1,
    eos_token_id=2,
    pretraining_tp=1,
    tie_word_embeddings=True,
    rope_theta=10000.0,
    rope_scaling=None,
    attention_bias=False,
    attention_dropout=0.0,
    scale_emb=1,
    dim_model_base=1,
    scale_depth=1,
    **kwargs,
):
    """Store the FM9G hyper-parameters and initialise the base config.

    Every argument is stored on the instance under the same name, except
    ``pad_token_id``, ``bos_token_id``, ``eos_token_id``,
    ``tie_word_embeddings`` and ``**kwargs``, which are forwarded to the
    parent initializer. ``rope_scaling`` is validated right after it is
    stored.

    Raises:
        ValueError: if ``rope_scaling`` fails ``_rope_scaling_validation``.
    """
    self.vocab_size = vocab_size
    self.max_position_embeddings = max_position_embeddings
    self.hidden_size = hidden_size
    self.intermediate_size = intermediate_size
    self.num_hidden_layers = num_hidden_layers
    self.num_attention_heads = num_attention_heads

    # for backward compatibility: a missing KV-head count means one KV
    # head per attention head.
    if num_key_value_heads is None:
        num_key_value_heads = num_attention_heads

    self.num_key_value_heads = num_key_value_heads
    self.hidden_act = hidden_act
    self.initializer_range = initializer_range
    self.rms_norm_eps = rms_norm_eps
    self.pretraining_tp = pretraining_tp
    self.use_cache = use_cache
    self.rope_theta = rope_theta
    self.rope_scaling = rope_scaling
    self._rope_scaling_validation()
    self.attention_bias = attention_bias
    self.attention_dropout = attention_dropout
    self.scale_emb = scale_emb
    self.dim_model_base = dim_model_base
    self.scale_depth = scale_depth

    super().__init__(
        pad_token_id=pad_token_id,
        bos_token_id=bos_token_id,
        eos_token_id=eos_token_id,
        tie_word_embeddings=tie_word_embeddings,
        **kwargs,
    )
    # Best effort: request flash-attention only when the package imports.
    # `except Exception` (rather than a bare `except`) keeps the fallback
    # silent for a missing or broken install while still letting
    # KeyboardInterrupt / SystemExit propagate.
    try:
        import flash_attn  # noqa: F401

        self._attn_implementation = "flash_attention_2"
    except Exception:
        pass
def _rope_scaling_validation(self):
    """
    Validate the `rope_scaling` configuration.

    A value of ``None`` is accepted as-is. Otherwise ``self.rope_scaling``
    must be a dict with exactly two entries: ``type`` (``"linear"`` or
    ``"dynamic"``) and ``factor`` (a ``float`` strictly greater than 1).

    Raises:
        ValueError: if any of the conditions above is violated.
    """
    if self.rope_scaling is None:
        return

    if not isinstance(self.rope_scaling, dict) or len(self.rope_scaling) != 2:
        raise ValueError(
            "`rope_scaling` must be a dictionary with two fields, `type` and `factor`, "
            f"got {self.rope_scaling}"
        )
    rope_scaling_type = self.rope_scaling.get("type", None)
    rope_scaling_factor = self.rope_scaling.get("factor", None)
    if rope_scaling_type is None or rope_scaling_type not in ["linear", "dynamic"]:
        raise ValueError(
            f"`rope_scaling`'s type field must be one of ['linear', 'dynamic'], got {rope_scaling_type}"
        )
    # Note: an integer factor (e.g. 2) is rejected; only floats pass.
    if (
        rope_scaling_factor is None
        or not isinstance(rope_scaling_factor, float)
        or rope_scaling_factor <= 1.0
    ):
        raise ValueError(
            f"`rope_scaling`'s factor field must be a float > 1, got {rope_scaling_factor}"
        )
\ No newline at end of file
vllm/transformers_utils/tokenizers/__init__.py
deleted
100644 → 0
View file @
05e8b083
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from
vllm.transformers_utils.tokenizers.cpm_9g
import
CPM9GTokenizer
__all__
=
[
"CPM9GTokenizer"
]
vllm/transformers_utils/tokenizers/cpm_9g.py
deleted
100644 → 0
View file @
05e8b083
import
io
import
json
import
os
from
shutil
import
copyfile
from
typing
import
Any
,
Dict
,
IO
,
List
,
Optional
,
Tuple
# import pkg_resources
import
sentencepiece
as
spm
from
pytrie
import
StringTrie
from
transformers.tokenization_utils
import
AddedToken
,
PreTrainedTokenizer
from
transformers.utils
import
logging
logger
=
logging
.
get_logger
(
__name__
)
VOCAB_FILES_NAMES
=
{
"vocab_file"
:
"vocab.txt"
}
PRETRAINED_VOCAB_FILES_MAP
=
{
"vocab_file"
:
{},
"tokenizer_file"
:
{},
}
PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
=
{}
class
CPM9GTokenizer
(
PreTrainedTokenizer
):
"""
CPM9G 分词器类。用于基于字节对编码的分词。
参数:
path (str, 可选): 词汇表文件的路径。
"""
vocab_files_names
=
VOCAB_FILES_NAMES
pretrained_vocab_files_map
=
PRETRAINED_VOCAB_FILES_MAP
max_model_input_sizes
=
PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
model_input_names
=
[
"input_ids"
,
"attention_mask"
]
def __init__(
    self,
    vocab_file: Optional[str] = None,
    unk_token: str = "<unk>",
    bos_token: str = "<s>",
    eos_token: str = "</s>",
    pad_token: Optional[str] = None,
    sp_model_kwargs: Optional[Dict[str, Any]] = None,
    add_bos_token: bool = True,
    add_eos_token: bool = False,
    clean_up_tokenization_spaces: bool = False,
    **kwargs,
):
    """Load the vocabulary and build the lookup tables used for encoding.

    Args:
        vocab_file: Either the path of a vocabulary file whose name
            contains ``vocab.txt`` or a directory that contains one.
        unk_token, bos_token, eos_token, pad_token: Special token strings.
        sp_model_kwargs: Stored on the instance and forwarded to the
            parent initializer; not otherwise used here.
        add_bos_token, add_eos_token: Whether the sequence-building
            helpers add BOS / EOS.
        clean_up_tokenization_spaces: Forwarded to the parent initializer.
        **kwargs: Forwarded to the parent initializer.

    Raises:
        ValueError: if ``vocab_file`` is empty or ``None``.
    """
    self.sp_model_kwargs = sp_model_kwargs or {}
    self.vocab_file = vocab_file
    self.add_bos_token = add_bos_token
    self.add_eos_token = add_eos_token
    self.unk_token = unk_token
    self.bos_token = bos_token
    self.eos_token = eos_token
    self.pad_token = pad_token

    # One placeholder token per byte value: "<0x00>" ... "<0xFF>".
    self.byte_list: List[str] = [f"<0x{i:02X}>" for i in range(0x100)]

    self._special_token_set = set(
        [self.unk_token, self.bos_token, self.eos_token] + self.byte_list
    )

    if not vocab_file:
        # Previously this fell through to an unbound `all_tokens` error.
        raise ValueError(
            "CPM9GTokenizer requires `vocab_file` (a vocab.txt path or a "
            "directory containing vocab.txt)"
        )
    if "vocab.txt" not in vocab_file:
        vocab_path = os.path.join(vocab_file, VOCAB_FILES_NAMES["vocab_file"])
    else:
        # Open the path that was actually given instead of a bare
        # "vocab.txt" resolved against the current working directory.
        vocab_path = vocab_file
    with open(vocab_path, "rb") as vocab_fp:
        all_tokens = self.load_vocab(vocab_fp)

    # Split the vocabulary into regular tokens and special/byte tokens.
    self.encoder: Dict[str, int] = {}
    self._special_encoder: Dict[str, int] = {}
    for token, token_id in all_tokens.items():
        if token in self._special_token_set:
            self._special_encoder[token] = token_id
        else:
            self.encoder[token] = token_id

    self.decoder = {v: k for k, v in self.encoder.items()}
    # token id of "<0xNN>" -> byte value NN
    self._byte_decoder = {
        self._special_encoder[token]: i for i, token in enumerate(self.byte_list)
    }

    self._max_word_len = max([len(x) for x in self.encoder.keys()])
    # first character -> length of the longest regular token starting with it
    self._len_word_first = {}
    for x in self.encoder.keys():
        if len(x) > self._len_word_first.get(x[0], 0):
            self._len_word_first[x[0]] = len(x)
    self.tencoder = StringTrie(self.encoder)

    self._max_token_id = self.vocab_size - 1

    super().__init__(
        bos_token=AddedToken(bos_token, lstrip=False, rstrip=False),
        eos_token=AddedToken(eos_token, lstrip=False, rstrip=False),
        unk_token=AddedToken(unk_token, lstrip=False, rstrip=False),
        pad_token=AddedToken(pad_token, lstrip=False, rstrip=False) if pad_token else None,
        add_bos_token=add_bos_token,
        add_eos_token=add_eos_token,
        sp_model_kwargs=self.sp_model_kwargs,
        clean_up_tokenization_spaces=clean_up_tokenization_spaces,
        **kwargs,
    )
def __getstate__(self) -> Dict[str, Any]:
    """Return a shallow copy of the instance dict with ``sp_model`` set to None."""
    snapshot = dict(self.__dict__)
    snapshot["sp_model"] = None
    return snapshot
def __setstate__(self, d: Dict[str, Any]) -> None:
    """Restore the instance by adopting ``d`` as its attribute dictionary."""
    # ``d`` itself (not a copy) becomes the instance ``__dict__``.
    self.__dict__ = d
def load_vocab(self, fp: IO[bytes]) -> Dict[str, int]:
    """
    Load a vocabulary file into a dictionary.

    Each non-blank line is stripped and parsed with ``json.loads``; the
    parsed value becomes a key whose id is the number of entries collected
    so far.

    Args:
        fp (IO[bytes]): Binary handle of the vocabulary file (read as UTF-8).

    Returns:
        Dict[str, int]: Mapping from token to id.
    """
    mapping: Dict[str, int] = {}
    text_stream = io.TextIOWrapper(fp, encoding="utf-8")
    for raw_line in text_stream.readlines():
        stripped = raw_line.strip()
        if not stripped:
            continue
        parsed = json.loads(stripped)
        mapping[parsed] = len(mapping)
    return mapping
@property
def vocab_size(self) -> int:
    """Return the vocabulary size: regular entries plus special entries."""
    regular_count = len(self.encoder)
    special_count = len(self._special_encoder)
    return regular_count + special_count
@property
def max_token_id(self) -> int:
    """Return the value stored in ``self._max_token_id``."""
    return self._max_token_id
@property
def eos_id(self):
    """Return the id registered for ``self.eos_token`` in the special-token table."""
    special_ids = self._special_encoder
    return special_ids[self.eos_token]
@property
def bos_id(self):
    """Return the id registered for ``self.bos_token`` in the special-token table."""
    special_ids = self._special_encoder
    return special_ids[self.bos_token]
@property
def unk_id(self):
    """Return the id registered for ``self.unk_token`` in the special-token table."""
    special_ids = self._special_encoder
    return special_ids[self.unk_token]
def get_vocab(self) -> Dict[str, int]:
    """Return the vocabulary as a dict.

    Ids ``0 .. vocab_size - 1`` are mapped through ``convert_ids_to_tokens``
    (a later id overwrites an earlier one that yields the same token), then
    the entries of ``added_tokens_encoder`` are merged on top.
    """
    table: Dict[str, int] = {}
    for index in range(self.vocab_size):
        table[self.convert_ids_to_tokens(index)] = index
    table.update(self.added_tokens_encoder)
    return table
def _tokenize(self, text: str) -> List[str]:
    """Split ``text`` into pieces by repeatedly calling ``get_piece``.

    Only a window of at most ``self._max_word_len`` characters is handed to
    ``get_piece`` at each step. No regular token is longer than that, so
    the pieces are the same as when the whole remaining text is passed,
    without copying the rest of the string for every piece.

    Args:
        text: The string to split.

    Returns:
        The pieces in order; their concatenation equals ``text``.
    """
    output_tokens: List[str] = []
    window = self._max_word_len
    total = len(text)
    st = 0
    while st < total:
        piece = self.get_piece(text[st : st + window])
        output_tokens.append(piece)
        st += len(piece)
    return output_tokens
def _convert_token_to_id(self, token: str) -> int:
    """Convert a token (str) to an id using the regular vocabulary.

    Tokens missing from ``self.encoder`` map to ``self.unk_id``.
    """
    # Resolved up front, exactly as a default argument to ``dict.get`` would be.
    fallback_id = self.unk_id
    return self.encoder.get(token, fallback_id)
def _convert_id_to_token(self, index: int) -> str:
    """Convert an index (int) to a token (str) using the regular vocabulary.

    Ids missing from ``self.decoder`` map to ``self.unk_token``.
    """
    fallback_token = self.unk_token
    return self.decoder.get(index, fallback_token)
def convert_tokens_to_string(self, tokens: List[str]) -> str:
    """Convert a sequence of tokens (str) into a single string.

    Runs of non-special tokens are passed to ``self.decode``; special
    tokens (members of ``self._special_token_set``) are emitted verbatim.
    A space is inserted before the flushed text when a special token
    follows a non-special one and is not the first token.

    The trailing run is flushed through ``self.decode`` as well. The
    previous code called ``self.sp_model.decode`` there, but this class
    never creates ``sp_model``, so every call raised ``AttributeError``.
    """
    pending: List[str] = []
    pieces: List[str] = []
    prev_is_special = False
    for position, token in enumerate(tokens):
        if token in self._special_token_set:
            if not prev_is_special and position != 0:
                pieces.append(" ")
            pieces.append(self.decode(pending))
            pieces.append(token)
            prev_is_special = True
            pending = []
        else:
            pending.append(token)
            prev_is_special = False
    pieces.append(self.decode(pending))
    return "".join(pieces)
def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
    """
    Save the vocabulary file into a directory.

    The source is ``self.vocab_file``, or the ``vocab.txt`` inside it when
    it is a directory (``__init__`` accepts both forms). If the source file
    exists it is copied, unless it already is the destination. Otherwise
    the vocabulary is regenerated from the in-memory tables, one
    JSON-encoded token per line ordered by id, which is the layout
    ``load_vocab`` reads. The previous fallback called
    ``self.sp_model.serialized_model_proto()``, but this class never
    creates ``sp_model``.

    Args:
        save_directory (str): Directory to save the vocabulary in.
        filename_prefix (str, optional): Prefix joined to the file name with "-".

    Returns:
        Tuple[str]: Path of the saved file.

    Raises:
        ValueError: if ``save_directory`` is not a directory.
    """
    if not os.path.isdir(save_directory):
        raise ValueError(f"Vocabulary path ({save_directory}) should be a directory")
    out_vocab_file = os.path.join(
        save_directory,
        (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"],
    )

    source = self.vocab_file
    if source and os.path.isdir(source):
        source = os.path.join(source, VOCAB_FILES_NAMES["vocab_file"])

    if source and os.path.isfile(source):
        # Copying a file onto itself is skipped.
        if os.path.abspath(source) != os.path.abspath(out_vocab_file):
            copyfile(source, out_vocab_file)
    else:
        entries = {**self.encoder, **self._special_encoder}
        with open(out_vocab_file, "w", encoding="utf-8") as fi:
            for token, _token_id in sorted(entries.items(), key=lambda item: item[1]):
                fi.write(json.dumps(token) + "\n")

    return (out_vocab_file,)
def build_inputs_with_special_tokens(
    self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
) -> List[int]:
    """Wrap one or two id sequences with BOS / EOS ids.

    BOS is prepended when ``self.add_bos_token`` is truthy and EOS appended
    when ``self.add_eos_token`` is truthy; a second sequence gets the same
    wrapping and is concatenated after the first.
    """
    prefix = [self.bos_token_id] if self.add_bos_token else []
    suffix = [self.eos_token_id] if self.add_eos_token else []

    if token_ids_1 is None:
        return prefix + token_ids_0 + suffix
    return prefix + token_ids_0 + suffix + prefix + token_ids_1 + suffix
def get_special_tokens_mask(
    self,
    token_ids_0: List[int],
    token_ids_1: Optional[List[int]] = None,
    already_has_special_tokens: bool = False,
) -> List[int]:
    """
    Build the special-token mask for sequences without special tokens added.

    Args:
        token_ids_0 (List[int]): List of ids.
        token_ids_1 (List[int], optional): Optional second list of ids for sequence pairs.
        already_has_special_tokens (bool, optional, defaults to False):
            Whether the lists are already formatted with special tokens; if
            so the call is delegated to the parent implementation.

    Returns:
        List[int]: A list of 0/1 integers: 1 for a special token, 0 for a sequence token.
    """
    if already_has_special_tokens:
        return super().get_special_tokens_mask(
            token_ids_0=token_ids_0,
            token_ids_1=token_ids_1,
            already_has_special_tokens=True,
        )

    lead = [1] if self.add_bos_token else []
    tail = [1] if self.add_eos_token else []

    mask = lead + [0] * len(token_ids_0) + tail
    if token_ids_1 is not None:
        mask = mask + lead + [0] * len(token_ids_1) + tail
    return mask
def create_token_type_ids_from_sequences(
    self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
) -> List[int]:
    """
    Create token type ids for one or two sequences.

    Args:
        token_ids_0 (List[int]): List of ids.
        token_ids_1 (List[int], optional): Optional second list of ids for sequence pairs.

    Returns:
        List[int]: 0 for every position of the first sequence (including
        its BOS/EOS, when enabled) followed by 1 for every position of the
        second sequence, if given.
    """
    prefix = [self.bos_token_id] if self.add_bos_token else []
    suffix = [self.eos_token_id] if self.add_eos_token else []

    first_segment = prefix + token_ids_0 + suffix
    type_ids = [0] * len(first_segment)

    if token_ids_1 is not None:
        second_segment = prefix + token_ids_1 + suffix
        type_ids += [1] * len(second_segment)

    return type_ids
def get_piece(self, text: str) -> str:
    """
    Return the next piece at the start of ``text``.

    The piece is the longest prefix of ``text`` found in ``self.encoder``,
    searched no further than the length recorded for the first character in
    ``self._len_word_first``. When nothing matches, the first character is
    returned.

    Args:
        text (str): Input text (must be non-empty).

    Returns:
        str: The piece.
    """
    first_char = text[0]
    if first_char not in self._len_word_first:
        return first_char

    window = text[: self._len_word_first[first_char]]
    # Try the longest candidate first, shrinking one character at a time.
    for end in range(len(window), 0, -1):
        candidate = window[:end]
        if candidate in self.encoder:
            return candidate
    return first_char
def encode(self, text: str) -> List[int]:
    """
    Encode text into a list of ids.

    Pieces found in ``self.encoder`` contribute their id; any other piece
    is expanded through ``self._encode_unicode``.

    Args:
        text (str): Input text.

    Returns:
        List[int]: The encoded ids.
    """
    token_ids: List[int] = []
    for piece in self._tokenize(text):
        if piece in self.encoder:
            token_ids.append(self.encoder[piece])
            continue
        token_ids.extend(self._encode_unicode(piece))
    return token_ids
def decode_all(self, tokens: List[int]):
    """Decode ids into a string.

    Ids are resolved in this order: regular tokens via ``self.decoder``,
    byte tokens via ``self._byte_decoder``, then EOS, then BOS; anything
    else becomes ``self.unk_token``.

    A maximal run of consecutive byte tokens is decoded as one UTF-8 byte
    string. The previous code grouped byte tokens greedily in chunks of
    4/3/2/1, which split adjacent multi-byte characters at the wrong
    boundary (e.g. two 3-byte characters) and raised
    ``UnicodeDecodeError``. Invalid or truncated byte sequences now decode
    to U+FFFD instead of raising.

    Args:
        tokens: The ids to decode.

    Returns:
        The decoded string.
    """
    pieces = []
    total = len(tokens)
    pos = 0
    while pos < total:
        token_id = tokens[pos]
        if token_id in self.decoder:
            pieces.append(self.decoder[token_id])
            pos += 1
        elif token_id in self._byte_decoder:
            raw = bytearray()
            while pos < total and tokens[pos] in self._byte_decoder:
                raw.append(self._byte_decoder[tokens[pos]])
                pos += 1
            pieces.append(raw.decode("utf-8", errors="replace"))
        elif token_id == self.eos_id:
            pieces.append(self.eos_token)
            pos += 1
        elif token_id == self.bos_id:
            pieces.append(self.bos_token)
            pos += 1
        else:
            pieces.append(self.unk_token)
            pos += 1
    return "".join(pieces)
def decode(self, tokens: List[int]) -> str:
    """
    Decode a list of ids into a string.

    Items are resolved in this order: byte tokens via
    ``self._byte_decoder``, then EOS, then BOS. Any other item is passed
    through unchanged when it is already a ``str`` (``convert_tokens_to_string``
    calls this method with token strings); otherwise it is looked up in
    ``self.decoder`` and falls back to ``self.unk_token``. The previous code
    appended such ids as raw ints, so the final join raised ``TypeError``.

    A maximal run of consecutive byte tokens is decoded as one UTF-8 byte
    string. The previous greedy 4/3/2/1 chunking split adjacent multi-byte
    characters at the wrong boundary and raised ``UnicodeDecodeError``.
    Invalid or truncated byte sequences now decode to U+FFFD.

    Args:
        tokens (List[int]): The ids (or token strings) to decode.

    Returns:
        str: The decoded string.
    """
    pieces = []
    total = len(tokens)
    pos = 0
    while pos < total:
        token = tokens[pos]
        if token in self._byte_decoder:
            raw = bytearray()
            while pos < total and tokens[pos] in self._byte_decoder:
                raw.append(self._byte_decoder[tokens[pos]])
                pos += 1
            pieces.append(raw.decode("utf-8", errors="replace"))
        elif token == self.eos_id:
            pieces.append(self.eos_token)
            pos += 1
        elif token == self.bos_id:
            pieces.append(self.bos_token)
            pos += 1
        else:
            if isinstance(token, str):
                pieces.append(token)
            else:
                pieces.append(self.decoder.get(token, self.unk_token))
            pos += 1
    return "".join(pieces)
def _encode_unicode(self, token: str) -> List[int]:
    """
    Encode a token as the ids of its UTF-8 bytes.

    Args:
        token (str): The token to encode.

    Returns:
        List[int]: One id per UTF-8 byte, taken from the special-token
        table entry of the matching ``byte_list`` placeholder.
    """
    return [
        self._special_encoder[self.byte_list[byte_value]]
        for byte_value in token.encode("utf-8")
    ]
def next_token(self, text: str) -> Tuple[str, List[int]]:
    """
    Quickly find the next matching token.

    The longest prefix of ``text`` present in ``self.tencoder`` is used
    when there is one; otherwise the first character is taken and encoded
    through ``self._encode_unicode``.

    Args:
        text (str): Input text.

    Returns:
        Tuple[str, List[int]]: The matched token and its list of ids.
    """
    matched, matched_id = self.tencoder.longest_prefix_item(text, (None, None))
    if matched is not None:
        return matched, [matched_id]

    first_char = text[0]
    return first_char, self._encode_unicode(first_char)
\ No newline at end of file
vllm/v1/engine/detokenizer.py
View file @
ac4f685b
...
...
@@ -258,7 +258,6 @@ class FastIncrementalDetokenizer(BaseIncrementalDetokenizer):
class
SlowIncrementalDetokenizer
(
BaseIncrementalDetokenizer
):
def
__init__
(
self
,
tokenizer
:
TokenizerLike
,
request
:
EngineCoreRequest
,
mode
=
"auto"
):
super
().
__init__
(
request
)
self
.
mode
=
mode
self
.
tokenizer
=
tokenizer
params
=
request
.
sampling_params
...
...
@@ -305,7 +304,6 @@ class SlowIncrementalDetokenizer(BaseIncrementalDetokenizer):
read_offset
=
self
.
read_offset
,
skip_special_tokens
=
self
.
skip_special_tokens
,
spaces_between_special_tokens
=
self
.
spaces_between_special_tokens
,
mode
=
self
.
mode
,
)
self
.
tokens
.
extend
(
new_tokens
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment