ModelZoo / Qwen2.5_vllm · Commits

Commit 3c631e30, authored Oct 17, 2024 by laibao
No commit message
Pipeline #1770 canceled with stages

Showing 6 changed files with 333 additions and 0 deletions
examples/template_inkbot.jinja      +31   -0
examples/template_llama_chat.jinja  +25   -0
examples/template_llava.jinja       +23   -0
examples/tensorize_vllm_model.py    +244  -0
icon.png                            +0    -0
model.properties                    +10   -0
examples/template_inkbot.jinja  0 → 100644 (new file)
<#meta#>
- Date: {{ (messages|selectattr('role', 'equalto', 'meta-current_date')|list|last).content|trim if (messages|selectattr('role', 'equalto', 'meta-current_date')|list) else '' }}
- Task: {{ (messages|selectattr('role', 'equalto', 'meta-task_name')|list|last).content|trim if (messages|selectattr('role', 'equalto', 'meta-task_name')|list) else '' }}
<#system#>
{{ (messages|selectattr('role', 'equalto', 'system')|list|last).content|trim if (messages|selectattr('role', 'equalto', 'system')|list) else '' }}
<#chat#>
{% for message in messages %}
{% if message['role'] == 'user' %}
<#user#>
{{ message['content']|trim -}}
{% if not loop.last %}
{% endif %}
{% elif message['role'] == 'assistant' %}
<#bot#>
{{ message['content']|trim -}}
{% if not loop.last %}
{% endif %}
{% elif message['role'] == 'user_context' %}
<#user_context#>
{{ message['content']|trim -}}
{% if not loop.last %}
{% endif %}
{% endif %}
{% endfor %}
{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}
<#bot#>
{% endif %}
\ No newline at end of file
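For reference, a template like this is consumed through the Hugging Face `apply_chat_template` machinery. A minimal sketch of rendering it by hand, assuming `transformers` is installed; the tokenizer checkpoint and the messages are illustrative placeholders, not part of this commit:

# Sketch: render template_inkbot.jinja manually via transformers.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")  # placeholder checkpoint
with open("examples/template_inkbot.jinja") as f:
    template = f.read()

messages = [
    {"role": "meta-current_date", "content": "2024-10-17"},
    {"role": "meta-task_name", "content": "general_chat"},
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]
# add_generation_prompt=True appends the trailing <#bot#> turn, since the
# last message is not from the assistant.
prompt = tokenizer.apply_chat_template(messages,
                                       chat_template=template,
                                       tokenize=False,
                                       add_generation_prompt=True)
print(prompt)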
examples/template_llama_chat.jinja  0 → 100644 (new file)
{% if messages[0]['role'] == 'system' %}
{% set system_message = '<<SYS>>\n' + messages[0]['content'] | trim + '\n<</SYS>>\n\n' %}
{% set messages = messages[1:] %}
{% else %}
{% set system_message = '' %}
{% endif %}
{% for message in messages %}
{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}
{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}
{% endif %}
{% if loop.index0 == 0 %}
{% set content = system_message + message['content'] %}
{% else %}
{% set content = message['content'] %}
{% endif %}
{% if message['role'] == 'user' %}
{{ bos_token + '[INST] ' + content | trim + ' [/INST]' }}
{% elif message['role'] == 'assistant' %}
{{ ' ' + content | trim + ' ' + eos_token }}
{% endif %}
{% endfor %}
\ No newline at end of file
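To make the alternation rule concrete: for messages [system, user, assistant] and a Llama-style tokenizer where bos_token/eos_token are <s>/</s>, the system block is folded into the first user turn, so the rendered prompt is, modulo any newlines the Jinja environment leaves between statements:

<s>[INST] <<SYS>>
{system}
<</SYS>>

{user} [/INST] {assistant} </s>

A second user message would then open a fresh <s>[INST] ... [/INST] segment, which is why the template rejects non-alternating role orders.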
examples/template_llava.jinja  0 → 100644 (new file)
{%- if messages[0]['role'] == 'system' -%}
{%- set system_message = messages[0]['content'] -%}
{%- set messages = messages[1:] -%}
{%- else -%}
{% set system_message = '' -%}
{%- endif -%}
{{ bos_token + system_message }}
{%- for message in messages -%}
{%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}
{%- endif -%}
{%- if message['role'] == 'user' -%}
{{ 'USER: ' + message['content'] + '\n' }}
{%- elif message['role'] == 'assistant' -%}
{{ 'ASSISTANT: ' + message['content'] + eos_token + '\n' }}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{ 'ASSISTANT:' }}
{% endif %}
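Templates like these are intended for vLLM's OpenAI-compatible server, which accepts a Jinja file through its `--chat-template` flag. A sketch in the same invocation style as the script below; the model name is an illustrative placeholder:

python -m vllm.entrypoints.openai.api_server \
    --model llava-hf/llava-1.5-7b-hf \
    --chat-template examples/template_llava.jinja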
examples/tensorize_vllm_model.py  0 → 100644 (new file)
import argparse
import dataclasses
import json
import os
import uuid
from functools import partial

from tensorizer import stream_io

from vllm import LLM
from vllm.distributed import (init_distributed_environment,
                              initialize_model_parallel)
from vllm.engine.arg_utils import EngineArgs
from vllm.engine.llm_engine import LLMEngine
from vllm.model_executor.model_loader.tensorizer import (TensorizerArgs,
                                                         TensorizerConfig,
                                                         serialize_vllm_model)

# yapf conflicts with isort for this docstring
# yapf: disable
"""
tensorize_vllm_model.py is a script that can be used to serialize and
deserialize vLLM models. These models can be loaded using tensorizer
to the GPU extremely quickly over an HTTP/HTTPS endpoint, an S3 endpoint,
or locally. Tensor encryption and decryption is also supported, although
libsodium must be installed to use it. Install vllm with tensorizer support
using `pip install vllm[tensorizer]`. To learn more about tensorizer, visit
https://github.com/coreweave/tensorizer

To serialize a model, install vLLM from source, then run something
like this from the root level of this repository:

python -m examples.tensorize_vllm_model \
   --model facebook/opt-125m \
   serialize \
   --serialized-directory s3://my-bucket \
   --suffix v1

Which downloads the model from HuggingFace, loads it into vLLM, serializes it,
and saves it to your S3 bucket. A local directory can also be used. This
assumes your S3 credentials are specified as environment variables
in the form of `S3_ACCESS_KEY_ID`, `S3_SECRET_ACCESS_KEY`, and
`S3_ENDPOINT_URL`. To provide S3 credentials directly, you can provide
`--s3-access-key-id` and `--s3-secret-access-key`, as well as `--s3-endpoint`
as CLI args to this script.

You can also encrypt the model weights with a randomly-generated key by
providing a `--keyfile` argument.

To deserialize a model, you can run something like this from the root
level of this repository:

python -m examples.tensorize_vllm_model \
   --model EleutherAI/gpt-j-6B \
   --dtype float16 \
   deserialize \
   --path-to-tensors s3://my-bucket/vllm/EleutherAI/gpt-j-6B/v1/model.tensors

Which downloads the model tensors from your S3 bucket and deserializes them.

You can also provide a `--keyfile` argument to decrypt the model weights if
they were serialized with encryption.

For more information on the available arguments for serializing, run
`python -m examples.tensorize_vllm_model serialize --help`.

Or for deserializing:

`python -m examples.tensorize_vllm_model deserialize --help`.

Once a model is serialized, tensorizer can be invoked with the `LLM` class
directly to load models:

    llm = LLM(model="facebook/opt-125m",
              load_format="tensorizer",
              model_loader_extra_config=TensorizerConfig(
                  tensorizer_uri=path_to_tensors,
                  num_readers=3,
              ))

A serialized model can be used during model loading for the vLLM OpenAI
inference server. `model_loader_extra_config` is exposed as the CLI arg
`--model-loader-extra-config`, and accepts a JSON string literal of the
TensorizerConfig arguments desired.

In order to see all of the available arguments usable to configure
loading with tensorizer that are given to `TensorizerConfig`, run:

`python -m examples.tensorize_vllm_model deserialize --help`

under the `tensorizer options` section. These can also be used for
deserialization in this example script, although `--tensorizer-uri` and
`--path-to-tensors` are functionally the same in this case.
"""


def parse_args():
    parser = argparse.ArgumentParser(
        description="An example script that can be used to serialize and "
        "deserialize vLLM models. These models "
        "can be loaded using tensorizer directly to the GPU "
        "extremely quickly. Tensor encryption and decryption is "
        "also supported, although libsodium must be installed to "
        "use it.")
    parser = EngineArgs.add_cli_args(parser)
    subparsers = parser.add_subparsers(dest='command')

    serialize_parser = subparsers.add_parser(
        'serialize', help="Serialize a model to `--serialized-directory`")

    serialize_parser.add_argument(
        "--suffix",
        type=str,
        required=False,
        help=(
            "The suffix to append to the serialized model directory, which is "
            "used to construct the location of the serialized model tensors, "
            "e.g. if `--serialized-directory` is `s3://my-bucket/` and "
            "`--suffix` is `v1`, the serialized model tensors will be "
            "saved to "
            "`s3://my-bucket/vllm/EleutherAI/gpt-j-6B/v1/model.tensors`. "
            "If none is provided, a random UUID will be used."))
    serialize_parser.add_argument(
        "--serialized-directory",
        type=str,
        required=True,
        help="The directory to serialize the model to. "
        "This can be a local directory or S3 URI. The path to where the "
        "tensors are saved is a combination of the supplied `dir` and model "
        "reference ID. For instance, if `dir` is the serialized directory, "
        "and the model HuggingFace ID is `EleutherAI/gpt-j-6B`, tensors will "
        "be saved to `dir/vllm/EleutherAI/gpt-j-6B/suffix/model.tensors`, "
        "where `suffix` is given by `--suffix` or a random UUID if not "
        "provided.")

    serialize_parser.add_argument(
        "--keyfile",
        type=str,
        required=False,
        help=("Encrypt the model weights with a randomly-generated binary key,"
              " and save the key at this path"))

    deserialize_parser = subparsers.add_parser(
        'deserialize',
        help=("Deserialize a model from `--path-to-tensors`"
              " to verify it can be loaded and used."))

    deserialize_parser.add_argument(
        "--path-to-tensors",
        type=str,
        required=True,
        help="The local path or S3 URI to the model tensors to deserialize. ")

    deserialize_parser.add_argument(
        "--keyfile",
        type=str,
        required=False,
        help=("Path to a binary key to use to decrypt the model weights,"
              " if the model was serialized with encryption"))

    TensorizerArgs.add_cli_args(deserialize_parser)

    return parser.parse_args()


def deserialize():
    llm = LLM(model=args.model,
              load_format="tensorizer",
              model_loader_extra_config=tensorizer_config)
    return llm


args = parse_args()

s3_access_key_id = (getattr(args, 's3_access_key_id', None)
                    or os.environ.get("S3_ACCESS_KEY_ID", None))
s3_secret_access_key = (getattr(args, 's3_secret_access_key', None)
                        or os.environ.get("S3_SECRET_ACCESS_KEY", None))
s3_endpoint = (getattr(args, 's3_endpoint', None)
               or os.environ.get("S3_ENDPOINT_URL", None))

credentials = {
    "s3_access_key_id": s3_access_key_id,
    "s3_secret_access_key": s3_secret_access_key,
    "s3_endpoint": s3_endpoint
}

_read_stream, _write_stream = (partial(
    stream_io.open_stream,
    mode=mode,
    s3_access_key_id=s3_access_key_id,
    s3_secret_access_key=s3_secret_access_key,
    s3_endpoint=s3_endpoint,
) for mode in ("rb", "wb+"))

model_ref = args.model

model_name = model_ref.split("/")[1]

os.environ["MASTER_ADDR"] = "127.0.0.1"
os.environ["MASTER_PORT"] = "8080"

init_distributed_environment(world_size=1, rank=0, local_rank=0)
initialize_model_parallel()

keyfile = args.keyfile if args.keyfile else None

if args.model_loader_extra_config:
    config = json.loads(args.model_loader_extra_config)
    tensorizer_args = TensorizerConfig(**config)._construct_tensorizer_args()
    tensorizer_args.tensorizer_uri = args.path_to_tensors
else:
    tensorizer_args = None

if args.command == "serialize":
    eng_args_dict = {f.name: getattr(args, f.name) for f in
                     dataclasses.fields(EngineArgs)}

    engine_args = EngineArgs.from_cli_args(argparse.Namespace(**eng_args_dict))
    engine = LLMEngine.from_engine_args(engine_args)

    input_dir = args.serialized_directory.rstrip('/')
    suffix = args.suffix if args.suffix else uuid.uuid4().hex
    base_path = f"{input_dir}/vllm/{model_ref}/{suffix}"
    model_path = f"{base_path}/model.tensors"
    tensorizer_config = TensorizerConfig(
        tensorizer_uri=model_path,
        **credentials)
    serialize_vllm_model(engine, tensorizer_config, keyfile)
elif args.command == "deserialize":
    if not tensorizer_args:
        tensorizer_config = TensorizerConfig(
            tensorizer_uri=args.path_to_tensors,
            encryption_keyfile=keyfile,
            **credentials)
    deserialize()
else:
    raise ValueError("Either serialize or deserialize must be specified.")
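The docstring above notes that `model_loader_extra_config` is exposed to the OpenAI-compatible server as `--model-loader-extra-config`, taking a JSON string of `TensorizerConfig` arguments, but stops short of a full invocation. A sketch under that description; the bucket URI follows the path convention documented above and is an illustrative placeholder:

python -m vllm.entrypoints.openai.api_server \
    --model facebook/opt-125m \
    --load-format tensorizer \
    --model-loader-extra-config '{"tensorizer_uri": "s3://my-bucket/vllm/facebook/opt-125m/v1/model.tensors", "num_readers": 3}'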
icon.png  0 → 100644 (new binary file, 53.8 KB)
model.properties  0 → 100644 (new file)
# Unique model identifier
modelCode=653
# Model name
modelName=qwen1.5_vllm
# Model description
modelDescription=Qwen1.5 is Alibaba Cloud's open-source large language model series and the beta version of Qwen2.0.
# Application scenarios
appScenario=Inference, dialogue Q&A, scientific research, education, government, finance
# Framework type
frameType=vllm
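model.properties uses the flat Java-style key=value format. A minimal Python sketch for consuming it; `load_properties` is a hypothetical helper, not part of this commit:

# Sketch: parse model.properties into a dict, skipping blank lines
# and '#' comment lines. load_properties is hypothetical, not part
# of this commit.
def load_properties(path):
    props = {}
    with open(path, encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            key, _, value = line.partition("=")
            props[key.strip()] = value.strip()
    return props

props = load_properties("model.properties")
print(props["modelName"])  # -> qwen1.5_vllm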