ModelZoo / InternVL3_pytorch · Commits

Commit 26e59280
Authored Apr 24, 2025 by wanglch

    Initial commit

Pipeline #2674 failed in 0 seconds.
Showing 3 changed files with 506 additions and 0 deletions:

    internvl_chat/examples/image5.jpg        +0    -0
    internvl_chat/internvl/conversation.py   +402  -0
    internvl_chat/internvl/dist_utils.py     +104  -0
Too many changes to show. To preserve performance, only 323 of 323+ files are displayed.
internvl_chat/examples/image5.jpg (new file, mode 100644): binary image, 109 KB
internvl_chat/internvl/conversation.py (new file, mode 100644):
"""
Conversation prompt templates.
We kindly request that you import fastchat instead of copying this file if you wish to use it.
If you have changes in mind, please contribute back so the community can benefit collectively and continue to maintain these valuable templates.
"""
import
dataclasses
from
enum
import
IntEnum
,
auto
from
typing
import
Any
,
Dict
,
List
,
Tuple
,
Union
class
SeparatorStyle
(
IntEnum
):
"""Separator styles."""
ADD_COLON_SINGLE
=
auto
()
ADD_COLON_TWO
=
auto
()
ADD_COLON_SPACE_SINGLE
=
auto
()
NO_COLON_SINGLE
=
auto
()
NO_COLON_TWO
=
auto
()
ADD_NEW_LINE_SINGLE
=
auto
()
LLAMA2
=
auto
()
CHATGLM
=
auto
()
CHATML
=
auto
()
CHATINTERN
=
auto
()
DOLLY
=
auto
()
RWKV
=
auto
()
PHOENIX
=
auto
()
ROBIN
=
auto
()
FALCON_CHAT
=
auto
()
CHATGLM3
=
auto
()
INTERNVL_ZH
=
auto
()
MPT
=
auto
()
@
dataclasses
.
dataclass
class
Conversation
:
"""A class that manages prompt templates and keeps all conversation history."""
# The name of this template
name
:
str
# The template of the system prompt
system_template
:
str
=
'{system_message}'
# The system message
system_message
:
str
=
''
# The names of two roles
roles
:
Tuple
[
str
]
=
(
'USER'
,
'ASSISTANT'
)
# All messages. Each item is (role, message).
messages
:
List
[
List
[
str
]]
=
()
# The number of few shot examples
offset
:
int
=
0
# The separator style and configurations
sep_style
:
SeparatorStyle
=
SeparatorStyle
.
ADD_COLON_SINGLE
sep
:
str
=
'
\n
'
sep2
:
str
=
None
# Stop criteria (the default one is EOS token)
stop_str
:
Union
[
str
,
List
[
str
]]
=
None
# Stops generation if meeting any token in this list
stop_token_ids
:
List
[
int
]
=
None
def
get_prompt
(
self
)
->
str
:
"""Get the prompt for generation."""
system_prompt
=
self
.
system_template
.
format
(
system_message
=
self
.
system_message
)
if
self
.
sep_style
==
SeparatorStyle
.
ADD_COLON_SINGLE
:
ret
=
system_prompt
+
self
.
sep
for
role
,
message
in
self
.
messages
:
if
message
:
ret
+=
role
+
': '
+
message
+
self
.
sep
else
:
ret
+=
role
+
':'
return
ret
elif
self
.
sep_style
==
SeparatorStyle
.
ADD_COLON_TWO
:
seps
=
[
self
.
sep
,
self
.
sep2
]
ret
=
system_prompt
+
seps
[
0
]
for
i
,
(
role
,
message
)
in
enumerate
(
self
.
messages
):
if
message
:
ret
+=
role
+
': '
+
message
+
seps
[
i
%
2
]
else
:
ret
+=
role
+
':'
return
ret
elif
self
.
sep_style
==
SeparatorStyle
.
ADD_COLON_SPACE_SINGLE
:
ret
=
system_prompt
+
self
.
sep
for
role
,
message
in
self
.
messages
:
if
message
:
ret
+=
role
+
': '
+
message
+
self
.
sep
else
:
ret
+=
role
+
': '
# must be end with a space
return
ret
elif
self
.
sep_style
==
SeparatorStyle
.
ADD_NEW_LINE_SINGLE
:
ret
=
''
if
system_prompt
==
''
else
system_prompt
+
self
.
sep
for
role
,
message
in
self
.
messages
:
if
message
:
ret
+=
role
+
'
\n
'
+
message
+
self
.
sep
else
:
ret
+=
role
+
'
\n
'
return
ret
elif
self
.
sep_style
==
SeparatorStyle
.
NO_COLON_SINGLE
:
ret
=
system_prompt
for
role
,
message
in
self
.
messages
:
if
message
:
ret
+=
role
+
message
+
self
.
sep
else
:
ret
+=
role
return
ret
elif
self
.
sep_style
==
SeparatorStyle
.
NO_COLON_TWO
:
seps
=
[
self
.
sep
,
self
.
sep2
]
ret
=
system_prompt
for
i
,
(
role
,
message
)
in
enumerate
(
self
.
messages
):
if
message
:
ret
+=
role
+
message
+
seps
[
i
%
2
]
else
:
ret
+=
role
return
ret
elif
self
.
sep_style
==
SeparatorStyle
.
RWKV
:
ret
=
system_prompt
for
i
,
(
role
,
message
)
in
enumerate
(
self
.
messages
):
if
message
:
ret
+=
(
role
+
': '
+
message
.
replace
(
'
\r\n
'
,
'
\n
'
).
replace
(
'
\n\n
'
,
'
\n
'
)
)
ret
+=
'
\n\n
'
else
:
ret
+=
role
+
':'
return
ret
elif
self
.
sep_style
==
SeparatorStyle
.
LLAMA2
:
seps
=
[
self
.
sep
,
self
.
sep2
]
if
self
.
system_message
:
ret
=
system_prompt
else
:
ret
=
'[INST] '
for
i
,
(
role
,
message
)
in
enumerate
(
self
.
messages
):
tag
=
self
.
roles
[
i
%
2
]
if
message
:
if
i
==
0
:
ret
+=
message
+
' '
else
:
ret
+=
tag
+
' '
+
message
+
seps
[
i
%
2
]
else
:
ret
+=
tag
return
ret
elif
self
.
sep_style
==
SeparatorStyle
.
CHATGLM
:
# source: https://huggingface.co/THUDM/chatglm-6b/blob/1d240ba371910e9282298d4592532d7f0f3e9f3e/modeling_chatglm.py#L1302-L1308
# source2: https://huggingface.co/THUDM/chatglm2-6b/blob/e186c891cf64310ac66ef10a87e6635fa6c2a579/modeling_chatglm.py#L926
round_add_n
=
1
if
self
.
name
==
'chatglm2'
else
0
if
system_prompt
:
ret
=
system_prompt
+
self
.
sep
else
:
ret
=
''
for
i
,
(
role
,
message
)
in
enumerate
(
self
.
messages
):
if
i
%
2
==
0
:
ret
+=
f
'[Round
{
i
//
2
+
round_add_n
}
]
{
self
.
sep
}
'
if
message
:
ret
+=
f
'
{
role
}
:
{
message
}{
self
.
sep
}
'
else
:
ret
+=
f
'
{
role
}
:'
return
ret
elif
self
.
sep_style
==
SeparatorStyle
.
CHATML
:
ret
=
''
if
system_prompt
==
''
else
system_prompt
+
self
.
sep
+
'
\n
'
for
role
,
message
in
self
.
messages
:
if
message
:
ret
+=
role
+
'
\n
'
+
message
+
self
.
sep
+
'
\n
'
else
:
ret
+=
role
+
'
\n
'
return
ret
elif
self
.
sep_style
==
SeparatorStyle
.
CHATGLM3
:
ret
=
''
if
self
.
system_message
:
ret
+=
system_prompt
for
role
,
message
in
self
.
messages
:
if
message
:
ret
+=
role
+
'
\n
'
+
' '
+
message
else
:
ret
+=
role
return
ret
elif
self
.
sep_style
==
SeparatorStyle
.
CHATINTERN
:
# source: https://huggingface.co/internlm/internlm-chat-7b-8k/blob/bd546fa984b4b0b86958f56bf37f94aa75ab8831/modeling_internlm.py#L771
seps
=
[
self
.
sep
,
self
.
sep2
]
ret
=
system_prompt
for
i
,
(
role
,
message
)
in
enumerate
(
self
.
messages
):
# if i % 2 == 0:
# ret += "<s>"
if
message
:
ret
+=
role
+
':'
+
message
+
seps
[
i
%
2
]
+
'
\n
'
else
:
ret
+=
role
+
':'
return
ret
elif
self
.
sep_style
==
SeparatorStyle
.
DOLLY
:
seps
=
[
self
.
sep
,
self
.
sep2
]
ret
=
system_prompt
for
i
,
(
role
,
message
)
in
enumerate
(
self
.
messages
):
if
message
:
ret
+=
role
+
':
\n
'
+
message
+
seps
[
i
%
2
]
if
i
%
2
==
1
:
ret
+=
'
\n\n
'
else
:
ret
+=
role
+
':
\n
'
return
ret
elif
self
.
sep_style
==
SeparatorStyle
.
PHOENIX
:
ret
=
system_prompt
for
role
,
message
in
self
.
messages
:
if
message
:
ret
+=
role
+
': '
+
'<s>'
+
message
+
'</s>'
else
:
ret
+=
role
+
': '
+
'<s>'
return
ret
elif
self
.
sep_style
==
SeparatorStyle
.
ROBIN
:
ret
=
system_prompt
+
self
.
sep
for
role
,
message
in
self
.
messages
:
if
message
:
ret
+=
role
+
':
\n
'
+
message
+
self
.
sep
else
:
ret
+=
role
+
':
\n
'
return
ret
elif
self
.
sep_style
==
SeparatorStyle
.
FALCON_CHAT
:
ret
=
''
if
self
.
system_message
:
ret
+=
system_prompt
+
self
.
sep
for
role
,
message
in
self
.
messages
:
if
message
:
ret
+=
role
+
': '
+
message
+
self
.
sep
else
:
ret
+=
role
+
':'
return
ret
elif
self
.
sep_style
==
SeparatorStyle
.
INTERNVL_ZH
:
seps
=
[
self
.
sep2
,
self
.
sep
]
ret
=
self
.
system_message
+
seps
[
0
]
for
i
,
(
role
,
message
)
in
enumerate
(
self
.
messages
):
if
message
:
ret
+=
role
+
': '
+
message
+
seps
[
i
%
2
]
else
:
ret
+=
role
+
':'
return
ret
elif
self
.
sep_style
==
SeparatorStyle
.
MPT
:
ret
=
system_prompt
+
self
.
sep
for
role
,
message
in
self
.
messages
:
if
message
:
if
type
(
message
)
is
tuple
:
message
,
_
,
_
=
message
ret
+=
role
+
message
+
self
.
sep
else
:
ret
+=
role
return
ret
else
:
raise
ValueError
(
f
'Invalid style:
{
self
.
sep_style
}
'
)
def
set_system_message
(
self
,
system_message
:
str
):
"""Set the system message."""
self
.
system_message
=
system_message
def
append_message
(
self
,
role
:
str
,
message
:
str
):
"""Append a new message."""
self
.
messages
.
append
([
role
,
message
])
def
update_last_message
(
self
,
message
:
str
):
"""Update the last output.
The last message is typically set to be None when constructing the prompt,
so we need to update it in-place after getting the response from a model.
"""
self
.
messages
[
-
1
][
1
]
=
message
def
to_gradio_chatbot
(
self
):
"""Convert the conversation to gradio chatbot format."""
ret
=
[]
for
i
,
(
role
,
msg
)
in
enumerate
(
self
.
messages
[
self
.
offset
:]):
if
i
%
2
==
0
:
ret
.
append
([
msg
,
None
])
else
:
ret
[
-
1
][
-
1
]
=
msg
return
ret
def
to_openai_api_messages
(
self
):
"""Convert the conversation to OpenAI chat completion format."""
ret
=
[{
'role'
:
'system'
,
'content'
:
self
.
system_message
}]
for
i
,
(
_
,
msg
)
in
enumerate
(
self
.
messages
[
self
.
offset
:]):
if
i
%
2
==
0
:
ret
.
append
({
'role'
:
'user'
,
'content'
:
msg
})
else
:
if
msg
is
not
None
:
ret
.
append
({
'role'
:
'assistant'
,
'content'
:
msg
})
return
ret
def
copy
(
self
):
return
Conversation
(
name
=
self
.
name
,
system_template
=
self
.
system_template
,
system_message
=
self
.
system_message
,
roles
=
self
.
roles
,
messages
=
[[
x
,
y
]
for
x
,
y
in
self
.
messages
],
offset
=
self
.
offset
,
sep_style
=
self
.
sep_style
,
sep
=
self
.
sep
,
sep2
=
self
.
sep2
,
stop_str
=
self
.
stop_str
,
stop_token_ids
=
self
.
stop_token_ids
,
)
def
dict
(
self
):
return
{
'template_name'
:
self
.
name
,
'system_message'
:
self
.
system_message
,
'roles'
:
self
.
roles
,
'messages'
:
self
.
messages
,
'offset'
:
self
.
offset
,
}
# A global registry for all conversation templates
conv_templates
:
Dict
[
str
,
Conversation
]
=
{}
def
register_conv_template
(
template
:
Conversation
,
override
:
bool
=
False
):
"""Register a new conversation template."""
if
not
override
:
assert
(
template
.
name
not
in
conv_templates
),
f
'
{
template
.
name
}
has been registered.'
conv_templates
[
template
.
name
]
=
template
def
get_conv_template
(
name
:
str
)
->
Conversation
:
"""Get a conversation template."""
return
conv_templates
[
name
].
copy
()
# InternVL-Chat-V1-1 template
register_conv_template
(
Conversation
(
name
=
'internvl_zh'
,
system_template
=
''
,
roles
=
(
'<human>'
,
'<bot>'
),
sep_style
=
SeparatorStyle
.
INTERNVL_ZH
,
sep
=
'</s>'
,
sep2
=
' '
,
)
)
# Both Hermes-2 and internlm2-chat are chatml-format conversation templates. The difference
# is that during training, the preprocessing function for the Hermes-2 template doesn't add
# <s> at the beginning of the tokenized sequence, while the internlm2-chat template does.
# Therefore, they are completely equivalent during inference.
register_conv_template
(
Conversation
(
name
=
'Hermes-2'
,
system_template
=
'<|im_start|>system
\n
{system_message}'
,
# note: The new system prompt was not used here to avoid changes in benchmark performance.
# system_message='我是书生·万象,英文名是InternVL,是由上海人工智能实验室、清华大学及多家合作单位联合开发的多模态大语言模型。',
system_message
=
'你是由上海人工智能实验室联合商汤科技开发的书生多模态大模型,英文名叫InternVL, 是一个有用无害的人工智能助手。'
,
roles
=
(
'<|im_start|>user
\n
'
,
'<|im_start|>assistant
\n
'
),
sep_style
=
SeparatorStyle
.
MPT
,
sep
=
'<|im_end|>'
,
stop_str
=
'<|endoftext|>'
,
)
)
register_conv_template
(
Conversation
(
name
=
'internlm2-chat'
,
system_template
=
'<|im_start|>system
\n
{system_message}'
,
# note: The new system prompt was not used here to avoid changes in benchmark performance.
# system_message='我是书生·万象,英文名是InternVL,是由上海人工智能实验室、清华大学及多家合作单位联合开发的多模态大语言模型。',
system_message
=
'你是由上海人工智能实验室联合商汤科技开发的书生多模态大模型,英文名叫InternVL, 是一个有用无害的人工智能助手。'
,
roles
=
(
'<|im_start|>user
\n
'
,
'<|im_start|>assistant
\n
'
),
sep_style
=
SeparatorStyle
.
MPT
,
sep
=
'<|im_end|>'
,
)
)
register_conv_template
(
Conversation
(
name
=
'phi3-chat'
,
system_template
=
'<|system|>
\n
{system_message}'
,
# note: The new system prompt was not used here to avoid changes in benchmark performance.
# system_message='我是书生·万象,英文名是InternVL,是由上海人工智能实验室、清华大学及多家合作单位联合开发的多模态大语言模型。',
system_message
=
'你是由上海人工智能实验室联合商汤科技开发的书生多模态大模型,英文名叫InternVL, 是一个有用无害的人工智能助手。'
,
roles
=
(
'<|user|>
\n
'
,
'<|assistant|>
\n
'
),
sep_style
=
SeparatorStyle
.
MPT
,
sep
=
'<|end|>'
,
)
)
register_conv_template
(
Conversation
(
name
=
'internvl2_5'
,
system_template
=
'<|im_start|>system
\n
{system_message}'
,
system_message
=
'你是书生·万象,英文名是InternVL,是由上海人工智能实验室、清华大学及多家合作单位联合开发的多模态大语言模型。'
,
roles
=
(
'<|im_start|>user
\n
'
,
'<|im_start|>assistant
\n
'
),
sep_style
=
SeparatorStyle
.
MPT
,
sep
=
'<|im_end|>
\n
'
,
)
)
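
For orientation, here is a minimal usage sketch (not part of this commit) showing how the registry above is typically consumed at inference time. It assumes the internvl package is importable; the template name 'internvl2_5' and the example question are just illustrative values.

    # Minimal sketch: build a generation prompt from a registered template.
    from internvl.conversation import get_conv_template

    conv = get_conv_template('internvl2_5')   # returns a copy(), leaving the registry untouched
    conv.append_message(conv.roles[0], 'Describe the image in detail.')  # user turn
    conv.append_message(conv.roles[1], None)  # None leaves the assistant slot open
    prompt = conv.get_prompt()                # ends with '<|im_start|>assistant\n'
    # After decoding the model's reply, store it in-place:
    # conv.update_last_message(reply)

Note that get_conv_template returning copy() matters: the dataclass default for messages is the empty tuple (), so calling append_message on a template taken straight from conv_templates would raise AttributeError, whereas copy() rebuilds messages as a real list.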
internvl_chat/internvl/dist_utils.py (new file, mode 100644):
import os
import socket
import subprocess
from datetime import timedelta

import deepspeed
import torch
import torch.multiprocessing as mp
from torch import distributed as dist

timeout = timedelta(minutes=60)


def _find_free_port():
    # Copied from https://github.com/facebookresearch/detectron2/blob/main/detectron2/engine/launch.py # noqa: E501
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    # Binding to port 0 will cause the OS to find an available port for us
    sock.bind(('', 0))
    port = sock.getsockname()[1]
    sock.close()
    # NOTE: there is still a chance the port could be taken by other processes.
    return port


def _is_free_port(port):
    ips = socket.gethostbyname_ex(socket.gethostname())[-1]
    ips.append('localhost')
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        return all(s.connect_ex((ip, port)) != 0 for ip in ips)


def init_dist(launcher, backend='nccl', **kwargs):
    if mp.get_start_method(allow_none=True) is None:
        mp.set_start_method('spawn')
    if launcher == 'pytorch':
        _init_dist_pytorch(backend, **kwargs)
    elif launcher == 'mpi':
        _init_dist_mpi(backend, **kwargs)
    elif launcher == 'slurm':
        _init_dist_slurm(backend, **kwargs)
    else:
        raise ValueError(f'Invalid launcher type: {launcher}')


def _init_dist_pytorch(backend, **kwargs):
    # TODO: use local_rank instead of rank % num_gpus
    rank = int(os.environ['RANK'])
    num_gpus = torch.cuda.device_count()
    torch.cuda.set_device(rank % num_gpus)
    # dist.init_process_group(backend=backend, **kwargs)
    deepspeed.init_distributed(dist_backend=backend)


def _init_dist_mpi(backend, **kwargs):
    local_rank = int(os.environ['OMPI_COMM_WORLD_LOCAL_RANK'])
    torch.cuda.set_device(local_rank)
    if 'MASTER_PORT' not in os.environ:
        # 29500 is torch.distributed default port
        os.environ['MASTER_PORT'] = '29500'
    if 'MASTER_ADDR' not in os.environ:
        raise KeyError('The environment variable MASTER_ADDR is not set')
    os.environ['WORLD_SIZE'] = os.environ['OMPI_COMM_WORLD_SIZE']
    os.environ['RANK'] = os.environ['OMPI_COMM_WORLD_RANK']
    dist.init_process_group(backend=backend, **kwargs)


def _init_dist_slurm(backend, port=None):
    """Initialize slurm distributed training environment.

    If argument ``port`` is not specified, then the master port will be system
    environment variable ``MASTER_PORT``. If ``MASTER_PORT`` is not in system
    environment variable, then a default port ``29500`` will be used.

    Args:
        backend (str): Backend of torch.distributed.
        port (int, optional): Master port. Defaults to None.
    """
    proc_id = int(os.environ['SLURM_PROCID'])
    ntasks = int(os.environ['SLURM_NTASKS'])
    node_list = os.environ['SLURM_NODELIST']
    num_gpus = torch.cuda.device_count()
    torch.cuda.set_device(proc_id % num_gpus)
    addr = subprocess.getoutput(
        f'scontrol show hostname {node_list} | head -n1')
    # specify master port
    if port is not None:
        os.environ['MASTER_PORT'] = str(port)
    elif 'MASTER_PORT' in os.environ:
        pass  # use MASTER_PORT in the environment variable
    else:
        # if torch.distributed default port(29500) is available
        # then use it, else find a free port
        if _is_free_port(29500):
            os.environ['MASTER_PORT'] = '29500'
        else:
            os.environ['MASTER_PORT'] = str(_find_free_port())
    # use MASTER_ADDR in the environment variable if it already exists
    if 'MASTER_ADDR' not in os.environ:
        os.environ['MASTER_ADDR'] = addr
    os.environ['WORLD_SIZE'] = str(ntasks)
    os.environ['LOCAL_RANK'] = str(proc_id % num_gpus)
    os.environ['RANK'] = str(proc_id)
    # dist.init_process_group(backend=backend, timeout=timeout)
    deepspeed.init_distributed(dist_backend=backend)
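
For context, here is a minimal launch stub (not part of this commit) showing how init_dist is typically invoked; the flag name and script layout are illustrative, not taken from the repository's actual training scripts.

    # Hypothetical entry point. Under e.g.
    #   torchrun --nproc_per_node=8 train.py --launcher pytorch
    # RANK, WORLD_SIZE, MASTER_ADDR and MASTER_PORT are already exported, so the
    # 'pytorch' branch only pins a CUDA device and defers to deepspeed.
    import argparse

    from torch import distributed as dist

    from internvl.dist_utils import init_dist

    if __name__ == '__main__':
        parser = argparse.ArgumentParser()
        parser.add_argument('--launcher', choices=['pytorch', 'mpi', 'slurm'],
                            default='pytorch')
        args = parser.parse_args()
        init_dist(args.launcher, backend='nccl')
        print(f'initialized rank {dist.get_rank()} of {dist.get_world_size()}')

deepspeed.init_distributed reads the same MASTER_ADDR, MASTER_PORT, RANK, and WORLD_SIZE variables that torch.distributed would, which is why the slurm branch only needs to populate the environment before handing off.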