"include/ck/config.hpp" did not exist on "b3e8d57d51300b88b591900621f71b6a1b3a7acc"
Commit 0bc22e1d authored by wanglch

Initial commit

# Adopted from https://github.com/lm-sys/FastChat. Below is the original copyright:
# Adopted from tatsu-lab@stanford_alpaca. Below is the original copyright:
# Make it more memory efficient by monkey patching the LLaMA model with FlashAttn.
# Need to call this before importing transformers.
from vary.utils.llama_flash_attn_monkey_patch import replace_llama_attn_with_flash_attn
replace_llama_attn_with_flash_attn()
# from vary.train.train import train
from vary.train.train_lora import train
if __name__ == "__main__":
train()
# Adopted from https://github.com/lm-sys/FastChat. Below is the original copyright:
# Adopted from tatsu-lab@stanford_alpaca. Below is the original copyright:
# Copyright 2023 Rohan Taori, Ishaan Gulrajani, Tianyi Zhang, Yann Dubois, Xuechen Li
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import pathlib
import torch
import transformers
from vary.train.trainer_vit_fixlr import varyTrainer
from vary.model import *
from vary.data import make_supervised_data_module
from vary.utils.arguments import *
from vary.utils.constants import *
from vary.model.vision_encoder.sam import build_sam_vit_b
def train():
parser = transformers.HfArgumentParser((ModelArguments, DataArguments, TrainingArguments))
model_args, data_args, training_args = parser.parse_args_into_dataclasses()
tokenizer = transformers.AutoTokenizer.from_pretrained(model_args.model_name_or_path, use_fast=False, padding_side="right", model_max_length=training_args.model_max_length)
model = varyOPTForCausalLM.from_pretrained(model_args.model_name_or_path)
dtype = torch.float32
if training_args.fp16:
dtype = torch.float16
if training_args.bf16:
dtype = torch.bfloat16
vision_tower_dict = model.get_model().initialize_vision_modules(
vision_tower=model_args.vision_tower,
pretrained_stage1_model=model_args.pretrained_stage1_model,
freeze_vision_tower=model_args.freeze_vision_tower,
use_im_start_end=model_args.use_im_start_end,
vision_select_layer=model_args.vision_select_layer,
dtype=dtype,
device=training_args.device
)
model.initialize_vision_tokenizer(
tokenizer=tokenizer,
freeze_lm_model=model_args.freeze_lm_model,
pretrained_stage1_model=model_args.pretrained_stage1_model,
device=training_args.device,
)
model.to(dtype=dtype, device=training_args.device)
data_args.image_token_len = 256
data_args.image_processor = vision_tower_dict['image_processor']
data_args.image_processor_high = vision_tower_dict['image_processor_high']
data_args.use_im_start_end = model_args.use_im_start_end
# mixed relation, to be fixed
if model_args.freeze_lm_model:
model.requires_grad_(False)
for p in model.get_model().mm_projector.parameters():
p.requires_grad = True
for p in model.get_input_embeddings().parameters():
p.requires_grad = True
if not model_args.freeze_vision_tower:
model.get_model().vision_tower.requires_grad_(True)
params_grad = [p.numel() for n, p in model.named_parameters() if p.requires_grad]
print(f"Number of Mapping Trainable Parameters: {sum(params_grad) / (1 << 20):.2f} M")
# params_no_grad = [n for n, p in model.named_parameters() if not p.requires_grad]
# if len(params_no_grad) > 0:
# if training_args.fsdp is not None and len(training_args.fsdp) > 0:
# if len(params_no_grad) < 10:
# print('[WARNING] Attempting to use FSDP while {} parameters do not require gradients: {}'. format(len(params_no_grad), params_no_grad))
# else:
# print('[WARNING] Attempting to use FSDP while {} parameters do not require gradients: {}...(omitted)'. format(len(params_no_grad), ', '.join(params_no_grad[:10])))
# print("[WARNING] Attempting to use FSDP with partially frozen paramters, this is experimental.")
# print("[WARNING] As of 4/30/23, this feature requires PyTorch-nightly build. See here for details: https://github.com/haotian-liu/LLaVA#experimental-use-fsdp-to-save-memory-in-pretraining")
# from torch.distributed.fsdp.fully_sharded_data_parallel import FullyShardedDataParallel as FSDP
# def patch_FSDP_use_orig_params(func):
# def wrap_func(*args, **kwargs):
# use_orig_params = kwargs.pop('use_orig_params', True)
# return func(*args, **kwargs, use_orig_params=use_orig_params)
# return wrap_func
# FSDP.__init__ = patch_FSDP_use_orig_params(FSDP.__init__)
# interleave = True
data_module = make_supervised_data_module(
interleave=training_args.interleave,
with_box=training_args.with_box,
tokenizer=tokenizer,
data_args=data_args
)
trainer = varyTrainer(
model=model,
tokenizer=tokenizer,
args=training_args,
**data_module)
if list(pathlib.Path(training_args.output_dir).glob("checkpoint-*")):
trainer.train(resume_from_checkpoint=True)
else:
trainer.train()
trainer.save_state()
trainer._safe_save(output_dir=training_args.output_dir)
if __name__ == "__main__":
train()
# Adopted from https://github.com/lm-sys/FastChat. Below is the original copyright:
# Adopted from tatsu-lab@stanford_alpaca. Below is the original copyright:
# Copyright 2023 Rohan Taori, Ishaan Gulrajani, Tianyi Zhang, Yann Dubois, Xuechen Li
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import pathlib
import torch
import transformers
from vary.train.trainer_vit_fixlr import varyTrainer
from vary.model import *
from vary.data import make_supervised_data_module
from vary.utils.arguments import *
from vary.utils.utils import smart_tokenizer_and_embedding_resize
from vary.model.vision_encoder.sam import build_sam_vit_b
def train():
parser = transformers.HfArgumentParser((ModelArguments, DataArguments, TrainingArguments))
model_args, data_args, training_args = parser.parse_args_into_dataclasses()
tokenizer = transformers.AutoTokenizer.from_pretrained("model_args.model_name_or_path", trust_remote_code=True, padding_side="right", model_max_length=training_args.model_max_length,)
model = varyQwenForCausalLM.from_pretrained(model_args.model_name_or_path, low_cpu_mem_usage=True, device_map='cuda')
smart_tokenizer_and_embedding_resize(
special_tokens_dict=dict(pad_token='<|endoftext|>'),
tokenizer=tokenizer,
model=model,
)
dtype = torch.float32
if training_args.fp16:
dtype = torch.float16
if training_args.bf16:
dtype = torch.bfloat16
vision_tower_dict = model.get_model().initialize_vision_modules(
vision_tower=model_args.vision_tower,
pretrained_stage1_model=model_args.pretrained_stage1_model,
freeze_vision_tower=model_args.freeze_vision_tower,
use_im_start_end=model_args.use_im_start_end,
vision_select_layer=model_args.vision_select_layer,
dtype=dtype,
device=training_args.device
)
model.initialize_vision_tokenizer(
tokenizer=tokenizer,
freeze_lm_model=model_args.freeze_lm_model,
pretrained_stage1_model=model_args.pretrained_stage1_model,
device=training_args.device,
)
model.to(dtype=dtype, device=training_args.device)
data_args.image_token_len = 256
data_args.image_processor = vision_tower_dict['image_processor']
data_args.image_processor_high = vision_tower_dict['image_processor_high']
data_args.use_im_start_end = model_args.use_im_start_end
# mixed relation, to be fixed
if model_args.freeze_lm_model:
model.requires_grad_(False)
for p in model.get_model().mm_projector.parameters():
p.requires_grad = True
for p in model.get_model().mm_projector_vary.parameters():
p.requires_grad = True
for p in model.get_input_embeddings().parameters():
p.requires_grad = True
if not model_args.freeze_vision_tower:
model.get_model().vision_tower.requires_grad_(True)
model.get_model().vision_tower_high.requires_grad_(True)
params_grad = [p.numel() for n, p in model.named_parameters() if p.requires_grad]
print(f"Number of Mapping Trainable Parameters: {sum(params_grad) / (1 << 20):.2f} M")
# params_no_grad = [n for n, p in model.named_parameters() if not p.requires_grad]
# if len(params_no_grad) > 0:
# if training_args.fsdp is not None and len(training_args.fsdp) > 0:
# if len(params_no_grad) < 10:
# print('[WARNING] Attempting to use FSDP while {} parameters do not require gradients: {}'. format(len(params_no_grad), params_no_grad))
# else:
# print('[WARNING] Attempting to use FSDP while {} parameters do not require gradients: {}...(omitted)'. format(len(params_no_grad), ', '.join(params_no_grad[:10])))
# print("[WARNING] Attempting to use FSDP with partially frozen paramters, this is experimental.")
# print("[WARNING] As of 4/30/23, this feature requires PyTorch-nightly build. See here for details: https://github.com/haotian-liu/LLaVA#experimental-use-fsdp-to-save-memory-in-pretraining")
# from torch.distributed.fsdp.fully_sharded_data_parallel import FullyShardedDataParallel as FSDP
# def patch_FSDP_use_orig_params(func):
# def wrap_func(*args, **kwargs):
# use_orig_params = kwargs.pop('use_orig_params', True)
# return func(*args, **kwargs, use_orig_params=use_orig_params)
# return wrap_func
# FSDP.__init__ = patch_FSDP_use_orig_params(FSDP.__init__)
data_module = make_supervised_data_module(
interleave=training_args.interleave,
with_box=training_args.with_box,
tokenizer=tokenizer,
data_args=data_args
)
trainer = varyTrainer(
model=model,
tokenizer=tokenizer,
args=training_args,
**data_module)
if list(pathlib.Path(training_args.output_dir).glob("checkpoint-*")):
trainer.train(resume_from_checkpoint=True)
else:
trainer.train()
trainer.save_state()
trainer._safe_save(output_dir=training_args.output_dir)
if __name__ == "__main__":
train()
import os
import torch
import torch.nn as nn
from transformers import Trainer
from typing import Dict, Optional, Sequence
def unwrap_model(model: nn.Module) -> nn.Module:
"""
Recursively unwraps a model from potential containers (as used in distributed training).
Args:
model (`torch.nn.Module`): The model to unwrap.
"""
# since there could be multiple levels of wrapping, unwrap recursively
if hasattr(model, "module"):
return unwrap_model(model.module)
else:
return model
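# Illustration added for clarity (not part of the original file): wrappers such as
# DistributedDataParallel expose the underlying network via `.module`, and
# unwrap_model peels such layers off recursively, e.g.
#   ddp_model = torch.nn.parallel.DistributedDataParallel(model)
#   assert unwrap_model(ddp_model) is model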
class varyTrainer(Trainer):
def _safe_save(self, output_dir: str):
"""Collects the state dict and dump to disk."""
if self.deepspeed:
torch.cuda.synchronize()
self.save_model(output_dir)
return
state_dict = self.model.state_dict()
if self.args.should_save:
cpu_state_dict = {
key: value.cpu()
for key, value in state_dict.items()
}
del state_dict
self._save(output_dir, state_dict=cpu_state_dict) # noqa
def _save(self, output_dir: Optional[str] = None, state_dict=None):
if getattr(self.args, 'tune_mm_mlp_adapter', False):
# Save the model
_state_dict = state_dict
if _state_dict is None:
# Only save the model itself if we are using distributed training
model_to_save = unwrap_model(self.model)
_state_dict = model_to_save.state_dict()
weight_to_save = {}
keys_to_match = ['mm_projector', 'embed_tokens', 'embed_in']
for k, v in _state_dict.items():
if any(key_match in k for key_match in keys_to_match):
weight_to_save[k] = v
current_folder = output_dir.split('/')[-1]
parent_folder = os.path.dirname(output_dir)
if current_folder.startswith('checkpoint-'):
mm_projector_folder = os.path.join(parent_folder, "mm_projector")
os.makedirs(mm_projector_folder, exist_ok=True)
torch.save(weight_to_save, os.path.join(mm_projector_folder, f'{current_folder}.bin'))
else:
torch.save(weight_to_save, os.path.join(output_dir, 'mm_projector.bin'))
super(varyTrainer, self)._save(output_dir, state_dict)
import os
import torch
import torch.nn as nn
from transformers import Trainer
from transformers.trainer_pt_utils import get_parameter_names
from transformers.pytorch_utils import ALL_LAYERNORM_LAYERS
from typing import Dict, Optional, Sequence
def unwrap_model(model: nn.Module) -> nn.Module:
"""
Recursively unwraps a model from potential containers (as used in distributed training).
Args:
model (`torch.nn.Module`): The model to unwrap.
"""
# since there could be multiple levels of wrapping, unwrap recursively
if hasattr(model, "module"):
return unwrap_model(model.module)
else:
return model
class varyTrainer(Trainer):
def _safe_save(self, output_dir: str):
"""Collects the state dict and dump to disk."""
state_dict = self.model.state_dict()
if self.args.should_save:
cpu_state_dict = {
key: value.cpu()
for key, value in state_dict.items()
}
del state_dict
self._save(output_dir, state_dict=cpu_state_dict) # noqa
def _save(self, output_dir: Optional[str] = None, state_dict=None):
if getattr(self.args, 'tune_mm_mlp_adapter', False):
# Save the model
_state_dict = state_dict
if _state_dict is None:
# Only save the model itself if we are using distributed training
model_to_save = unwrap_model(self.model)
_state_dict = model_to_save.state_dict()
weight_to_save = {}
keys_to_match = ['mm_projector', 'embed_tokens', 'embed_in']
for k, v in _state_dict.items():
if any(key_match in k for key_match in keys_to_match):
weight_to_save[k] = v
current_folder = output_dir.split('/')[-1]
parent_folder = os.path.dirname(output_dir)
if current_folder.startswith('checkpoint-'):
mm_projector_folder = os.path.join(parent_folder, "mm_projector")
os.makedirs(mm_projector_folder, exist_ok=True)
torch.save(weight_to_save, os.path.join(mm_projector_folder, f'{current_folder}.bin'))
else:
torch.save(weight_to_save, os.path.join(output_dir, 'mm_projector.bin'))
super(varyTrainer, self)._save(output_dir, state_dict)
def create_optimizer(self):
"""
Setup the optimizer.
We provide a reasonable default that works well. If you want to use something else, you can pass a tuple in the
Trainer's init through `optimizers`, or subclass and override this method in a subclass.
"""
opt_model = self.model
if self.optimizer is None:
decay_parameters = get_parameter_names(opt_model, ALL_LAYERNORM_LAYERS)
decay_parameters = [name for name in decay_parameters if "bias" not in name]
optimizer_grouped_parameters = [
{
"params": [
p for n, p in opt_model.named_parameters() if 'vision_encoder' in n and n in decay_parameters and p.requires_grad
],
"weight_decay": self.args.weight_decay,
"lr": self.args.learning_rate,
},
{
"params": [
p for n, p in opt_model.named_parameters() if 'vision_encoder' in n and n not in decay_parameters and p.requires_grad],
"weight_decay": 0.0,
"lr": self.args.learning_rate,
},
{
"params": [
p for n, p in opt_model.named_parameters() if 'vision_encoder' not in n and n in decay_parameters and p.requires_grad],
"weight_decay": self.args.weight_decay,
"lr": self.args.learning_rate,
},
{
"params": [
p for n, p in opt_model.named_parameters() if 'vision_encoder' not in n and n not in decay_parameters and p.requires_grad
],
"weight_decay": 0.0,
"lr": self.args.learning_rate,
},
]
for idx, group in enumerate(optimizer_grouped_parameters):
print(idx, len(group['params']), group['lr'])
optimizer_cls, optimizer_kwargs = Trainer.get_optimizer_cls_and_kwargs(self.args)
self.optimizer = optimizer_cls(optimizer_grouped_parameters, **optimizer_kwargs)
return self.optimizer
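# Note added for clarity (not in the original): the four groups above split parameters
# along two axes, (name contains 'vision_encoder') x (eligible for weight decay). All
# groups currently share self.args.learning_rate, so the split mainly serves as a hook
# for giving the vision encoder its own learning rate, which appears to be the point of
# this "_vit_fixlr" trainer variant.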
from dataclasses import dataclass, field
from typing import Dict, Optional, Sequence
import transformers
@dataclass
class ModelArguments:
model_name_or_path: Optional[str] = field(default="facebook/opt-125m")
use_cache: bool = field(default=False)
vision_tower: Optional[str] = field(default="~/.cache/huggingface/hub/models--openai--clip-vit-large-patch14/snapshots/8d052a0f05efbaefbc9e8786ba291cfdf93e5bff/")
freeze_vision_tower: bool = field(default=False)
freeze_lm_model: bool = field(default=False)
pretrained_stage1_model: Optional[str] = field(default=None) # mlp &/ vision tower
vision_select_layer: Optional[int] = field(default=-1) # default to the last layer
use_im_start_end: bool = field(default=False)
@dataclass
class DataArguments:
datasets: str = field(default=None, metadata={"help": "combinations of the training data."})
sep_image_conv_front: bool = False
image_token_len: int = 256
image_aspect_ratio: str = 'square'
conversation_version: str = 'mpt'
# conversation_version: str = 'v0'
# conversation_version: str = 'v1'
# conversation_version: str = 'opt'
box_limit: int = 0
@dataclass
class TrainingArguments(transformers.TrainingArguments):
cache_dir: Optional[str] = field(default=None)
optim: str = field(default="adamw_torch")
remove_unused_columns: bool = field(default=False)
force_fsdp: bool = field(default=False)
interleave: bool = field(default=False)
with_box: bool = field(default=False)
model_max_length: int = field(
default=512,
metadata={
"help":
"Maximum sequence length. Sequences will be right padded (and possibly truncated)."
},
)
lora_enable: bool = False
lora_r: int = 8
lora_alpha: int = 16
lora_dropout: float = 0.05
lora_weight_path: str = ""
lora_bias: str = "none"
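# Illustrative parse (placeholders, not recommended settings): the three dataclasses
# above are consumed by the training scripts roughly like this:
#   parser = transformers.HfArgumentParser((ModelArguments, DataArguments, TrainingArguments))
#   model_args, data_args, training_args = parser.parse_args_into_dataclasses(
#       args=["--output_dir", "/tmp/vary_debug", "--datasets", "pdf"])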
CONTROLLER_HEART_BEAT_EXPIRATION = 30
WORKER_HEART_BEAT_INTERVAL = 15
LOGDIR = "log"
IGNORE_INDEX = -100
# DEFAULT_PAD_TOKEN = "[PAD]"
DEFAULT_PAD_TOKEN = "<|endoftext|>"
DEFAULT_EOS_TOKEN = "</s>"
DEFAULT_BOS_TOKEN = "<s>"
DEFAULT_UNK_TOKEN = "<unk>"
DEFAULT_IMAGE_TOKEN = "<image>"
DEFAULT_BOX_TOKEN = "<box>"
DEFAULT_IMAGE_PATCH_TOKEN = '<imgpad>'
DEFAULT_IM_START_TOKEN = '<img>'
DEFAULT_IM_END_TOKEN = '</img>'
ROOT_PATH = '/data/public/ucaswei/data/'
CONVERSATION_DATA = {
# pair 4m
'laion-coco-4m': {
'images': '',
'annotations': '',
},
'cc665k': {
'images': "/path_to/LLaVA1.5/images/",
'annotations': "/path_to/LLaVA1.5/llava_v1_5_66k.json",
},
'pdf': {
'images': "",
'annotations': "",
},
'docvqa_train': {
'images': "",
'annotations': "",
},
'chartqa_train': {
'images': "",
'annotations': "",
},
}
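# Illustrative lookup (added for clarity; the data pipeline presumably resolves dataset
# names passed via --datasets against this registry):
#   meta = CONVERSATION_DATA['cc665k']
#   image_root, ann_path = meta['images'], meta['annotations']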
import dataclasses
from enum import auto, Enum
from typing import List, Tuple
class SeparatorStyle(Enum):
"""Different separator style."""
SINGLE = auto()
TWO = auto()
MPT = auto()
@dataclasses.dataclass
class Conversation:
"""A class that keeps all conversation history."""
system: str
roles: List[str]
messages: List[List[str]]
offset: int
sep_style: SeparatorStyle = SeparatorStyle.SINGLE
sep: str = "<|im_end|>"
sep2: str = None
version: str = "Unknown"
skip_next: bool = False
def get_prompt(self):
if self.sep_style == SeparatorStyle.SINGLE:
ret = self.system + self.sep + '\n'
for role, message in self.messages:
if message:
if type(message) is tuple:
message, _, _ = message
ret += role + ": " + message + self.sep
else:
ret += role + ":"
return ret
elif self.sep_style == SeparatorStyle.TWO:
seps = [self.sep, self.sep2]
ret = self.system + seps[0]
for i, (role, message) in enumerate(self.messages):
if message:
if type(message) is tuple:
message, _, _ = message
ret += role + ": " + message + seps[i % 2]
else:
ret += role + ":"
return ret
if self.sep_style == SeparatorStyle.MPT:
if self.system:
ret = self.system + self.sep
else:
ret = ''
for role, message in self.messages:
if message:
if type(message) is tuple:
message, _, _ = message
ret += role + message + self.sep
else:
ret += role
return ret
else:
raise ValueError(f"Invalid style: {self.sep_style}")
def append_message(self, role, message):
self.messages.append([role, message])
def get_images(self, return_pil=False):
images = []
for i, (role, msg) in enumerate(self.messages[self.offset:]):
if i % 2 == 0:
if type(msg) is tuple:
import base64
from io import BytesIO
from PIL import Image
msg, image, image_process_mode = msg
if image_process_mode == "Pad":
def expand2square(pil_img, background_color=(122, 116, 104)):
width, height = pil_img.size
if width == height:
return pil_img
elif width > height:
result = Image.new(pil_img.mode, (width, width), background_color)
# result.paste(pil_img, (0, (width - height) // 2))
result.paste(pil_img)
return result
else:
result = Image.new(pil_img.mode, (height, height), background_color)
# result.paste(pil_img, ((height - width) // 2, 0))
result.paste(pil_img)
return result
image = expand2square(image)
elif image_process_mode == "Crop":
max_hw, min_hw = max(image.size), min(image.size)
aspect_ratio = max_hw / min_hw
max_len, min_len = 800, 400
shortest_edge = int(min(max_len / aspect_ratio, min_len, min_hw))
longest_edge = int(shortest_edge * aspect_ratio)
W, H = image.size
if H > W:
H, W = longest_edge, shortest_edge
else:
H, W = shortest_edge, longest_edge
image = image.resize((W, H))
elif image_process_mode == "Resize":
image = image.resize((224, 224))
else:
raise ValueError(f"Invalid image_process_mode: {image_process_mode}")
if return_pil:
images.append(image)
else:
buffered = BytesIO()
image.convert('RGB').save(buffered, format="JPEG")
img_b64_str = base64.b64encode(buffered.getvalue()).decode()
images.append(img_b64_str)
return images
def to_gradio_chatbot(self):
ret = []
for i, (role, msg) in enumerate(self.messages[self.offset:]):
if i % 2 == 0:
if type(msg) is tuple:
import base64
from io import BytesIO
msg, image, image_process_mode = msg
max_hw, min_hw = max(image.size), min(image.size)
aspect_ratio = max_hw / min_hw
max_len, min_len = 800, 400
shortest_edge = int(min(max_len / aspect_ratio, min_len, min_hw))
longest_edge = int(shortest_edge * aspect_ratio)
W, H = image.size
if H > W:
H, W = longest_edge, shortest_edge
else:
H, W = shortest_edge, longest_edge
image = image.resize((W, H))
# image = image.resize((224, 224))
buffered = BytesIO()
image.save(buffered, format="JPEG")
img_b64_str = base64.b64encode(buffered.getvalue()).decode()
img_str = f'<img src="data:image/jpeg;base64,{img_b64_str}" alt="user uploaded image" />'
msg = msg.replace('<image>', img_str)
ret.append([msg, None])
else:
ret[-1][-1] = msg
return ret
def copy(self):
return Conversation(
system=self.system,
roles=self.roles,
messages=[[x, y] for x, y in self.messages],
offset=self.offset,
sep_style=self.sep_style,
sep=self.sep,
sep2=self.sep2)
def dict(self):
if len(self.get_images()) > 0:
return {
"system": self.system,
"roles": self.roles,
"messages": [[x, y[0] if type(y) is tuple else y] for x, y in self.messages],
"offset": self.offset,
"sep": self.sep,
"sep2": self.sep2,
}
return {
"system": self.system,
"roles": self.roles,
"messages": self.messages,
"offset": self.offset,
"sep": self.sep,
"sep2": self.sep2,
}
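# Usage sketch (assumed, mirroring how the templates defined below are meant to be
# consumed): copy a shared template before mutating it, append one user turn and an
# empty assistant turn, then render the prompt.
#   conv = conv_mpt.copy()
#   conv.append_message(conv.roles[0], "Describe this image. <image>")
#   conv.append_message(conv.roles[1], None)
#   prompt = conv.get_prompt()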
conv_v1 = Conversation(
system="A chat between a curious human and an artificial intelligence assistant. "
"The assistant gives helpful, detailed, and polite answers to the human's questions.",
roles=("Human", "Assistant"),
messages=(
("Human", "Give three tips for staying healthy."),
("Assistant",
"Sure, here are three tips for staying healthy:\n"
"1. Exercise regularly: Regular physical activity can help improve your overall health and wellbeing. "
"It can also help reduce your risk of chronic conditions such as obesity, diabetes, heart disease, "
"and certain cancers. Aim for at least 150 minutes of moderate-intensity aerobic exercise or "
"75 minutes of vigorous-intensity aerobic exercise per week, along with muscle-strengthening "
"activities at least two days per week.\n"
"2. Eat a balanced diet: Eating a balanced diet that is rich in fruits, "
"vegetables, whole grains, lean proteins, and healthy fats can help support "
"your overall health. Try to limit your intake of processed and high-sugar foods, "
"and aim to drink plenty of water throughout the day.\n"
"3. Get enough sleep: Getting enough quality sleep is essential for your physical "
"and mental health. Adults should aim for seven to nine hours of sleep per night. "
"Establish a regular sleep schedule and try to create a relaxing bedtime routine to "
"help improve the quality of your sleep.")
),
offset=2,
sep_style=SeparatorStyle.SINGLE,
sep="###",
)
conv_v1_2 = Conversation(
system="A chat between a curious human and an artificial intelligence assistant. "
"The assistant gives helpful, detailed, and polite answers to the human's questions.",
roles=("Human", "Assistant"),
messages=(
("Human", "What are the key differences between renewable and non-renewable energy sources?"),
("Assistant",
"Renewable energy sources are those that can be replenished naturally in a relatively "
"short amount of time, such as solar, wind, hydro, geothermal, and biomass. "
"Non-renewable energy sources, on the other hand, are finite and will eventually be "
"depleted, such as coal, oil, and natural gas. Here are some key differences between "
"renewable and non-renewable energy sources:\n"
"1. Availability: Renewable energy sources are virtually inexhaustible, while non-renewable "
"energy sources are finite and will eventually run out.\n"
"2. Environmental impact: Renewable energy sources have a much lower environmental impact "
"than non-renewable sources, which can lead to air and water pollution, greenhouse gas emissions, "
"and other negative effects.\n"
"3. Cost: Renewable energy sources can be more expensive to initially set up, but they typically "
"have lower operational costs than non-renewable sources.\n"
"4. Reliability: Renewable energy sources are often more reliable and can be used in more remote "
"locations than non-renewable sources.\n"
"5. Flexibility: Renewable energy sources are often more flexible and can be adapted to different "
"situations and needs, while non-renewable sources are more rigid and inflexible.\n"
"6. Sustainability: Renewable energy sources are more sustainable over the long term, while "
"non-renewable sources are not, and their depletion can lead to economic and social instability.\n")
),
offset=2,
sep_style=SeparatorStyle.SINGLE,
sep="###",
)
conv_vicuna_v1_1 = Conversation(
system="A chat between a curious user and an artificial intelligence assistant. "
"The assistant gives helpful, detailed, and polite answers to the user's questions.",
roles=("USER", "ASSISTANT"),
version="v1",
messages=(),
offset=0,
sep_style=SeparatorStyle.TWO,
sep=" ",
sep2="</s>",
)
conv_mpt = Conversation(
system="""<|im_start|>system
You should follow the instructions carefully and explain your answers in detail.""",
# system = None,
roles=("<|im_start|>user\n", "<|im_start|>assistant\n"),
version="mpt",
messages=(),
offset=0,
sep_style=SeparatorStyle.MPT,
sep="<|im_end|>",
)
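# Worked example (added for reference): with the MPT separator style, appending a user
# turn "Hi" and an empty assistant turn to conv_mpt.copy() makes get_prompt() return:
#   <|im_start|>system\nYou should follow the instructions carefully and explain your
#   answers in detail.<|im_end|><|im_start|>user\nHi<|im_end|><|im_start|>assistant\n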
conv_mpt_eval = Conversation(
system="",
roles=("<|im_start|>user\n", "<|im_start|>assistant\n"),
version="mpt",
messages=(),
offset=0,
sep_style=SeparatorStyle.MPT,
sep="<|im_end|>",
)
conv_mpt_text = Conversation(
system="""<|im_start|>system
- You are a helpful assistant chatbot trained by MosaicML.
- You answer questions.
- You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.
- You are more than just an information source, you are also able to write poetry, short stories, and make jokes.""",
roles=("<|im_start|>user\n", "<|im_start|>assistant\n"),
version="mpt",
messages=(),
offset=0,
sep_style=SeparatorStyle.MPT,
sep="<|im_end|>",
)
conv_bair_v1 = Conversation(
system="BEGINNING OF CONVERSATION:",
roles=("USER", "GPT"),
messages=(),
offset=0,
sep_style=SeparatorStyle.TWO,
sep=" ",
sep2="</s>",
)
simple_conv = Conversation(
system="",
roles=("Human", "Assistant"),
messages=(
),
offset=0,
sep_style=SeparatorStyle.SINGLE,
sep="###",
)
simple_conv_multimodal = Conversation(
system="You are vary, a large language and vision assistant trained by Foundation Model Group, Megvii Technology."
"You are able to understand the visual content that the user provides, and assist the user with a variety of tasks using natural language."
"Follow the instructions carefully and explain your answers in detail.",
# system="",
roles=("Human", "Assistant"),
messages=(
("Human", "Hi!"),
("Assistant", "Hi there! How can I help you today?\n")
),
offset=2,
sep_style=SeparatorStyle.SINGLE,
sep="###",
)
simple_conv_mpt_multimodal = Conversation(
system="""<|im_start|>system
- You are vary, a large language and vision assistant trained by Foundation Model Group, Megvii Technology.
- You are able to understand the visual content that the user provides, and assist the user with a variety of tasks using natural language.
- You should follow the instructions carefully and explain your answers in detail.""",
roles=("<|im_start|>user\n", "<|im_start|>assistant\n"),
version="mpt",
messages=(),
offset=0,
sep_style=SeparatorStyle.MPT,
sep="<|im_end|>",
)
simple_conv_legacy = Conversation(
system="You are vary, a large language model trained by Foundation Model Group, Megvii Technology."
"You are designed to assist human with a variety of tasks using natural language."
"Follow the instructions carefully.",
roles=("Human", "Assistant"),
messages=(
("Human", "Hi!\n\n### Response:"),
("Assistant", "Hi there! How can I help you today?\n")
),
offset=2,
sep_style=SeparatorStyle.SINGLE,
sep="###",
)
conv_llava_v1 = Conversation(
system="You are vary, a large language and vision assistant trained by Foundation Model Group, Megvii Technology."
"You are able to understand the visual content that the user provides, and assist the user with a variety of tasks using natural language."
"Follow the instructions carefully and explain your answers in detail.",
roles=("USER", "ASSISTANT"),
version="v1",
messages=(),
offset=0,
sep_style=SeparatorStyle.TWO,
sep=" ",
sep2="</s>",
)
default_conversation = conv_mpt
conv_templates = {
"default": simple_conv_multimodal,
"simple": simple_conv,
"simple_legacy": simple_conv_legacy,
"multimodal": simple_conv,
"mpt_multimodal": simple_conv_mpt_multimodal,
"llava_v1": conv_llava_v1,
"mpt_eval": conv_mpt_eval,
# fastchat
"v1": conv_vicuna_v1_1,
"baichuan": conv_vicuna_v1_1,
"bair_v1": conv_bair_v1,
"vicuna_v1_1": conv_vicuna_v1_1,
"mpt": conv_mpt,
"qwen": conv_mpt,
"mpt_text": conv_mpt_text,
}
if __name__ == "__main__":
print(default_conversation.get_prompt())
# Adopted from https://github.com/lm-sys/FastChat. Below is the original copyright:
from typing import List, Optional, Tuple
import torch
from torch import nn
import transformers
from transformers.models.llama.modeling_llama import apply_rotary_pos_emb
from einops import rearrange
from flash_attn.flash_attn_interface import flash_attn_unpadded_qkvpacked_func
from flash_attn.bert_padding import unpad_input, pad_input
def forward(
self,
hidden_states: torch.Tensor,
past_key_value: Optional[Tuple[torch.Tensor]] = None,
attention_mask: Optional[torch.Tensor] = None,
output_attentions: bool = False,
use_cache: bool = False,
) -> Tuple[torch.Tensor, Optional[torch.Tensor],
Optional[Tuple[torch.Tensor]]]:
"""Input shape: Batch x Time x Channel
attention_mask: [bsz, q_len]
"""
bsz, q_len, _ = hidden_states.size()
query_states = self.q_proj(hidden_states).view(
bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
key_states = self.k_proj(hidden_states).view(
bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
value_states = self.v_proj(hidden_states).view(
bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
# [bsz, q_len, nh, hd]
# [bsz, nh, q_len, hd]
kv_seq_len = key_states.shape[-2]
offset = 0
if past_key_value is not None:
offset = past_key_value[0].shape[-2]
kv_seq_len += offset
cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len)
query_states, key_states = apply_rotary_pos_emb(query_states,
key_states,
cos,
sin,
offset=offset)
# [bsz, nh, t, hd]
assert not output_attentions, "output_attentions is not supported"
assert not use_cache, "use_cache is not supported"
assert past_key_value is None, "past_key_value is not supported"
# Flash attention codes from
# https://github.com/HazyResearch/flash-attention/blob/main/flash_attn/flash_attention.py
# transform the data into the format required by flash attention
qkv = torch.stack([query_states, key_states, value_states], dim=2) # [bsz, nh, 3, q_len, hd]
qkv = qkv.transpose(1, 3) # [bsz, q_len, 3, nh, hd]
# We have disabled _prepare_decoder_attention_mask in LlamaModel
# the attention_mask should be the same as the key_padding_mask
key_padding_mask = attention_mask
if key_padding_mask is None:
qkv = rearrange(qkv, 'b s ... -> (b s) ...')
max_s = q_len
cu_q_lens = torch.arange(0, (bsz + 1) * q_len, step=q_len, dtype=torch.int32,
device=qkv.device)
output = flash_attn_unpadded_qkvpacked_func(
qkv, cu_q_lens, max_s, 0.0,
softmax_scale=None, causal=True
)
output = rearrange(output, '(b s) ... -> b s ...', b=bsz)
else:
nheads = qkv.shape[-2]
x = rearrange(qkv, 'b s three h d -> b s (three h d)')
x_unpad, indices, cu_q_lens, max_s = unpad_input(x, key_padding_mask)
x_unpad = rearrange(x_unpad, 'nnz (three h d) -> nnz three h d', three=3, h=nheads)
output_unpad = flash_attn_unpadded_qkvpacked_func(
x_unpad, cu_q_lens, max_s, 0.0,
softmax_scale=None, causal=True
)
output = rearrange(pad_input(rearrange(output_unpad, 'nnz h d -> nnz (h d)'),
indices, bsz, q_len),
'b s (h d) -> b s h d', h=nheads)
return self.o_proj(rearrange(output,
'b s h d -> b s (h d)')), None, None
# Disable the transformation of the attention mask in LlamaModel as the flash attention
# requires the attention mask to be the same as the key_padding_mask
def _prepare_decoder_attention_mask(self, attention_mask, input_shape,
inputs_embeds, past_key_values_length):
# [bsz, seq_len]
return attention_mask
def replace_llama_attn_with_flash_attn():
transformers.models.llama.modeling_llama.LlamaModel._prepare_decoder_attention_mask = _prepare_decoder_attention_mask
transformers.models.llama.modeling_llama.LlamaAttention.forward = forward
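# Usage note (added for clarity): the training entry script applies this patch before
# importing the rest of the training stack, so every LlamaAttention created afterwards
# uses the flash-attention forward defined above:
#   from vary.utils.llama_flash_attn_monkey_patch import replace_llama_attn_with_flash_attn
#   replace_llama_attn_with_flash_attn()
#   from vary.train.train_lora import train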
import datetime
import logging
import logging.handlers
import os
import sys
import torch
import requests
from transformers import StoppingCriteria
from vary.utils.constants import LOGDIR
server_error_msg = "**NETWORK ERROR DUE TO HIGH TRAFFIC. PLEASE REGENERATE OR REFRESH THIS PAGE.**"
moderation_msg = "YOUR INPUT VIOLATES OUR CONTENT MODERATION GUIDELINES. PLEASE TRY AGAIN."
handler = None
def build_logger(logger_name, logger_filename):
global handler
formatter = logging.Formatter(
fmt="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
)
# Set the format of root handlers
if not logging.getLogger().handlers:
logging.basicConfig(level=logging.INFO)
logging.getLogger().handlers[0].setFormatter(formatter)
# Redirect stdout and stderr to loggers
stdout_logger = logging.getLogger("stdout")
stdout_logger.setLevel(logging.INFO)
sl = StreamToLogger(stdout_logger, logging.INFO)
sys.stdout = sl
stderr_logger = logging.getLogger("stderr")
stderr_logger.setLevel(logging.ERROR)
sl = StreamToLogger(stderr_logger, logging.ERROR)
sys.stderr = sl
# Get logger
logger = logging.getLogger(logger_name)
logger.setLevel(logging.INFO)
# Add a file handler for all loggers
if handler is None:
os.makedirs(LOGDIR, exist_ok=True)
filename = os.path.join(LOGDIR, logger_filename)
handler = logging.handlers.TimedRotatingFileHandler(
filename, when='D', utc=True)
handler.setFormatter(formatter)
for name, item in logging.root.manager.loggerDict.items():
if isinstance(item, logging.Logger):
item.addHandler(handler)
return logger
class StreamToLogger(object):
"""
Fake file-like stream object that redirects writes to a logger instance.
"""
def __init__(self, logger, log_level=logging.INFO):
self.terminal = sys.stdout
self.logger = logger
self.log_level = log_level
self.linebuf = ''
def __getattr__(self, attr):
return getattr(self.terminal, attr)
def write(self, buf):
temp_linebuf = self.linebuf + buf
self.linebuf = ''
for line in temp_linebuf.splitlines(True):
# From the io.TextIOWrapper docs:
# On output, if newline is None, any '\n' characters written
# are translated to the system default line separator.
# By default sys.stdout.write() expects '\n' newlines and then
# translates them so this is still cross platform.
if line[-1] == '\n':
self.logger.log(self.log_level, line.rstrip())
else:
self.linebuf += line
def flush(self):
if self.linebuf != '':
self.logger.log(self.log_level, self.linebuf.rstrip())
self.linebuf = ''
def disable_torch_init():
"""
Disable the redundant torch default initialization to accelerate model creation.
"""
import torch
setattr(torch.nn.Linear, "reset_parameters", lambda self: None)
setattr(torch.nn.LayerNorm, "reset_parameters", lambda self: None)
def violates_moderation(text):
"""
Check whether the text violates OpenAI moderation API.
"""
url = "https://api.openai.com/v1/moderations"
headers = {"Content-Type": "application/json",
"Authorization": "Bearer " + os.environ["OPENAI_API_KEY"]}
text = text.replace("\n", "")
data = "{" + '"input": ' + f'"{text}"' + "}"
data = data.encode("utf-8")
try:
ret = requests.post(url, headers=headers, data=data, timeout=5)
flagged = ret.json()["results"][0]["flagged"]
except requests.exceptions.RequestException as e:
flagged = False
except KeyError as e:
flagged = False
return flagged
def pretty_print_semaphore(semaphore):
if semaphore is None:
return "None"
return f"Semaphore(value={semaphore._value}, locked={semaphore.locked()})"
class KeywordsStoppingCriteria(StoppingCriteria):
def __init__(self, keywords, tokenizer, input_ids):
self.keywords = keywords
self.keyword_ids = [tokenizer(keyword).input_ids for keyword in keywords]
self.keyword_ids = [keyword_id[0] for keyword_id in self.keyword_ids if type(keyword_id) is list and len(keyword_id) == 1]
self.tokenizer = tokenizer
self.start_len = None
self.input_ids = input_ids
def __call__(self, output_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
if self.start_len is None:
self.start_len = self.input_ids.shape[1]
else:
for keyword_id in self.keyword_ids:
if output_ids[0, -1] == keyword_id:
return True
outputs = self.tokenizer.batch_decode(output_ids[:, self.start_len:], skip_special_tokens=True)[0]
for keyword in self.keywords:
if keyword in outputs:
return True
return False
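# Usage sketch (assumed; follows the standard transformers generate() API): stop decoding
# once the conversation separator shows up in the newly generated text.
#   from transformers import StoppingCriteriaList
#   stop = KeywordsStoppingCriteria(["<|im_end|>"], tokenizer, input_ids)
#   output_ids = model.generate(input_ids, stopping_criteria=StoppingCriteriaList([stop]))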
def smart_tokenizer_and_embedding_resize(special_tokens_dict, tokenizer, model):
"""Resize tokenizer and embedding.
Note: This is the unoptimized version that may make your embedding size not be divisible by 64.
"""
# num_new_tokens = tokenizer.add_special_tokens(special_tokens_dict)
# # num_new_tokens = 1
# # tokenizer.add_tokens(special_tokens_dict, special_tokens=True)
# model.resize_token_embeddings(len(tokenizer))
num_new_tokens = tokenizer.add_special_tokens(special_tokens_dict)
model.resize_token_embeddings(len(tokenizer))
if num_new_tokens > 0:
input_embeddings = model.get_input_embeddings().weight.data
output_embeddings = model.get_output_embeddings().weight.data
input_embeddings_avg = input_embeddings[:-num_new_tokens].mean(
dim=0, keepdim=True)
output_embeddings_avg = output_embeddings[:-num_new_tokens].mean(
dim=0, keepdim=True)
input_embeddings[-num_new_tokens:] = input_embeddings_avg
output_embeddings[-num_new_tokens:] = output_embeddings_avg
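# Comment added for clarity: new embedding rows would otherwise be randomly initialized,
# which tends to produce outsized logits for the new tokens; seeding them with the mean
# of the existing embeddings keeps early training stable. The Qwen training script calls:
#   smart_tokenizer_and_embedding_resize(
#       special_tokens_dict=dict(pad_token='<|endoftext|>'), tokenizer=tokenizer, model=model)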
def maybe_zero_3(param, ignore_status=False, name=None):
from deepspeed import zero
from deepspeed.runtime.zero.partition_parameters import ZeroParamStatus
if hasattr(param, "ds_id"):
if param.ds_status == ZeroParamStatus.NOT_AVAILABLE:
if not ignore_status:
logging.warning(f"{name}: param.ds_status != ZeroParamStatus.NOT_AVAILABLE: {param.ds_status}")
with zero.GatheredParameters([param]):
param = param.data.detach().cpu().clone()
else:
param = param.detach().cpu().clone()
return param
# Borrowed from peft.utils.get_peft_model_state_dict
def get_peft_state_maybe_zero_3(named_params, bias):
if bias == "none":
to_return = {k: t for k, t in named_params if "lora_" in k}
elif bias == "all":
to_return = {k: t for k, t in named_params if "lora_" in k or "bias" in k}
elif bias == "lora_only":
to_return = {}
maybe_lora_bias = {}
lora_bias_names = set()
for k, t in named_params:
if "lora_" in k:
to_return[k] = t
bias_name = k.split("lora_")[0] + "bias"
lora_bias_names.add(bias_name)
elif "bias" in k:
maybe_lora_bias[k] = t
for k, t in maybe_lora_bias.items():
if k in lora_bias_names:
to_return[k] = t
else:
raise NotImplementedError
to_return = {k: maybe_zero_3(v, name=k) for k, v in to_return.items()}
return to_return
def get_peft_state_non_lora_maybe_zero_3(named_params, require_grad_only=True):
to_return = {k: t for k, t in named_params if "lora_" not in k}
if require_grad_only:
to_return = {k: t for k, t in to_return.items() if t.requires_grad}
to_return = {k: maybe_zero_3(v, ignore_status=True).cpu() for k, v in to_return.items()}
return to_return
def find_all_linear_names(model):
cls = torch.nn.Linear
lora_module_names = set()
for name, module in model.named_modules():
if isinstance(module, cls) and 'vision_model' not in name and 'mm_projector' not in name and 'vision_encoder' not in name and 'conv_final' not in name and 'lm_head' not in name:
lora_module_names.add(name)
print(lora_module_names)
return list(lora_module_names)
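# Typical follow-up (sketch assuming peft is used for the LoRA run; not shown in this
# file): the collected Linear module names become the LoRA target modules.
#   from peft import LoraConfig
#   lora_config = LoraConfig(r=training_args.lora_r, lora_alpha=training_args.lora_alpha,
#                            lora_dropout=training_args.lora_dropout,
#                            target_modules=find_all_linear_names(model),
#                            bias=training_args.lora_bias, task_type="CAUSAL_LM")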
{
"bf16": {
"enabled": true
},
"train_micro_batch_size_per_gpu": "auto",
"zero_optimization": {
"stage": 2,
"overlap_comm": true,
"contiguous_gradients": true,
"sub_group_size": 1e9,
"reduce_bucket_size": "auto"
}
}