"src/vscode:/vscode.git/clone" did not exist on "439c35f3b5f184d80d41f44c085a02785f9873f6"
Commit 24eacbc0 authored by chenzk's avatar chenzk
Browse files

v1.0

parents
miniCPM-bf16 @ 10f760ed
Subproject commit 10f760ed0246a8bc6bbc3742f428306a5afabe07
# 转换为 ChatML 格式
import os
import shutil
import json
input_dir = "data/AdvertiseGen"
output_dir = "data/AdvertiseGenChatML"
if os.path.exists(output_dir):
shutil.rmtree(output_dir)
os.makedirs(output_dir, exist_ok=True)
for fn in ["train.json", "dev.json"]:
data_out_list = []
with open(os.path.join(input_dir, fn), "r") as f, open(os.path.join(output_dir, fn), "w") as fo:
for line in f:
if len(line.strip()) > 0:
data = json.loads(line)
data_out = {
"messages": [
{
"role": "user",
"content": data["content"],
},
{
"role": "assistant",
"content": data["summary"],
},
]
}
data_out_list.append(data_out)
json.dump(data_out_list, fo, ensure_ascii=False, indent=4)
[
{
"messages": [
{
"role": "user",
"content": "简约而不简单的牛仔外套,白色的衣身十分百搭。衣身多处有做旧破洞设计,打破单调乏味,增加一丝造型看点。衣身后背处有趣味刺绣装饰,丰富层次感,彰显别样时尚。"
},
{
"role": "assistant",
"content": "0"
}
]
},
{
"messages": [
{
"role": "user",
"content": "这款BRAND针织两件套连衣裙,简约的纯色半高领针织上衣,修饰着颈部线,尽显优雅气质。同时搭配叠穿起一条背带式的复古格纹裙,整体散发着一股怀旧的时髦魅力,很是文艺范。"
},
{
"role": "assistant",
"content": "0"
}
]
},
{
"messages": [
{
"role": "user",
"content": "嘻哈玩转童年,随时<UNK>,没错,出街还是要靠卫衣来装酷哦!时尚个性的连帽设计,率性有范还防风保暖。还有胸前撞色的卡通印花设计,靓丽抢眼更富有趣味性,加上前幅大容量又时尚美观的袋鼠兜,简直就是孩子耍帅装酷必备的利器。"
},
{
"role": "assistant",
"content": "0"
}
]
},
{
"messages": [
{
"role": "user",
"content": "裤子是简约大方的版型设计,带来一种极简主义风格而且不乏舒适优雅感,是衣橱必不可少的一件百搭单品。标志性的logo可以体现出一股子浓郁的英伦风情,轻而易举带来独一无二的<UNK>体验。"
},
{
"role": "assistant",
"content": "0"
}
]
},
{
"messages": [
{
"role": "user",
"content": "这款来自梵凯的半身裙富有十足的设计感,采用了别致的不规则设计,凸显出时尚前卫的格调,再搭配俏皮的高腰设计,收腰提臀的同时还勾勒出优美迷人的身材曲线,而且还帮你拉长腿部比例,释放出优雅娇俏的小女人味。并且独特的弧形下摆还富有流畅的线条美,一颦一动间展现出灵动柔美的气质。"
},
{
"role": "assistant",
"content": "0"
}
]
},
{
"messages": [
{
"role": "user",
"content": "这件衬衫的款式非常的宽松,利落的线条可以很好的隐藏身材上的小缺点,穿在身上有着很好的显瘦效果。领口装饰了一个可爱的抽绳,漂亮的绳结展现出了十足的个性,配合时尚的泡泡袖型,尽显女性甜美可爱的气息。"
},
{
"role": "assistant",
"content": "0"
}
]
},
{
"messages": [
{
"role": "user",
"content": "宫廷风的甜美蕾丝设计,清醒的蕾丝拼缝处,刺绣定制的贝壳花边,增添了裙子的精致感觉。超大的裙摆,加上精细的小花边设计,上身后既带着仙气撩人又很有女人味。泡泡袖上的提花面料,在细节处增加了浪漫感,春日的仙女姐姐。浪漫蕾丝布满整个裙身,美丽明艳,气质超仙。"
},
{
"role": "assistant",
"content": "0"
}
]
},
{
"messages": [
{
"role": "user",
"content": "个性化的九分裤型,穿着在身上,能够从视觉上拉长你的身体比例,让你看起来更加的有范。简约的黑色系列,极具时尚的韵味,充分凸显你专属的成熟韵味。修身的立体廓形,为你塑造修长的曲线。"
},
{
"role": "assistant",
"content": "0"
}
]
},
{
"messages": [
{
"role": "user",
"content": "文艺个性的印花连衣裙,藏青色底蕴,低调又大气,撞色太阳花分布整个裙身,绚丽而美好,带来时尚减龄的气质。基础款的舒适圆领,简约不失大方,勾勒精致脸庞。领后是一粒包布扣固定,穿脱十分方便。前片立体的打褶设计,搭配后片压褶的做工,增添层次和空间感,显瘦又有型。"
},
{
"role": "assistant",
"content": "0"
}
]
},
{
"messages": [
{
"role": "user",
"content": "裙身处采用立体蝴蝶结装饰辅以蓝色条带点缀,令衣身造型饱满富有层次的同时为其注入一丝甜美气息。将女孩清新娇俏的一面衬托而出。"
},
{
"role": "assistant",
"content": "0"
}
]
},
{
"messages": [
{
"role": "user",
"content": "这条颜色素雅的长裙,以纯净的白色作为底色,辅以印在裙上的点点小碎花,<UNK>勾勒出一幅生动优美的“风景图”,给人一种大自然的清新之感,好似吸收新鲜空气的那种舒畅感。腰间贴心地设计成松紧腰,将腰线很好地展现出来,十分纤巧,在裙子的侧边,有着一个隐形的拉链,能够让你穿脱自如。"
},
{
"role": "assistant",
"content": "0"
}
]
},
{
"messages": [
{
"role": "user",
"content": "不同于一般的西服裤。这款<UNK>小喇叭羊毛裤在样式上显得更加时髦优雅,特地采用微微的九分喇叭裤腿设计,视觉上将脚踝处显得更加纤细。并且特地甄选柔软的羊毛材质,就算直接贴肤穿着,也不会觉得寒冷,比较适合初秋穿噢。"
},
{
"role": "assistant",
"content": "0"
}
]
},
{
"messages": [
{
"role": "user",
"content": "上衣与裤子的连体式设计从整体看起来十分的具有大牌的风范。简约,没有任何的其他装饰,把自己的独特尽情展现。上衣胸口两边设有两个加大口袋,更增添了层次感。衣襟<UNK>了拉链,让穿脱更加的方便,轻轻<UNK>一点,显得更加时尚。"
},
{
"role": "assistant",
"content": "0"
}
]
},
{
"messages": [
{
"role": "user",
"content": "这件西装外套选用了经久不衰的格纹元素,通过色彩的明暗对比,展现出丰富的视觉层次,又缔造了英伦风的复古气息。法式的大翻领,延长颈部线条,彰显出女性帅气干练的特殊魅力。宽松舒适的版型完美掩藏了身材的小秘密,给身体自由活动空间。"
},
{
"role": "assistant",
"content": "0"
}
]
},
{
"messages": [
{
"role": "user",
"content": "我的心情很低落,我不想和任何人交流"
},
{
"role": "assistant",
"content": "1"
}
]
},
{
"messages": [
{
"role": "user",
"content": "我感到焦虑和不安,无法集中精力。"
},
{
"role": "assistant",
"content": "1"
}
]
},
{
"messages": [
{
"role": "user",
"content": "我感觉被忽视了,这让我感到很孤独和无助。"
},
{
"role": "assistant",
"content": "1"
}
]
},
{
"messages": [
{
"role": "user",
"content": "我感到非常沮丧和失望"
},
{
"role": "assistant",
"content": "1"
}
]
}
]
\ No newline at end of file
[
{
"messages": [
{
"role": "user",
"content": "宽松的阔腿裤这两年真的吸粉不少,明星时尚达人的心头爱。毕竟好穿时尚,谁都能穿出腿长2米的效果宽松的裤腿,当然是遮肉小能手啊。上身随性自然不拘束,面料亲肤舒适贴身体验感棒棒哒。系带部分增加设计看点,还让单品的设计感更强。腿部线条若隐若现的,性感撩人。颜色敲温柔的,与裤子本身所呈现的风格有点反差萌。"
},
{
"role": "assistant",
"content": "0"
}
]
},
{
"messages": [
{
"role": "user",
"content": "圆形领口修饰脖颈线条,适合各种脸型,耐看有气质。无袖设计,尤显清凉,简约横条纹装饰,使得整身人鱼造型更为生动立体。加之撞色的鱼尾下摆,深邃富有诗意。收腰包臀,修饰女性身体曲线,结合别出心裁的鱼尾裙摆设计,勾勒出自然流畅的身体轮廓,展现了婀娜多姿的迷人姿态。"
},
{
"role": "assistant",
"content": "0"
}
]
},
{
"messages": [
{
"role": "user",
"content": "宽松的卫衣版型包裹着整个身材,宽大的衣身与身材形成鲜明的对比描绘出纤瘦的身形。下摆与袖口的不规则剪裁设计,彰显出时尚前卫的形态。被剪裁过的样式呈现出布条状自然地垂坠下来,别具有一番设计感。线条分明的字母样式有着花式的外观,棱角分明加上具有少女元气的枣红色十分有年轻活力感。粉红色的衣身把肌肤衬托得很白嫩又健康。"
},
{
"role": "assistant",
"content": "0"
}
]
},
{
"messages": [
{
"role": "user",
"content": "踩着轻盈的步伐享受在午后的和煦风中,让放松与惬意感为你免去一身的压力与束缚,仿佛要将灵魂也寄托在随风摇曳的雪纺连衣裙上,吐露出<UNK>微妙而又浪漫的清新之意。宽松的a字版型除了能够带来足够的空间,也能以上窄下宽的方式强化立体层次,携带出自然优雅的曼妙体验。"
},
{
"role": "assistant",
"content": "0"
}
]
},
{
"messages": [
{
"role": "user",
"content": "想要在人群中脱颖而出吗?那么最适合您的莫过于这款polo衫短袖,采用了经典的polo领口和柔软纯棉面料,让您紧跟时尚潮流。再配合上潮流的蓝色拼接设计,使您的风格更加出众。就算单从选料上来说,这款polo衫的颜色沉稳经典,是这个季度十分受大众喜爱的风格了,而且兼具舒适感和时尚感。"
},
{
"role": "assistant",
"content": "0"
}
]
},
{
"messages": [
{
"role": "user",
"content": "小女人十足的条纹衬衣,缎面一点点的复古,还有蓝绿色这种高级气质复古色,真丝材质,撞色竖条纹特别的现代感味道,直h型的裁剪和特别的衣长款式,更加独立性格。双层小立领,更显脸型。"
},
{
"role": "assistant",
"content": "0"
}
]
},
{
"messages": [
{
"role": "user",
"content": "这款连衣裙,由上到下都透出一丝迷人诱惑的女性魅力,经典圆领型,开口度恰好,露出你的迷人修长的脖颈线条,很是优雅气质,短袖设计,在这款上竟是撩人美貌,高腰线,散开的裙摆,到小腿的长度,遮住了腿部粗的部分,对身材有很好的修饰作用,穿起来很女神;裙身粉红色花枝重工刺绣,让人一眼难忘!而且在这种网纱面料上做繁复图案的绣花,是很考验工艺的,对机器的要求会更高,更加凸显我们的高品质做工;"
},
{
"role": "assistant",
"content": "0"
}
]
},
{
"messages": [
{
"role": "user",
"content": "一款非常简洁大方的纯色卫衣,设计点在于胸前的“<UNK><UNK>”的中文字印花,新颖特别,让人眼前一亮。简单又吸睛的款式,而且不失时髦感,很适合个性年轻人。"
},
{
"role": "assistant",
"content": "0"
}
]
},
{
"messages": [
{
"role": "user",
"content": "看惯了灰色的冷淡和黑色的沉闷感,来一点醒目的彩色增添点活力吧。亮眼又吸睛的姜黄色色调,嫩肤显白非常的有设计感。趣味的撞色和宽松的版型相交辉映,修饰身形小缺点的同时,时尚又百搭。优雅的落肩袖,轻松修饰肩部线条,让毛衣上身凸显出一丝慵懒随性的休闲感,时尚魅力尽显。"
},
{
"role": "assistant",
"content": "0"
}
]
},
{
"messages": [
{
"role": "user",
"content": "黑与白,两种最极端的颜色却轻松搭配成了经典,就像此款衬衣,无需过多装饰,仅色调就足够醒目个性,受潮<UNK>所喜欢。做了无袖中长款的样式,走路带风的感觉着实不错,圆领的设计,不是常规的衬衫领,少了点正式反而有种休闲感觉,适合孩子们穿着。后背大面积撞色印花装点,是时尚潮流的象征,也让衣衣不至于单调,轻松就能穿出彩。"
},
{
"role": "assistant",
"content": "0"
}
]
},
{
"messages": [
{
"role": "user",
"content": "个性休闲风的连帽卫衣造型时髦大方,宽松的版型剪裁让肉肉的小宝贝也可以穿着,保暖的连帽设计时刻给予宝贝温柔的呵护,袖子和后背别致时髦的字母印花点缀,满满的街头元素融入,演绎休闲朋克风,对称的小口袋美观大方,方便放置更多的随身物品。"
},
{
"role": "assistant",
"content": "0"
}
]
},
{
"messages": [
{
"role": "user",
"content": "简单大气的设计,不费吹灰之力就能搭配的时髦范儿。时尚的配色一点都不觉得平淡了,有种浑然天成的大气感。强调了整体的装饰,和谐又不失个性,搭配裤装帅气十足,搭配裙子精致优雅。链条和肩带的搭配让使用感更加舒服,单肩手提都好看。"
},
{
"role": "assistant",
"content": "0"
}
]
},
{
"messages": [
{
"role": "user",
"content": "深蓝色的高腰牛仔裤,修身的款式勾勒出纤细的美腿。牛仔裤的裤脚设计<UNK>张开的喇叭型,巧妙地修饰了小腿的线条,洋溢着复古的年代感。"
},
{
"role": "assistant",
"content": "0"
}
]
},
{
"messages": [
{
"role": "user",
"content": "这是一件显得特别清新的衬衣,采用了条纹的设计,给予人一种甜美可人的气质。并且融合了别致的斜肩一字领设计,高调的展示出性感的锁骨,将迷人的香肩展现在外,性感中不失去清纯的气息。袖口处的蝴蝶结系带装饰,增添了俏皮的韵味,简洁大方。且在下摆处采用了不对称的设计,增强了视觉效果,更显潮流。"
},
{
"role": "assistant",
"content": "0"
}
]
},
{
"messages": [
{
"role": "user",
"content": "作为基础款单品,牛仔裤也<UNK><UNK>,想要呈现给大家的是——每次搭配都有新感觉。裤子经过复古做旧处理,风格鲜明,也很注重细节,连纽扣也做了统一的做旧处理,融入个性十足的磨破设计,高腰直筒basic裤型,修饰身材,穿出高挑长腿。"
},
{
"role": "assistant",
"content": "0"
}
]
},
{
"messages": [
{
"role": "user",
"content": "一款温暖柔软又富有弹性的针织衫,不仅可以抵御严寒侵袭,还能更好地进行搭配。v领的设计,能勾勒出迷人的天鹅颈以及衬托出娇小的脸型。宽松又别致的剪裁,能从视觉上显露纤长的下半身,起到显瘦的效果。直筒造型的袖子,修饰出优美的手臂线条,衣身上的方格刺绣,时尚又吸睛。"
},
{
"role": "assistant",
"content": "0"
}
]
},
{
"messages": [
{
"role": "user",
"content": "绿色的衣身上镶嵌着<UNK>,就是这款衬衫最大的迷人之处,“红花配绿叶”般的色调,将清新气息阐述的淋漓尽致。经典的翻领更是贴心,修饰颈部线条的同时,尽显精致干练的气质,出街轻松凹造型。"
},
{
"role": "assistant",
"content": "0"
}
]
},
{
"messages": [
{
"role": "user",
"content": "这款外套采用了撞色拉链织带以及字母印花设计。这两种元素的融入使外套不会显得过于单调沉闷,吸睛而亮眼,充满年轻与朝气感,非常减龄。"
},
{
"role": "assistant",
"content": "0"
}
]
},
{
"messages": [
{
"role": "user",
"content": "本款连衣裙整体采用h型的轮廓设计,藏肉显瘦,不挑身材,适合各种身形的人穿着。小翻领的领口设计,使得本款连衣裙穿在身上看起来十分的精神帅气,具有青春活力。单排扣的衣门襟设计,又给本款连衣裙带来了一丝的复古味道。裙身上的刺绣花朵装饰,使得本款连衣裙不显得单调,富有层次感,上身给人一种独特的时尚魅力。长袖的设计,更加的贴合手臂曲线,上身更加的舒适贴身。"
},
{
"role": "assistant",
"content": "0"
}
]
},
{
"messages": [
{
"role": "user",
"content": "我感到被背叛了,这让我感到很心痛和失望。"
},
{
"role": "assistant",
"content": "1"
}
]
},
{
"messages": [
{
"role": "user",
"content": "这件事情让我感到很羞耻和尴尬。"
},
{
"role": "assistant",
"content": "1"
}
]
},
{
"messages": [
{
"role": "user",
"content": "我感到很内疚和自责,因为我没有做好。"
},
{
"role": "assistant",
"content": "1"
}
]
},
{
"messages": [
{
"role": "user",
"content": "这段关系让我感到很疲惫和无力,我不知道该怎么办。"
},
{
"role": "assistant",
"content": "1"
}
]
},
{
"messages": [
{
"role": "user",
"content": "我的心情很沉重,我无法摆脱这种负面情绪。"
},
{
"role": "assistant",
"content": "1"
}
]
},
{
"messages": [
{
"role": "user",
"content": "这件事情让我感到很愤怒和不满。"
},
{
"role": "assistant",
"content": "1"
}
]
}
]
from typing import Dict
from typing import List
from typing import Tuple
import argparse
import gradio as gr
import torch
from threading import Thread
from transformers import (
AutoModelForCausalLM,
AutoTokenizer,
TextIteratorStreamer
)
import warnings
warnings.filterwarnings('ignore', category=UserWarning, message='TypedStorage is deprecated')
parser = argparse.ArgumentParser()
parser.add_argument("--model_path", type=str, default="")
parser.add_argument("--torch_dtype", type=str, default="bfloat16", choices=["float32", "bfloat16"])
parser.add_argument("--server_name", type=str, default="127.0.0.1")
parser.add_argument("--server_port", type=int, default=7860)
args = parser.parse_args()
# init model torch dtype
torch_dtype = args.torch_dtype
if torch_dtype =="" or torch_dtype == "bfloat16":
torch_dtype = torch.bfloat16
elif torch_dtype == "float32":
torch_dtype = torch.float32
else:
raise ValueError(f"Invalid torch dtype: {torch_dtype}")
# init model and tokenizer
path = args.model_path
tokenizer = AutoTokenizer.from_pretrained(path)
model = AutoModelForCausalLM.from_pretrained(path, torch_dtype=torch_dtype, device_map="auto", trust_remote_code=True)
# init gradio demo host and port
server_name=args.server_name
server_port=args.server_port
def hf_gen(dialog: List, top_p: float, temperature: float, repetition_penalty: float, max_dec_len: int):
"""generate model output with huggingface api
Args:
query (str): actual model input.
top_p (float): only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation.
temperature (float): Strictly positive float value used to modulate the logits distribution.
max_dec_len (int): The maximum numbers of tokens to generate.
Yields:
str: real-time generation results of hf model
"""
inputs = tokenizer.apply_chat_template(dialog, tokenize=False, add_generation_prompt=False)
enc = tokenizer(inputs, return_tensors="pt").to("cuda")
streamer = TextIteratorStreamer(tokenizer)
generation_kwargs = dict(
enc,
do_sample=True,
top_k=0,
top_p=top_p,
temperature=temperature,
repetition_penalty=repetition_penalty,
max_new_tokens=max_dec_len,
pad_token_id=tokenizer.eos_token_id,
streamer=streamer,
)
thread = Thread(target=model.generate, kwargs=generation_kwargs)
thread.start()
answer = ""
for new_text in streamer:
answer += new_text
yield answer[4 + len(inputs):]
def generate(chat_history: List, query: str, top_p: float, temperature: float, repetition_penalty: float, max_dec_len: int):
"""generate after hitting "submit" button
Args:
chat_history (List): [[q_1, a_1], [q_2, a_2], ..., [q_n, a_n]]. list that stores all QA records
query (str): query of current round
top_p (float): only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation.
temperature (float): strictly positive float value used to modulate the logits distribution.
max_dec_len (int): The maximum numbers of tokens to generate.
Yields:
List: [[q_1, a_1], [q_2, a_2], ..., [q_n, a_n], [q_n+1, a_n+1]]. chat_history + QA of current round.
"""
assert query != "", "Input must not be empty!!!"
# apply chat template
model_input = []
for q, a in chat_history:
model_input.append({"role": "user", "content": q})
model_input.append({"role": "assistant", "content": a})
model_input.append({"role": "user", "content": query})
# yield model generation
chat_history.append([query, ""])
for answer in hf_gen(model_input, top_p, temperature, repetition_penalty, max_dec_len):
chat_history[-1][1] = answer.strip("</s>")
yield gr.update(value=""), chat_history
def regenerate(chat_history: List, top_p: float, temperature: float, repetition_penalty: float, max_dec_len: int):
"""re-generate the answer of last round's query
Args:
chat_history (List): [[q_1, a_1], [q_2, a_2], ..., [q_n, a_n]]. list that stores all QA records
top_p (float): only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation.
temperature (float): strictly positive float value used to modulate the logits distribution.
max_dec_len (int): The maximum numbers of tokens to generate.
Yields:
List: [[q_1, a_1], [q_2, a_2], ..., [q_n, a_n]]. chat_history
"""
assert len(chat_history) >= 1, "History is empty. Nothing to regenerate!!"
# apply chat template
model_input = []
for q, a in chat_history[:-1]:
model_input.append({"role": "user", "content": q})
model_input.append({"role": "assistant", "content": a})
model_input.append({"role": "user", "content": chat_history[-1][0]})
# yield model generation
for answer in hf_gen(model_input, top_p, temperature, repetition_penalty, max_dec_len):
chat_history[-1][1] = answer.strip("</s>")
yield gr.update(value=""), chat_history
def clear_history():
"""clear all chat history
Returns:
List: empty chat history
"""
return []
def reverse_last_round(chat_history):
"""reverse last round QA and keep the chat history before
Args:
chat_history (List): [[q_1, a_1], [q_2, a_2], ..., [q_n, a_n]]. list that stores all QA records
Returns:
List: [[q_1, a_1], [q_2, a_2], ..., [q_n-1, a_n-1]]. chat_history without last round.
"""
assert len(chat_history) >= 1, "History is empty. Nothing to reverse!!"
return chat_history[:-1]
# launch gradio demo
with gr.Blocks(theme="soft") as demo:
gr.Markdown("""# MiniCPM Gradio Demo""")
with gr.Row():
with gr.Column(scale=1):
top_p = gr.Slider(0, 1, value=0.8, step=0.1, label="top_p")
temperature = gr.Slider(0.1, 2.0, value=0.5, step=0.1, label="temperature")
repetition_penalty = gr.Slider(0.1, 2.0, value=1.1, step=0.1, label="repetition_penalty")
max_dec_len = gr.Slider(1, 1024, value=1024, step=1, label="max_dec_len")
with gr.Column(scale=5):
chatbot = gr.Chatbot(bubble_full_width=False, height=400)
user_input = gr.Textbox(label="User", placeholder="Input your query here!", lines=8)
with gr.Row():
submit = gr.Button("Submit")
clear = gr.Button("Clear")
regen = gr.Button("Regenerate")
reverse = gr.Button("Reverse")
submit.click(generate, inputs=[chatbot, user_input, top_p, temperature, repetition_penalty, max_dec_len], outputs=[user_input, chatbot])
regen.click(regenerate, inputs=[chatbot, top_p, temperature, repetition_penalty, max_dec_len], outputs=[user_input, chatbot])
clear.click(clear_history, inputs=[], outputs=[chatbot])
reverse.click(reverse_last_round, inputs=[chatbot], outputs=[chatbot])
demo.queue()
demo.launch(server_name=server_name, server_port=server_port, show_error=True)
from typing import Dict
from typing import List
from typing import Tuple
import argparse
import gradio as gr
from vllm import LLM, SamplingParams
parser = argparse.ArgumentParser()
parser.add_argument("--model_path", type=str, default="")
parser.add_argument("--torch_dtype", type=str, default="bfloat16", choices=["float32", "bfloat16"])
parser.add_argument("--server_name", type=str, default="127.0.0.1")
parser.add_argument("--server_port", type=int, default=7860)
args = parser.parse_args()
# init model torch dtype
torch_dtype = args.torch_dtype
if torch_dtype =="" or torch_dtype == "bfloat16":
torch_dtype = "bfloat16"
elif torch_dtype == "float32":
torch_dtype = "float32"
else:
raise ValueError(f"Invalid torch dtype: {torch_dtype}")
# init model and tokenizer
path = args.model_path
llm = LLM(model=path, tensor_parallel_size=1, dtype=torch_dtype)
# init gradio demo host and port
server_name=args.server_name
server_port=args.server_port
def vllm_gen(dialog: List, top_p: float, temperature: float, max_dec_len: int):
"""generate model output with huggingface api
Args:
query (str): actual model input.
top_p (float): only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation.
temperature (float): Strictly positive float value used to modulate the logits distribution.
max_dec_len (int): The maximum numbers of tokens to generate.
Yields:
str: real-time generation results of hf model
"""
prompt = ""
assert len(dialog) % 2 == 1
for info in dialog:
if info["role"] == "user":
prompt += "<用户>" + info["content"]
else:
prompt += "<AI>" + info["content"]
prompt += "<AI>"
params_dict = {
"n": 1,
"best_of": 1,
"presence_penalty": 1.0,
"frequency_penalty": 0.0,
"temperature": temperature,
"top_p": top_p,
"top_k": -1,
"use_beam_search": False,
"length_penalty": 1,
"early_stopping": False,
"stop": None,
"stop_token_ids": None,
"ignore_eos": False,
"max_tokens": max_dec_len,
"logprobs": None,
"prompt_logprobs": None,
"skip_special_tokens": True,
}
sampling_params = SamplingParams(**params_dict)
outputs = llm.generate(prompts=prompt, sampling_params=sampling_params)[0]
generated_text = outputs.outputs[0].text
return generated_text
def generate(chat_history: List, query: str, top_p: float, temperature: float, max_dec_len: int):
"""generate after hitting "submit" button
Args:
chat_history (List): [[q_1, a_1], [q_2, a_2], ..., [q_n, a_n]]. list that stores all QA records
query (str): query of current round
top_p (float): only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation.
temperature (float): strictly positive float value used to modulate the logits distribution.
max_dec_len (int): The maximum numbers of tokens to generate.
Yields:
List: [[q_1, a_1], [q_2, a_2], ..., [q_n, a_n], [q_n+1, a_n+1]]. chat_history + QA of current round.
"""
assert query != "", "Input must not be empty!!!"
# apply chat template
model_input = []
for q, a in chat_history:
model_input.append({"role": "user", "content": q})
model_input.append({"role": "assistant", "content": a})
model_input.append({"role": "user", "content": query})
# yield model generation
model_output = vllm_gen(model_input, top_p, temperature, max_dec_len)
chat_history.append([query, model_output])
return gr.update(value=""), chat_history
def regenerate(chat_history: List, top_p: float, temperature: float, max_dec_len: int):
"""re-generate the answer of last round's query
Args:
chat_history (List): [[q_1, a_1], [q_2, a_2], ..., [q_n, a_n]]. list that stores all QA records
top_p (float): only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation.
temperature (float): strictly positive float value used to modulate the logits distribution.
max_dec_len (int): The maximum numbers of tokens to generate.
Yields:
List: [[q_1, a_1], [q_2, a_2], ..., [q_n, a_n]]. chat_history
"""
assert len(chat_history) >= 1, "History is empty. Nothing to regenerate!!"
# apply chat template
model_input = []
for q, a in chat_history[:-1]:
model_input.append({"role": "user", "content": q})
model_input.append({"role": "assistant", "content": a})
model_input.append({"role": "user", "content": chat_history[-1][0]})
# yield model generation
model_output = vllm_gen(model_input, top_p, temperature, max_dec_len)
chat_history[-1][1] = model_output
return gr.update(value=""), chat_history
def clear_history():
"""clear all chat history
Returns:
List: empty chat history
"""
return []
def reverse_last_round(chat_history):
"""reverse last round QA and keep the chat history before
Args:
chat_history (List): [[q_1, a_1], [q_2, a_2], ..., [q_n, a_n]]. list that stores all QA records
Returns:
List: [[q_1, a_1], [q_2, a_2], ..., [q_n-1, a_n-1]]. chat_history without last round.
"""
assert len(chat_history) >= 1, "History is empty. Nothing to reverse!!"
return chat_history[:-1]
# launch gradio demo
with gr.Blocks(theme="soft") as demo:
gr.Markdown("""# MiniCPM Gradio Demo""")
with gr.Row():
with gr.Column(scale=1):
top_p = gr.Slider(0, 1, value=0.8, step=0.1, label="top_p")
temperature = gr.Slider(0.1, 2.0, value=0.5, step=0.1, label="temperature")
max_dec_len = gr.Slider(1, 1024, value=1024, step=1, label="max_dec_len")
with gr.Column(scale=5):
chatbot = gr.Chatbot(bubble_full_width=False, height=400)
user_input = gr.Textbox(label="User", placeholder="Input your query here!", lines=8)
with gr.Row():
submit = gr.Button("Submit")
clear = gr.Button("Clear")
regen = gr.Button("Regenerate")
reverse = gr.Button("Reverse")
submit.click(generate, inputs=[chatbot, user_input, top_p, temperature, max_dec_len], outputs=[user_input, chatbot])
regen.click(regenerate, inputs=[chatbot, top_p, temperature, max_dec_len], outputs=[user_input, chatbot])
clear.click(clear_history, inputs=[], outputs=[chatbot])
reverse.click(reverse_last_round, inputs=[chatbot], outputs=[chatbot])
demo.queue()
demo.launch(server_name=server_name, server_port=server_port, show_error=True)
FROM image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.1.0-centos7.6-dtk23.10-py38
ENV DEBIAN_FRONTEND=noninteractive
# RUN yum update && yum install -y git cmake wget build-essential
RUN source /opt/dtk-23.10/env.sh
# 安装pip相关依赖
COPY requirements.txt requirements.txt
RUN pip3 install -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com -r requirements.txt
# for finetune
jieba>=0.42.1
ruamel_yaml>=0.18.5
rouge_chinese>=1.0.3
jupyter>=1.0.0
datasets>=2.16.1
peft>=0.7.1
transformers==4.37.2
# deepspeed>=0.13.1
# flash_attn>=2.5.1
# MiniCPM 微调
[English Version](https://github.com/OpenBMB/MiniCPM/blob/main/finetune/README_en.md)
本目录提供 MiniCPM-2B 模型的微调示例,包括全量微调和 PEFT。格式上,提供多轮对话微调样例和输入输出格式微调样例。
如果将模型下载到了本地,本文和代码中的 `OpenBMB/MiniCPM-2B` 字段均应替换为相应地址以从本地加载模型。
运行示例需要 `python>=3.10`,除基础的 `torch` 依赖外,示例代码运行还需要依赖。
**我们提供了 [示例notebook](lora_finetune.ipynb) 用于演示如何以 AdvertiseGen 为例处理数据和使用微调脚本。**
```bash
pip install -r requirements.txt
```
## 测试硬件标准
我们仅提供了单机多卡/多机多卡的运行示例,因此您需要至少一台具有多个 GPU 的机器。本仓库中的**默认配置文件**中,我们记录了显存的占用情况:
+ SFT 全量微调: 4张显卡平均分配,每张显卡占用 `30245MiB` 显存。
+ LORA 微调: 1张显卡,占用 `10619MiB` 显存。
> 请注意,该结果仅供参考,对于不同的参数,显存占用可能会有所不同。请结合你的硬件情况进行调整。
## 多轮对话格式
多轮对话微调示例采用 ChatGLM3 对话格式约定,对不同角色添加不同 `loss_mask` 从而在一遍计算中为多轮回复计算 `loss`
对于数据文件,样例采用如下格式
```json
[
{
"messages": [
{
"role": "system",
"content": "<system prompt text>"
},
{
"role": "user",
"content": "<user prompt text>"
},
{
"role": "assistant",
"content": "<assistant response text>"
},
// ... Muti Turn
{
"role": "user",
"content": "<user prompt text>"
},
{
"role": "assistant",
"content": "<assistant response text>"
}
]
}
// ...
]
```
## 数据集格式示例
这里以 AdvertiseGen 数据集为例,
您可以从 [Google Drive](https://drive.google.com/file/d/13_vf0xRTQsyneRKdD1bZIr93vBGOczrk/view?usp=sharing)
或者 [Tsinghua Cloud](https://cloud.tsinghua.edu.cn/f/b3f119a008264b1cabd1/?dl=1) 下载 AdvertiseGen 数据集。
将解压后的 AdvertiseGen 目录放到 `data` 目录下并自行转换为如下格式数据集。
> 请注意,现在的微调代码中加入了验证集,因此,对于一组完整的微调数据集,必须包含训练数据集和验证数据集,测试数据集可以不填写。或者直接用验证数据集代替。
```
{"messages": [{"role": "user", "content": "类型#裙*裙长#半身裙"}, {"role": "assistant", "content": "这款百搭时尚的仙女半身裙,整体设计非常的飘逸随性,穿上之后每个女孩子都能瞬间变成小仙女啦。料子非常的轻盈,透气性也很好,穿到夏天也很舒适。"}]}
```
## 开始微调
通过以下代码执行 **单机多卡/多机多卡** 运行。
```bash
cd finetune
bash sft_finetune.sh
```
通过以下代码执行 **单机单卡** 运行。
```angular2html
cd finetune
bash lora_finetune.sh
```
# MiniCPM Fine-tuning
[中文版](https://github.com/OpenBMB/MiniCPM/blob/main/finetune/README.md)
This directory provides examples of fine-tuning the MiniCPM-2B model, including full model fine-tuning and PEFT. In terms of format, we offer examples for multi-turn dialogue fine-tuning and input-output format fine-tuning.
If you have downloaded the model to your local system, the `OpenBMB/MiniCPM-2B` field mentioned in this document and in the code should be replaced with the corresponding address to load the model from your local system.
Running the example requires `python>=3.10`. Besides the basic `torch` dependency, additional dependencies are needed to run the example code.
**We have provided an [example notebook](lora_finetune.ipynb) to demonstrate how to process data and use the fine-tuning script with AdvertiseGen as an example.**
```bash
pip install -r requirements.txt
```
## Testing Hardware Standard
We only provide examples for single-node multi-GPU/multi-node multi-GPU setups, so you will need at least one machine with multiple GPUs. In the **default configuration file** in this repository, we have documented the memory usage:
+ SFT full parameters fine-tuning: Evenly distributed across 4 GPUs, each GPU consumes `30245MiB` of memory.
+ LORA fine-tuning: One GPU, consuming `10619MiB` of memory.。
> Please note that these results are for reference only, and memory consumption may vary with different parameters. Please adjust according to your hardware situation.
## Multi-Turn Dialogue Format
The multi-turn dialogue fine-tuning example adopts the ChatGLM3 dialogue format convention, adding different `loss_mask` for different roles, thus calculating `loss` for multiple replies in one computation.
For the data file, the example uses the following format
```json
[
{
"messages": [
{
"role": "system",
"content": "<system prompt text>"
},
{
"role": "user",
"content": "<user prompt text>"
},
{
"role": "assistant",
"content": "<assistant response text>"
},
// ... Muti Turn
{
"role": "user",
"content": "<user prompt text>"
},
{
"role": "assistant",
"content": "<assistant response text>"
}
]
}
// ...
]
```
## Dataset Format Example
Here, taking the AdvertiseGen dataset as an example,
you can download the AdvertiseGen dataset from [Google Drive](https://drive.google.com/file/d/13_vf0xRTQsyneRKdD1bZIr93vBGOczrk/view?usp=sharing)
or [Tsinghua Cloud](https://cloud.tsinghua.edu.cn/f/b3f119a008264b1cabd1/?dl=1) . After extracting the AdvertiseGen directory, place it in the `data` directory and convert it into the following format dataset.
> Please note, the fine-tuning code now includes a validation set, so for a complete set of fine-tuning datasets, it must contain training and validation datasets, while the test dataset is optional. Or, you can use the validation dataset in place of it.
```
{"messages": [{"role": "user", "content": "类型#裙*裙长#半身裙"}, {"role": "assistant", "content": "这款百搭时尚的仙女半身裙,整体设计非常的飘逸随性,穿上之后每个女孩子都能瞬间变成小仙女啦。料子非常的轻盈,透气性也很好,穿到夏天也很舒适。"}]}
```
## Start Fine-tuning
Execute **single-node multi-GPU/multi-node multi-GPU** runs with the following code.
```bash
cd finetune
bash sft_finetune.sh
```
Execute **single-node single-GPU** runs with the following code.
```angular2html
cd finetune
bash lora_finetune.sh
```
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment