v1.0

24eacbc0 · chenzk · 24eacbc0 · 24eacbc0 · 24eacbc0 · 24eacbc0
Commit 24eacbc0 authored May 09, 2024 by chenzk
20 changed files
--- a/assets/instruction_following.case2.png
+++ b/assets/instruction_following.case2.png
--- a/assets/knowledge.case1.png
+++ b/assets/knowledge.case1.png
--- a/assets/math.case1.png
+++ b/assets/math.case1.png
--- a/assets/math.case2.png
+++ b/assets/math.case2.png
--- a/assets/special_char.case1.png
+++ b/assets/special_char.case1.png
--- a/assets/special_char.case2.png
+++ b/assets/special_char.case2.png
--- a/assets/translation.case1.png
+++ b/assets/translation.case1.png
--- a/assets/translation.case2.png
+++ b/assets/translation.case2.png
--- a/assets/wechat.jpg
+++ b/assets/wechat.jpg
--- a/miniCPM-bf16 @ 10f760ed
+++ b/miniCPM-bf16 @ 10f760ed
+Subproject commit 10f760ed0246a8bc6bbc3742f428306a5afabe07
--- a/convert_data.py
+++ b/convert_data.py
+# 转换为 ChatML 格式
+import os
+import shutil
+import json
+
+input_dir = "data/AdvertiseGen"
+output_dir = "data/AdvertiseGenChatML"
+if os.path.exists(output_dir):
+    shutil.rmtree(output_dir)
+os.makedirs(output_dir, exist_ok=True)
+
+for fn in ["train.json", "dev.json"]:
+    data_out_list = []
+    with open(os.path.join(input_dir, fn), "r") as f, open(os.path.join(output_dir, fn), "w") as fo:
+        for line in f:
+            if len(line.strip()) > 0:
+                data = json.loads(line)
+                data_out = {
+                    "messages": [
+                        {
+                            "role": "user",
+                            "content": data["content"],
+                        },
+                        {
+                            "role": "assistant",
+                            "content": data["summary"],
+                        },
+                    ]
+                }
+                data_out_list.append(data_out)
+        json.dump(data_out_list, fo, ensure_ascii=False, indent=4)
--- a/data/AdvertiseGenChatML/dev.json
+++ b/data/AdvertiseGenChatML/dev.json
+[
+    {
+        "messages": [
+            {
+                "role": "user",
+                "content": "简约而不简单的牛仔外套，白色的衣身十分百搭。衣身多处有做旧破洞设计，打破单调乏味，增加一丝造型看点。衣身后背处有趣味刺绣装饰，丰富层次感，彰显别样时尚。"
+            },
+            {
+                "role": "assistant",
+                "content": "0"
+            }
+        ]
+    },
+    {
+        "messages": [
+            {
+                "role": "user",
+                "content": "这款BRAND针织两件套连衣裙，简约的纯色半高领针织上衣，修饰着颈部线，尽显优雅气质。同时搭配叠穿起一条背带式的复古格纹裙，整体散发着一股怀旧的时髦魅力，很是文艺范。"
+            },
+            {
+                "role": "assistant",
+                "content": "0"
+            }
+        ]
+    },
+    {
+        "messages": [
+            {
+                "role": "user",
+                "content": "嘻哈玩转童年，随时<UNK>，没错，出街还是要靠卫衣来装酷哦！时尚个性的连帽设计，率性有范还防风保暖。还有胸前撞色的卡通印花设计，靓丽抢眼更富有趣味性，加上前幅大容量又时尚美观的袋鼠兜，简直就是孩子耍帅装酷必备的利器。"
+            },
+            {
+                "role": "assistant",
+                "content": "0"
+            }
+        ]
+    },
+    {
+        "messages": [
+            {
+                "role": "user",
+                "content": "裤子是简约大方的版型设计，带来一种极简主义风格而且不乏舒适优雅感，是衣橱必不可少的一件百搭单品。标志性的logo可以体现出一股子浓郁的英伦风情，轻而易举带来独一无二的<UNK>体验。"
+            },
+            {
+                "role": "assistant",
+                "content": "0"
+            }
+        ]
+    },
+    {
+        "messages": [
+            {
+                "role": "user",
+                "content": "这款来自梵凯的半身裙富有十足的设计感，采用了别致的不规则设计，凸显出时尚前卫的格调，再搭配俏皮的高腰设计，收腰提臀的同时还勾勒出优美迷人的身材曲线，而且还帮你拉长腿部比例，释放出优雅娇俏的小女人味。并且独特的弧形下摆还富有流畅的线条美，一颦一动间展现出灵动柔美的气质。"
+            },
+            {
+                "role": "assistant",
+                "content": "0"
+            }
+        ]
+    },
+    {
+        "messages": [
+            {
+                "role": "user",
+                "content": "这件衬衫的款式非常的宽松，利落的线条可以很好的隐藏身材上的小缺点，穿在身上有着很好的显瘦效果。领口装饰了一个可爱的抽绳，漂亮的绳结展现出了十足的个性，配合时尚的泡泡袖型，尽显女性甜美可爱的气息。"
+            },
+            {
+                "role": "assistant",
+                "content": "0"
+            }
+        ]
+    },
+    {
+        "messages": [
+            {
+                "role": "user",
+                "content": "宫廷风的甜美蕾丝设计，清醒的蕾丝拼缝处，刺绣定制的贝壳花边，增添了裙子的精致感觉。超大的裙摆，加上精细的小花边设计，上身后既带着仙气撩人又很有女人味。泡泡袖上的提花面料，在细节处增加了浪漫感，春日的仙女姐姐。浪漫蕾丝布满整个裙身，美丽明艳，气质超仙。"
+            },
+            {
+                "role": "assistant",
+                "content": "0"
+            }
+        ]
+    },
+    {
+        "messages": [
+            {
+                "role": "user",
+                "content": "个性化的九分裤型，穿着在身上，能够从视觉上拉长你的身体比例，让你看起来更加的有范。简约的黑色系列，极具时尚的韵味，充分凸显你专属的成熟韵味。修身的立体廓形，为你塑造修长的曲线。"
+            },
+            {
+                "role": "assistant",
+                "content": "0"
+            }
+        ]
+    },
+    {
+        "messages": [
+            {
+                "role": "user",
+                "content": "文艺个性的印花连衣裙，藏青色底蕴，低调又大气，撞色太阳花分布整个裙身，绚丽而美好，带来时尚减龄的气质。基础款的舒适圆领，简约不失大方，勾勒精致脸庞。领后是一粒包布扣固定，穿脱十分方便。前片立体的打褶设计，搭配后片压褶的做工，增添层次和空间感，显瘦又有型。"
+            },
+            {
+                "role": "assistant",
+                "content": "0"
+            }
+        ]
+    },
+    {
+        "messages": [
+            {
+                "role": "user",
+                "content": "裙身处采用立体蝴蝶结装饰辅以蓝色条带点缀，令衣身造型饱满富有层次的同时为其注入一丝甜美气息。将女孩清新娇俏的一面衬托而出。"
+            },
+            {
+                "role": "assistant",
+                "content": "0"
+            }
+        ]
+    },
+    {
+        "messages": [
+            {
+                "role": "user",
+                "content": "这条颜色素雅的长裙，以纯净的白色作为底色，辅以印在裙上的点点小碎花，<UNK>勾勒出一幅生动优美的“风景图”，给人一种大自然的清新之感，好似吸收新鲜空气的那种舒畅感。腰间贴心地设计成松紧腰，将腰线很好地展现出来，十分纤巧，在裙子的侧边，有着一个隐形的拉链，能够让你穿脱自如。"
+            },
+            {
+                "role": "assistant",
+                "content": "0"
+            }
+        ]
+    },
+    {
+        "messages": [
+            {
+                "role": "user",
+                "content": "不同于一般的西服裤。这款<UNK>小喇叭羊毛裤在样式上显得更加时髦优雅，特地采用微微的九分喇叭裤腿设计，视觉上将脚踝处显得更加纤细。并且特地甄选柔软的羊毛材质，就算直接贴肤穿着，也不会觉得寒冷，比较适合初秋穿噢。"
+            },
+            {
+                "role": "assistant",
+                "content": "0"
+            }
+        ]
+    },
+    {
+        "messages": [
+            {
+                "role": "user",
+                "content": "上衣与裤子的连体式设计从整体看起来十分的具有大牌的风范。简约，没有任何的其他装饰，把自己的独特尽情展现。上衣胸口两边设有两个加大口袋，更增添了层次感。衣襟<UNK>了拉链，让穿脱更加的方便，轻轻<UNK>一点，显得更加时尚。"
+            },
+            {
+                "role": "assistant",
+                "content": "0"
+            }
+        ]
+    },
+    {
+        "messages": [
+            {
+                "role": "user",
+                "content": "这件西装外套选用了经久不衰的格纹元素，通过色彩的明暗对比，展现出丰富的视觉层次，又缔造了英伦风的复古气息。法式的大翻领，延长颈部线条，彰显出女性帅气干练的特殊魅力。宽松舒适的版型完美掩藏了身材的小秘密，给身体自由活动空间。"
+            },
+            {
+                "role": "assistant",
+                "content": "0"
+            }
+        ]
+    },
+    {
+        "messages": [
+            {
+                "role": "user",
+                "content": "我的心情很低落，我不想和任何人交流"
+            },
+            {
+                "role": "assistant",
+                "content": "1"
+            }
+        ]
+    },
+    {
+        "messages": [
+            {
+                "role": "user",
+                "content": "我感到焦虑和不安，无法集中精力。"
+            },
+            {
+                "role": "assistant",
+                "content": "1"
+            }
+        ]
+    },
+    {
+        "messages": [
+            {
+                "role": "user",
+                "content": "我感觉被忽视了，这让我感到很孤独和无助。"
+            },
+            {
+                "role": "assistant",
+                "content": "1"
+            }
+        ]
+    },
+    {
+        "messages": [
+            {
+                "role": "user",
+                "content": "我感到非常沮丧和失望"
+            },
+            {
+                "role": "assistant",
+                "content": "1"
+            }
+        ]
+    }
+]
\ No newline at end of file
--- a/data/AdvertiseGenChatML/train.json
+++ b/data/AdvertiseGenChatML/train.json
+[
+    {
+        "messages": [
+            {
+                "role": "user",
+                "content": "宽松的阔腿裤这两年真的吸粉不少，明星时尚达人的心头爱。毕竟好穿时尚，谁都能穿出腿长2米的效果宽松的裤腿，当然是遮肉小能手啊。上身随性自然不拘束，面料亲肤舒适贴身体验感棒棒哒。系带部分增加设计看点，还让单品的设计感更强。腿部线条若隐若现的，性感撩人。颜色敲温柔的，与裤子本身所呈现的风格有点反差萌。"
+            },
+            {
+                "role": "assistant",
+                "content": "0"
+            }
+        ]
+    },
+    {
+        "messages": [
+            {
+                "role": "user",
+                "content": "圆形领口修饰脖颈线条，适合各种脸型，耐看有气质。无袖设计，尤显清凉，简约横条纹装饰，使得整身人鱼造型更为生动立体。加之撞色的鱼尾下摆，深邃富有诗意。收腰包臀,修饰女性身体曲线，结合别出心裁的鱼尾裙摆设计，勾勒出自然流畅的身体轮廓，展现了婀娜多姿的迷人姿态。"
+            },
+            {
+                "role": "assistant",
+                "content": "0"
+            }
+        ]
+    },
+    {
+        "messages": [
+            {
+                "role": "user",
+                "content": "宽松的卫衣版型包裹着整个身材，宽大的衣身与身材形成鲜明的对比描绘出纤瘦的身形。下摆与袖口的不规则剪裁设计，彰显出时尚前卫的形态。被剪裁过的样式呈现出布条状自然地垂坠下来，别具有一番设计感。线条分明的字母样式有着花式的外观，棱角分明加上具有少女元气的枣红色十分有年轻活力感。粉红色的衣身把肌肤衬托得很白嫩又健康。"
+            },
+            {
+                "role": "assistant",
+                "content": "0"
+            }
+        ]
+    },
+    {
+        "messages": [
+            {
+                "role": "user",
+                "content": "踩着轻盈的步伐享受在午后的和煦风中，让放松与惬意感为你免去一身的压力与束缚，仿佛要将灵魂也寄托在随风摇曳的雪纺连衣裙上，吐露出<UNK>微妙而又浪漫的清新之意。宽松的a字版型除了能够带来足够的空间，也能以上窄下宽的方式强化立体层次，携带出自然优雅的曼妙体验。"
+            },
+            {
+                "role": "assistant",
+                "content": "0"
+            }
+        ]
+    },
+    {
+        "messages": [
+            {
+                "role": "user",
+                "content": "想要在人群中脱颖而出吗？那么最适合您的莫过于这款polo衫短袖，采用了经典的polo领口和柔软纯棉面料，让您紧跟时尚潮流。再配合上潮流的蓝色拼接设计，使您的风格更加出众。就算单从选料上来说，这款polo衫的颜色沉稳经典，是这个季度十分受大众喜爱的风格了，而且兼具舒适感和时尚感。"
+            },
+            {
+                "role": "assistant",
+                "content": "0"
+            }
+        ]
+    },
+    {
+        "messages": [
+            {
+                "role": "user",
+                "content": "小女人十足的条纹衬衣，缎面一点点的复古，还有蓝绿色这种高级气质复古色，真丝材质，撞色竖条纹特别的现代感味道，直h型的裁剪和特别的衣长款式，更加独立性格。双层小立领，更显脸型。"
+            },
+            {
+                "role": "assistant",
+                "content": "0"
+            }
+        ]
+    },
+    {
+        "messages": [
+            {
+                "role": "user",
+                "content": "这款连衣裙，由上到下都透出一丝迷人诱惑的女性魅力，经典圆领型，开口度恰好，露出你的迷人修长的脖颈线条，很是优雅气质，短袖设计，在这款上竟是撩人美貌，高腰线，散开的裙摆，到小腿的长度，遮住了腿部粗的部分，对身材有很好的修饰作用，穿起来很女神；裙身粉红色花枝重工刺绣，让人一眼难忘！而且在这种网纱面料上做繁复图案的绣花，是很考验工艺的，对机器的要求会更高，更加凸显我们的高品质做工；"
+            },
+            {
+                "role": "assistant",
+                "content": "0"
+            }
+        ]
+    },
+    {
+        "messages": [
+            {
+                "role": "user",
+                "content": "一款非常简洁大方的纯色卫衣，设计点在于胸前的“<UNK><UNK>”的中文字印花，新颖特别，让人眼前一亮。简单又吸睛的款式，而且不失时髦感，很适合个性年轻人。"
+            },
+            {
+                "role": "assistant",
+                "content": "0"
+            }
+        ]
+    },
+    {
+        "messages": [
+            {
+                "role": "user",
+                "content": "看惯了灰色的冷淡和黑色的沉闷感，来一点醒目的彩色增添点活力吧。亮眼又吸睛的姜黄色色调，嫩肤显白非常的有设计感。趣味的撞色和宽松的版型相交辉映，修饰身形小缺点的同时，时尚又百搭。优雅的落肩袖，轻松修饰肩部线条，让毛衣上身凸显出一丝慵懒随性的休闲感，时尚魅力尽显。"
+            },
+            {
+                "role": "assistant",
+                "content": "0"
+            }
+        ]
+    },
+    {
+        "messages": [
+            {
+                "role": "user",
+                "content": "黑与白，两种最极端的颜色却轻松搭配成了经典，就像此款衬衣，无需过多装饰，仅色调就足够醒目个性，受潮<UNK>所喜欢。做了无袖中长款的样式，走路带风的感觉着实不错，圆领的设计，不是常规的衬衫领，少了点正式反而有种休闲感觉，适合孩子们穿着。后背大面积撞色印花装点，是时尚潮流的象征，也让衣衣不至于单调，轻松就能穿出彩。"
+            },
+            {
+                "role": "assistant",
+                "content": "0"
+            }
+        ]
+    },
+    {
+        "messages": [
+            {
+                "role": "user",
+                "content": "个性休闲风的连帽卫衣造型时髦大方，宽松的版型剪裁让肉肉的小宝贝也可以穿着，保暖的连帽设计时刻给予宝贝温柔的呵护，袖子和后背别致时髦的字母印花点缀，满满的街头元素融入，演绎休闲朋克风，对称的小口袋美观大方，方便放置更多的随身物品。"
+            },
+            {
+                "role": "assistant",
+                "content": "0"
+            }
+        ]
+    },
+    {
+        "messages": [
+            {
+                "role": "user",
+                "content": "简单大气的设计，不费吹灰之力就能搭配的时髦范儿。时尚的配色一点都不觉得平淡了，有种浑然天成的大气感。强调了整体的装饰，和谐又不失个性，搭配裤装帅气十足，搭配裙子精致优雅。链条和肩带的搭配让使用感更加舒服，单肩手提都好看。"
+            },
+            {
+                "role": "assistant",
+                "content": "0"
+            }
+        ]
+    },
+    {
+        "messages": [
+            {
+                "role": "user",
+                "content": "深蓝色的高腰牛仔裤，修身的款式勾勒出纤细的美腿。牛仔裤的裤脚设计<UNK>张开的喇叭型，巧妙地修饰了小腿的线条，洋溢着复古的年代感。"
+            },
+            {
+                "role": "assistant",
+                "content": "0"
+            }
+        ]
+    },
+    {
+        "messages": [
+            {
+                "role": "user",
+                "content": "这是一件显得特别清新的衬衣，采用了条纹的设计，给予人一种甜美可人的气质。并且融合了别致的斜肩一字领设计，高调的展示出性感的锁骨，将迷人的香肩展现在外，性感中不失去清纯的气息。袖口处的蝴蝶结系带装饰，增添了俏皮的韵味，简洁大方。且在下摆处采用了不对称的设计，增强了视觉效果，更显潮流。"
+            },
+            {
+                "role": "assistant",
+                "content": "0"
+            }
+        ]
+    },
+    {
+        "messages": [
+            {
+                "role": "user",
+                "content": "作为基础款单品，牛仔裤也<UNK><UNK>，想要呈现给大家的是——每次搭配都有新感觉。裤子经过复古做旧处理，风格鲜明，也很注重细节，连纽扣也做了统一的做旧处理，融入个性十足的磨破设计，高腰直筒basic裤型，修饰身材，穿出高挑长腿。"
+            },
+            {
+                "role": "assistant",
+                "content": "0"
+            }
+        ]
+    },
+    {
+        "messages": [
+            {
+                "role": "user",
+                "content": "一款温暖柔软又富有弹性的针织衫，不仅可以抵御严寒侵袭，还能更好地进行搭配。v领的设计，能勾勒出迷人的天鹅颈以及衬托出娇小的脸型。宽松又别致的剪裁，能从视觉上显露纤长的下半身，起到显瘦的效果。直筒造型的袖子，修饰出优美的手臂线条，衣身上的方格刺绣，时尚又吸睛。"
+            },
+            {
+                "role": "assistant",
+                "content": "0"
+            }
+        ]
+    },
+    {
+        "messages": [
+            {
+                "role": "user",
+                "content": "绿色的衣身上镶嵌着<UNK>，就是这款衬衫最大的迷人之处，“红花配绿叶”般的色调，将清新气息阐述的淋漓尽致。经典的翻领更是贴心，修饰颈部线条的同时，尽显精致干练的气质，出街轻松凹造型。"
+            },
+            {
+                "role": "assistant",
+                "content": "0"
+            }
+        ]
+    },
+    {
+        "messages": [
+            {
+                "role": "user",
+                "content": "这款外套采用了撞色拉链织带以及字母印花设计。这两种元素的融入使外套不会显得过于单调沉闷，吸睛而亮眼，充满年轻与朝气感，非常减龄。"
+            },
+            {
+                "role": "assistant",
+                "content": "0"
+            }
+        ]
+    },
+    {
+        "messages": [
+            {
+                "role": "user",
+                "content": "本款连衣裙整体采用h型的轮廓设计，藏肉显瘦，不挑身材，适合各种身形的人穿着。小翻领的领口设计，使得本款连衣裙穿在身上看起来十分的精神帅气，具有青春活力。单排扣的衣门襟设计，又给本款连衣裙带来了一丝的复古味道。裙身上的刺绣花朵装饰，使得本款连衣裙不显得单调，富有层次感，上身给人一种独特的时尚魅力。长袖的设计，更加的贴合手臂曲线，上身更加的舒适贴身。"
+            },
+            {
+                "role": "assistant",
+                "content": "0"
+            }
+        ]
+    },
+    {
+        "messages": [
+            {
+                "role": "user",
+                "content": "我感到被背叛了，这让我感到很心痛和失望。"
+            },
+            {
+                "role": "assistant",
+                "content": "1"
+            }
+        ]
+    },
+    {
+        "messages": [
+            {
+                "role": "user",
+                "content": "这件事情让我感到很羞耻和尴尬。"
+            },
+            {
+                "role": "assistant",
+                "content": "1"
+            }
+        ]
+    },
+    {
+        "messages": [
+            {
+                "role": "user",
+                "content": "我感到很内疚和自责，因为我没有做好。"
+            },
+            {
+                "role": "assistant",
+                "content": "1"
+            }
+        ]
+    },
+    {
+        "messages": [
+            {
+                "role": "user",
+                "content": "这段关系让我感到很疲惫和无力，我不知道该怎么办。"
+            },
+            {
+                "role": "assistant",
+                "content": "1"
+            }
+        ]
+    },
+    {
+        "messages": [
+            {
+                "role": "user",
+                "content": "我的心情很沉重，我无法摆脱这种负面情绪。"
+            },
+            {
+                "role": "assistant",
+                "content": "1"
+            }
+        ]
+    },
+    {
+        "messages": [
+            {
+                "role": "user",
+                "content": "这件事情让我感到很愤怒和不满。"
+            },
+            {
+                "role": "assistant",
+                "content": "1"
+            }
+        ]
+    }
+]
--- a/demo/hf_based_demo.py
+++ b/demo/hf_based_demo.py
+from typing import Dict
+from typing import List
+from typing import Tuple
+
+import argparse
+import gradio as gr
+import torch
+from threading import Thread
+from transformers import (
+    AutoModelForCausalLM, 
+    AutoTokenizer,
+    TextIteratorStreamer
+)
+import warnings
+warnings.filterwarnings('ignore', category=UserWarning, message='TypedStorage is deprecated')
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--model_path", type=str, default="")
+parser.add_argument("--torch_dtype", type=str, default="bfloat16", choices=["float32", "bfloat16"])
+parser.add_argument("--server_name", type=str, default="127.0.0.1")
+parser.add_argument("--server_port", type=int, default=7860)
+args = parser.parse_args()
+
+# init model torch dtype
+torch_dtype = args.torch_dtype
+if torch_dtype =="" or torch_dtype == "bfloat16":
+    torch_dtype = torch.bfloat16
+elif torch_dtype == "float32":
+    torch_dtype = torch.float32
+else:
+    raise ValueError(f"Invalid torch dtype: {torch_dtype}")
+
+# init model and tokenizer
+path = args.model_path
+tokenizer = AutoTokenizer.from_pretrained(path)
+model = AutoModelForCausalLM.from_pretrained(path, torch_dtype=torch_dtype, device_map="auto", trust_remote_code=True)
+
+# init gradio demo host and port
+server_name=args.server_name
+server_port=args.server_port
+
+def hf_gen(dialog: List, top_p: float, temperature: float, repetition_penalty: float, max_dec_len: int):
+    """generate model output with huggingface api
+
+    Args:
+        query (str): actual model input.
+        top_p (float): only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation.
+        temperature (float): Strictly positive float value used to modulate the logits distribution.
+        max_dec_len (int): The maximum numbers of tokens to generate.
+
+    Yields:
+        str: real-time generation results of hf model
+    """    
+    inputs = tokenizer.apply_chat_template(dialog, tokenize=False, add_generation_prompt=False)
+    enc = tokenizer(inputs, return_tensors="pt").to("cuda")
+    streamer = TextIteratorStreamer(tokenizer)
+    generation_kwargs = dict(
+        enc,
+        do_sample=True,
+        top_k=0,
+        top_p=top_p,
+        temperature=temperature,
+        repetition_penalty=repetition_penalty,
+        max_new_tokens=max_dec_len,
+        pad_token_id=tokenizer.eos_token_id,
+        streamer=streamer,
+    )
+    thread = Thread(target=model.generate, kwargs=generation_kwargs)
+    thread.start()
+    answer = ""
+    for new_text in streamer:
+        answer += new_text
+        yield answer[4 + len(inputs):]
+
+
+def generate(chat_history: List, query: str, top_p: float, temperature: float, repetition_penalty: float, max_dec_len: int):
+    """generate after hitting "submit" button
+
+    Args:
+        chat_history (List): [[q_1, a_1], [q_2, a_2], ..., [q_n, a_n]]. list that stores all QA records
+        query (str): query of current round
+        top_p (float): only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation.
+        temperature (float): strictly positive float value used to modulate the logits distribution.
+        max_dec_len (int): The maximum numbers of tokens to generate.
+
+    Yields:
+        List: [[q_1, a_1], [q_2, a_2], ..., [q_n, a_n], [q_n+1, a_n+1]]. chat_history + QA of current round.
+    """    
+    assert query != "", "Input must not be empty!!!"
+    # apply chat template
+    model_input = []
+    for q, a in chat_history:
+        model_input.append({"role": "user", "content": q})
+        model_input.append({"role": "assistant", "content": a})
+    model_input.append({"role": "user", "content": query})
+    # yield model generation
+    chat_history.append([query, ""])
+    for answer in hf_gen(model_input, top_p, temperature, repetition_penalty, max_dec_len):
+        chat_history[-1][1] = answer.strip("</s>")
+        yield gr.update(value=""), chat_history
+
+
+def regenerate(chat_history: List, top_p: float, temperature: float, repetition_penalty: float, max_dec_len: int):
+    """re-generate the answer of last round's query
+
+    Args:
+        chat_history (List): [[q_1, a_1], [q_2, a_2], ..., [q_n, a_n]]. list that stores all QA records
+        top_p (float): only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation.
+        temperature (float): strictly positive float value used to modulate the logits distribution.
+        max_dec_len (int): The maximum numbers of tokens to generate.
+
+    Yields:
+        List: [[q_1, a_1], [q_2, a_2], ..., [q_n, a_n]]. chat_history
+    """    
+    assert len(chat_history) >= 1, "History is empty. Nothing to regenerate!!"
+    # apply chat template
+    model_input = []
+    for q, a in chat_history[:-1]:
+        model_input.append({"role": "user", "content": q})
+        model_input.append({"role": "assistant", "content": a})
+    model_input.append({"role": "user", "content": chat_history[-1][0]})
+    # yield model generation
+    for answer in hf_gen(model_input, top_p, temperature, repetition_penalty, max_dec_len):
+        chat_history[-1][1] = answer.strip("</s>")
+        yield gr.update(value=""), chat_history
+
+
+def clear_history():
+    """clear all chat history
+
+    Returns:
+        List: empty chat history
+    """    
+    return []
+
+
+def reverse_last_round(chat_history):
+    """reverse last round QA and keep the chat history before
+
+    Args:
+        chat_history (List): [[q_1, a_1], [q_2, a_2], ..., [q_n, a_n]]. list that stores all QA records
+
+    Returns:
+        List: [[q_1, a_1], [q_2, a_2], ..., [q_n-1, a_n-1]]. chat_history without last round.
+    """    
+    assert len(chat_history) >= 1, "History is empty. Nothing to reverse!!"
+    return chat_history[:-1]
+
+
+# launch gradio demo
+with gr.Blocks(theme="soft") as demo:
+    gr.Markdown("""# MiniCPM Gradio Demo""")
+
+    with gr.Row():
+        with gr.Column(scale=1):
+            top_p = gr.Slider(0, 1, value=0.8, step=0.1, label="top_p")
+            temperature = gr.Slider(0.1, 2.0, value=0.5, step=0.1, label="temperature")
+            repetition_penalty = gr.Slider(0.1, 2.0, value=1.1, step=0.1, label="repetition_penalty")
+            max_dec_len = gr.Slider(1, 1024, value=1024, step=1, label="max_dec_len")
+        with gr.Column(scale=5):
+            chatbot = gr.Chatbot(bubble_full_width=False, height=400)
+            user_input = gr.Textbox(label="User", placeholder="Input your query here!", lines=8)
+            with gr.Row():
+                submit = gr.Button("Submit")
+                clear = gr.Button("Clear")
+                regen = gr.Button("Regenerate")
+                reverse = gr.Button("Reverse")
+
+    submit.click(generate, inputs=[chatbot, user_input, top_p, temperature, repetition_penalty, max_dec_len], outputs=[user_input, chatbot])
+    regen.click(regenerate, inputs=[chatbot, top_p, temperature, repetition_penalty, max_dec_len], outputs=[user_input, chatbot])
+    clear.click(clear_history, inputs=[], outputs=[chatbot])
+    reverse.click(reverse_last_round, inputs=[chatbot], outputs=[chatbot])
+
+demo.queue()
+demo.launch(server_name=server_name, server_port=server_port, show_error=True)
--- a/demo/vllm_based_demo.py
+++ b/demo/vllm_based_demo.py
+from typing import Dict
+from typing import List
+from typing import Tuple
+
+import argparse
+import gradio as gr
+from vllm import LLM, SamplingParams
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--model_path", type=str, default="")
+parser.add_argument("--torch_dtype", type=str, default="bfloat16", choices=["float32", "bfloat16"])
+parser.add_argument("--server_name", type=str, default="127.0.0.1")
+parser.add_argument("--server_port", type=int, default=7860)
+args = parser.parse_args()
+
+
+# init model torch dtype
+torch_dtype = args.torch_dtype
+if torch_dtype =="" or torch_dtype == "bfloat16":
+    torch_dtype = "bfloat16"
+elif torch_dtype == "float32":
+    torch_dtype = "float32"
+else:
+    raise ValueError(f"Invalid torch dtype: {torch_dtype}")
+
+# init model and tokenizer
+path = args.model_path
+llm = LLM(model=path, tensor_parallel_size=1, dtype=torch_dtype)
+
+# init gradio demo host and port
+server_name=args.server_name
+server_port=args.server_port
+
+def vllm_gen(dialog: List, top_p: float, temperature: float, max_dec_len: int):
+    """generate model output with huggingface api
+
+    Args:
+        query (str): actual model input.
+        top_p (float): only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation.
+        temperature (float): Strictly positive float value used to modulate the logits distribution.
+        max_dec_len (int): The maximum numbers of tokens to generate.
+
+    Yields:
+        str: real-time generation results of hf model
+    """    
+    prompt = ""
+    assert len(dialog) % 2 == 1
+    for info in dialog:
+        if info["role"] == "user":
+            prompt += "<用户>" + info["content"]
+        else:
+            prompt += "<AI>" + info["content"]
+    prompt += "<AI>"
+    params_dict = {
+        "n": 1,
+        "best_of": 1,
+        "presence_penalty": 1.0,    
+        "frequency_penalty": 0.0,
+        "temperature": temperature,
+        "top_p": top_p,
+        "top_k": -1,
+        "use_beam_search": False,
+        "length_penalty": 1,
+        "early_stopping": False,
+        "stop": None,
+        "stop_token_ids": None,
+        "ignore_eos": False,
+        "max_tokens": max_dec_len,
+        "logprobs": None,
+        "prompt_logprobs": None,
+        "skip_special_tokens": True,
+    }
+    sampling_params = SamplingParams(**params_dict)
+    outputs = llm.generate(prompts=prompt, sampling_params=sampling_params)[0]
+    generated_text = outputs.outputs[0].text
+    return generated_text
+
+
+def generate(chat_history: List, query: str, top_p: float, temperature: float, max_dec_len: int):
+    """generate after hitting "submit" button
+
+    Args:
+        chat_history (List): [[q_1, a_1], [q_2, a_2], ..., [q_n, a_n]]. list that stores all QA records
+        query (str): query of current round
+        top_p (float): only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation.
+        temperature (float): strictly positive float value used to modulate the logits distribution.
+        max_dec_len (int): The maximum numbers of tokens to generate.
+
+    Yields:
+        List: [[q_1, a_1], [q_2, a_2], ..., [q_n, a_n], [q_n+1, a_n+1]]. chat_history + QA of current round.
+    """    
+    assert query != "", "Input must not be empty!!!"
+    # apply chat template
+    model_input = []
+    for q, a in chat_history:
+        model_input.append({"role": "user", "content": q})
+        model_input.append({"role": "assistant", "content": a})
+    model_input.append({"role": "user", "content": query})
+    # yield model generation
+    model_output = vllm_gen(model_input, top_p, temperature, max_dec_len)
+    chat_history.append([query, model_output])
+    return gr.update(value=""), chat_history
+
+
+def regenerate(chat_history: List, top_p: float, temperature: float, max_dec_len: int):
+    """re-generate the answer of last round's query
+
+    Args:
+        chat_history (List): [[q_1, a_1], [q_2, a_2], ..., [q_n, a_n]]. list that stores all QA records
+        top_p (float): only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation.
+        temperature (float): strictly positive float value used to modulate the logits distribution.
+        max_dec_len (int): The maximum numbers of tokens to generate.
+
+    Yields:
+        List: [[q_1, a_1], [q_2, a_2], ..., [q_n, a_n]]. chat_history
+    """    
+    assert len(chat_history) >= 1, "History is empty. Nothing to regenerate!!"
+    # apply chat template
+    model_input = []
+    for q, a in chat_history[:-1]:
+        model_input.append({"role": "user", "content": q})
+        model_input.append({"role": "assistant", "content": a})
+    model_input.append({"role": "user", "content": chat_history[-1][0]})
+    # yield model generation
+    model_output = vllm_gen(model_input, top_p, temperature, max_dec_len)
+    chat_history[-1][1] = model_output
+    return gr.update(value=""), chat_history
+
+
+def clear_history():
+    """clear all chat history
+
+    Returns:
+        List: empty chat history
+    """    
+    return []
+
+
+def reverse_last_round(chat_history):
+    """reverse last round QA and keep the chat history before
+
+    Args:
+        chat_history (List): [[q_1, a_1], [q_2, a_2], ..., [q_n, a_n]]. list that stores all QA records
+
+    Returns:
+        List: [[q_1, a_1], [q_2, a_2], ..., [q_n-1, a_n-1]]. chat_history without last round.
+    """    
+    assert len(chat_history) >= 1, "History is empty. Nothing to reverse!!"
+    return chat_history[:-1]
+
+
+# launch gradio demo
+with gr.Blocks(theme="soft") as demo:
+    gr.Markdown("""# MiniCPM Gradio Demo""")
+
+    with gr.Row():
+        with gr.Column(scale=1):
+            top_p = gr.Slider(0, 1, value=0.8, step=0.1, label="top_p")
+            temperature = gr.Slider(0.1, 2.0, value=0.5, step=0.1, label="temperature")
+            max_dec_len = gr.Slider(1, 1024, value=1024, step=1, label="max_dec_len")
+        with gr.Column(scale=5):
+            chatbot = gr.Chatbot(bubble_full_width=False, height=400)
+            user_input = gr.Textbox(label="User", placeholder="Input your query here!", lines=8)
+            with gr.Row():
+                submit = gr.Button("Submit")
+                clear = gr.Button("Clear")
+                regen = gr.Button("Regenerate")
+                reverse = gr.Button("Reverse")
+
+    submit.click(generate, inputs=[chatbot, user_input, top_p, temperature, max_dec_len], outputs=[user_input, chatbot])
+    regen.click(regenerate, inputs=[chatbot, top_p, temperature, max_dec_len], outputs=[user_input, chatbot])
+    clear.click(clear_history, inputs=[], outputs=[chatbot])
+    reverse.click(reverse_last_round, inputs=[chatbot], outputs=[chatbot])
+
+demo.queue()
+demo.launch(server_name=server_name, server_port=server_port, show_error=True)
--- a/doc/transformer.png
+++ b/doc/transformer.png
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
+FROM image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.1.0-centos7.6-dtk23.10-py38
+ENV DEBIAN_FRONTEND=noninteractive
+# RUN yum update && yum install -y git cmake wget build-essential
+RUN source /opt/dtk-23.10/env.sh
+# 安装pip相关依赖
+COPY requirements.txt requirements.txt
+RUN pip3 install -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com -r requirements.txt
+
--- a/docker/requirements.txt
+++ b/docker/requirements.txt
+# for finetune
+jieba>=0.42.1
+ruamel_yaml>=0.18.5
+rouge_chinese>=1.0.3
+jupyter>=1.0.0
+datasets>=2.16.1
+peft>=0.7.1
+transformers==4.37.2
+# deepspeed>=0.13.1
+# flash_attn>=2.5.1
--- a/finetune/README.md
+++ b/finetune/README.md
+# MiniCPM 微调
+
+[English Version](https://github.com/OpenBMB/MiniCPM/blob/main/finetune/README_en.md)
+
+本目录提供 MiniCPM-2B 模型的微调示例，包括全量微调和 PEFT。格式上，提供多轮对话微调样例和输入输出格式微调样例。
+
+如果将模型下载到了本地，本文和代码中的 `OpenBMB/MiniCPM-2B` 字段均应替换为相应地址以从本地加载模型。
+
+运行示例需要 `python>=3.10`，除基础的 `torch` 依赖外，示例代码运行还需要依赖。
+
+**我们提供了 [示例notebook](lora_finetune.ipynb) 用于演示如何以 AdvertiseGen 为例处理数据和使用微调脚本。**
+
+```bash
+pip install -r requirements.txt
+```
+
+## 测试硬件标准
+
+我们仅提供了单机多卡/多机多卡的运行示例，因此您需要至少一台具有多个 GPU 的机器。本仓库中的**默认配置文件**中，我们记录了显存的占用情况：
+
+ SFT 全量微调: 4张显卡平均分配，每张显卡占用 `30245MiB` 显存。
+ LORA 微调: 1张显卡，占用 `10619MiB` 显存。
+
+> 请注意，该结果仅供参考，对于不同的参数，显存占用可能会有所不同。请结合你的硬件情况进行调整。
+
+## 多轮对话格式
+
+多轮对话微调示例采用 ChatGLM3 对话格式约定，对不同角色添加不同 `loss_mask` 从而在一遍计算中为多轮回复计算 `loss`。
+
+对于数据文件，样例采用如下格式
+
+```json
+[
+  {
+    "messages": [
+      {
+        "role": "system",
+        "content": "<system prompt text>"
+      },
+      {
+        "role": "user",
+        "content": "<user prompt text>"
+      },
+      {
+        "role": "assistant",
+        "content": "<assistant response text>"
+      },
+      // ... Muti Turn
+      {
+        "role": "user",
+        "content": "<user prompt text>"
+      },
+      {
+        "role": "assistant",
+        "content": "<assistant response text>"
+      }
+    ]
+  }
+  // ...
+]
+```
+
+## 数据集格式示例
+
+这里以 AdvertiseGen 数据集为例,
+您可以从 [Google Drive](https://drive.google.com/file/d/13_vf0xRTQsyneRKdD1bZIr93vBGOczrk/view?usp=sharing)
+或者 [Tsinghua Cloud](https://cloud.tsinghua.edu.cn/f/b3f119a008264b1cabd1/?dl=1) 下载 AdvertiseGen 数据集。
+将解压后的 AdvertiseGen 目录放到 `data` 目录下并自行转换为如下格式数据集。
+
+> 请注意，现在的微调代码中加入了验证集，因此，对于一组完整的微调数据集，必须包含训练数据集和验证数据集，测试数据集可以不填写。或者直接用验证数据集代替。
+
+```
+{"messages": [{"role": "user", "content": "类型#裙*裙长#半身裙"}, {"role": "assistant", "content": "这款百搭时尚的仙女半身裙，整体设计非常的飘逸随性，穿上之后每个女孩子都能瞬间变成小仙女啦。料子非常的轻盈，透气性也很好，穿到夏天也很舒适。"}]}
+```
+
+## 开始微调
+
+通过以下代码执行 **单机多卡/多机多卡** 运行。
+
+```bash
+cd finetune
+bash sft_finetune.sh
+```
+
+通过以下代码执行 **单机单卡** 运行。
+
+```angular2html
+cd finetune
+bash lora_finetune.sh
+```
--- a/finetune/README_en.md
+++ b/finetune/README_en.md
+# MiniCPM Fine-tuning
+
+[中文版](https://github.com/OpenBMB/MiniCPM/blob/main/finetune/README.md)
+
+This directory provides examples of fine-tuning the MiniCPM-2B model, including full model fine-tuning and PEFT. In terms of format, we offer examples for multi-turn dialogue fine-tuning and input-output format fine-tuning.
+
+If you have downloaded the model to your local system, the `OpenBMB/MiniCPM-2B` field mentioned in this document and in the code should be replaced with the corresponding address to load the model from your local system.
+
+Running the example requires `python>=3.10`. Besides the basic `torch` dependency, additional dependencies are needed to run the example code.
+
+
+
+**We have provided an [example notebook](lora_finetune.ipynb) to demonstrate how to process data and use the fine-tuning script with AdvertiseGen as an example.**
+
+```bash
+pip install -r requirements.txt
+```
+
+## Testing Hardware Standard
+
+We only provide examples for single-node multi-GPU/multi-node multi-GPU setups, so you will need at least one machine with multiple GPUs. In the **default configuration file** in this repository, we have documented the memory usage:
+
+ SFT full parameters fine-tuning: Evenly distributed across 4 GPUs, each GPU consumes `30245MiB` of memory.
+ LORA fine-tuning: One GPU, consuming `10619MiB`  of memory.。
+
+> Please note that these results are for reference only, and memory consumption may vary with different parameters. Please adjust according to your hardware situation.
+
+## Multi-Turn Dialogue Format
+
+The multi-turn dialogue fine-tuning example adopts the ChatGLM3 dialogue format convention, adding different `loss_mask` for different roles, thus calculating `loss` for multiple replies in one computation.
+
+For the data file, the example uses the following format
+
+```json
+[
+  {
+    "messages": [
+      {
+        "role": "system",
+        "content": "<system prompt text>"
+      },
+      {
+        "role": "user",
+        "content": "<user prompt text>"
+      },
+      {
+        "role": "assistant",
+        "content": "<assistant response text>"
+      },
+      // ... Muti Turn
+      {
+        "role": "user",
+        "content": "<user prompt text>"
+      },
+      {
+        "role": "assistant",
+        "content": "<assistant response text>"
+      }
+    ]
+  }
+  // ...
+]
+```
+
+## Dataset Format Example
+
+Here, taking the AdvertiseGen dataset as an example,
+you can download the AdvertiseGen dataset from [Google Drive](https://drive.google.com/file/d/13_vf0xRTQsyneRKdD1bZIr93vBGOczrk/view?usp=sharing)
+or [Tsinghua Cloud](https://cloud.tsinghua.edu.cn/f/b3f119a008264b1cabd1/?dl=1) . After extracting the AdvertiseGen directory, place it in the `data` directory and convert it into the following format dataset.
+
+
+> Please note, the fine-tuning code now includes a validation set, so for a complete set of fine-tuning datasets, it must contain training and validation datasets, while the test dataset is optional. Or, you can use the validation dataset in place of it.
+
+```
+{"messages": [{"role": "user", "content": "类型#裙*裙长#半身裙"}, {"role": "assistant", "content": "这款百搭时尚的仙女半身裙，整体设计非常的飘逸随性，穿上之后每个女孩子都能瞬间变成小仙女啦。料子非常的轻盈，透气性也很好，穿到夏天也很舒适。"}]}
+```
+
+## Start Fine-tuning
+
+Execute **single-node multi-GPU/multi-node multi-GPU** runs with the following code.
+
+```bash
+cd finetune
+bash sft_finetune.sh
+```
+
+Execute **single-node single-GPU** runs with the following code.
+
+```angular2html
+cd finetune
+bash lora_finetune.sh
+```