Commit 112bf76b authored by chenzk

v1.0

import json
import math
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
import torch
import transformers
from PIL import Image
from tqdm import tqdm
import torchaudio
from vita import conversation as conversation_lib
from vita.config import *
from vita.config import AudioFolder, FolderDict
from vita.config.dataset_config import *
from vita.constants import AUDIO_TOKEN_INDEX, GLOBAL_WEIGHTS_PATH, IGNORE_INDEX, IMAGE_TOKEN_INDEX
from vita.util.data_utils_video_audio import DataArguments, LazySupervisedDataset
from vita.util.data_utils_video_audio_neg_patch import find_closest_aspect_ratio
from vita.util.mm_utils import tokenizer_image_audio_token, tokenizer_image_token
image_token_num = 256
token_thre = 9500
# datasets = NLP + HumanCentric + VideoQA + NaturalQA + VideoCap + OCRCap + NaturalCap
datasets = NaturalCap + OCRCap + VideoCap + NaturalQA
# datasets = VideoQA + HumanCentric + NLP
# datasets = [SGInternvid0]
datasets = [TextSFT, TextSFT2_0]
out_file_name = "debug.json"
parser = transformers.HfArgumentParser((DataArguments))
tokenizer = transformers.AutoTokenizer.from_pretrained(
f"{GLOBAL_WEIGHTS_PATH}/Mixtral-8x7B_New/mg2hg",
cache_dir=None,
model_max_length=8192,
padding_side="right",
use_fast=True,
)
long_json = []
def dynamic_preprocess(image, min_num=1, max_num=12, image_size=448, use_thumbnail=True):
orig_width, orig_height = image.size
aspect_ratio = orig_width / orig_height
# calculate the existing image aspect ratio
target_ratios = set(
(i, j)
for n in range(min_num, max_num + 1)
for i in range(1, n + 1)
for j in range(1, n + 1)
if i * j <= max_num and i * j >= min_num
)
target_ratios = sorted(target_ratios, key=lambda x: x[0] * x[1])
# find the closest aspect ratio to the target
target_aspect_ratio = find_closest_aspect_ratio(
aspect_ratio, target_ratios, orig_width, orig_height, image_size
)
# calculate the target width and height
target_width = image_size * target_aspect_ratio[0]
target_height = image_size * target_aspect_ratio[1]
blocks = target_aspect_ratio[0] * target_aspect_ratio[1]
if use_thumbnail and blocks != 1:
blocks += 1
return blocks
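# Worked example from the constants above: with max_num=12 and use_thumbnail=True,
# dynamic_preprocess returns at most 12 + 1 = 13 blocks, so a single image contributes
# at most 13 * image_token_num = 13 * 256 = 3328 tokens to the totals computed below.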
def get_wav_duration(file_path):
waveform, sample_rate = torchaudio.load(file_path)
duration = waveform.size(1) / sample_rate
return duration
def process_item(item, conv, roles, tokenizer):
source = item["conversations"]
conv.messages = []
for j, sentence in enumerate(source):
role = roles[sentence["from"]]
assert role == conv.roles[j % 2], f"{source}"
conv.append_message(role, sentence["value"])
prompt = conv.get_prompt()
input_ids = tokenizer_image_token(prompt, tokenizer, return_tensors="pt")
item_token_num = input_ids.shape[0]
if "image" in item:
image_file = item["image"]
if isinstance(image_file, str):
image_file = [image_file]
set_id = item["set"]
if isinstance(set_id, str):
set_id = [set_id]
for k, img_file in enumerate(image_file):
if set_id[k] not in NoPatchSets:
image_directory = FolderDict[set_id[k]]
image = Image.open(
os.path.join(image_directory, img_file.replace("\\", "/"))
).convert("RGB")
num_patches = dynamic_preprocess(image)
else:
num_patches = 1
item_token_num += num_patches * image_token_num
total_duration = 0
if "audio" in item:
audio_files = item["audio"]
audio_directory = AudioFolder
if isinstance(audio_files, str):
audio_files = [audio_files]
assert isinstance(audio_files, list)
for audio_file_name in audio_files:
audio_file_path = os.path.join(audio_directory, "audio", audio_file_name)
duration = get_wav_duration(audio_file_path)
duration = (
math.ceil(duration) if math.ceil(duration) % 2 == 0 else math.ceil(duration) + 1
)
total_duration += duration
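# Assumed conversion: roughly 12.5 audio tokens per second of (even-length-padded) audio,
# so the combined clips add ceil(total_duration * 12.5) tokens to the item count below.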
item_token_num += math.ceil(total_duration * 12.5)
if item_token_num > token_thre:
print(f"item_token_num: {item_token_num}")
if "image" in item and len(item["image"]) >= 16:
print(f"num_patches: {num_patches}")
print(f"total_duration: {total_duration}")
long_json.append(item)
print(item)
return item_token_num
for dataset in datasets:
json_file_path = dataset["chat_path"]
with open(json_file_path, "r", encoding="utf-8") as file:
data = json.load(file)
conv = conversation_lib.default_conversation.copy()
roles = {"human": conv.roles[0], "gpt": conv.roles[1]}
len_list = []
with ThreadPoolExecutor() as executor:
# give each task its own conversation copy so worker threads do not share mutable state
futures = [executor.submit(process_item, item, conv.copy(), roles, tokenizer) for item in data]
for future in tqdm(as_completed(futures), total=len(futures)):
len_list.append(future.result())
assert len(len_list) == len(data)
distribution = {
"0-100": 0,
"100-200": 0,
"200-300": 0,
"300-400": 0,
"400-500": 0,
"500-600": 0,
"600-700": 0,
"700-800": 0,
"800-900": 0,
"900-1000": 0,
"1000-1500": 0,
"1500-2000": 0,
"2000-2500": 0,
"2500-3000": 0,
"3000-3500": 0,
"3500-4000": 0,
"4000-4500": 0,
"4500-5000": 0,
"5000-5500": 0,
"5500-6000": 0,
"6000-6500": 0,
"6500-7000": 0,
"7000-7500": 0,
"7500-8000": 0,
"8000-8500": 0,
"8500-9000": 0,
"9000-9500": 0,
"9500-10000": 0,
">10000": 0,
}
for length in len_list:
if length <= 100:
distribution["0-100"] += 1
elif length <= 200:
distribution["100-200"] += 1
elif length <= 300:
distribution["200-300"] += 1
elif length <= 400:
distribution["300-400"] += 1
elif length <= 500:
distribution["400-500"] += 1
elif length <= 600:
distribution["500-600"] += 1
elif length <= 700:
distribution["600-700"] += 1
elif length <= 800:
distribution["700-800"] += 1
elif length <= 900:
distribution["800-900"] += 1
elif length <= 1000:
distribution["900-1000"] += 1
elif length <= 1500:
distribution["1000-1500"] += 1
elif length <= 2000:
distribution["1500-2000"] += 1
elif length <= 2500:
distribution["2000-2500"] += 1
elif length <= 3000:
distribution["2500-3000"] += 1
elif length <= 3500:
distribution["3000-3500"] += 1
elif length <= 4000:
distribution["3500-4000"] += 1
elif length <= 4500:
distribution["4000-4500"] += 1
elif length <= 5000:
distribution["4500-5000"] += 1
elif length <= 5500:
distribution["5000-5500"] += 1
elif length <= 6000:
distribution["5500-6000"] += 1
elif length <= 6500:
distribution["6000-6500"] += 1
elif length <= 7000:
distribution["6500-7000"] += 1
elif length <= 7500:
distribution["7000-7500"] += 1
elif length <= 8000:
distribution["7500-8000"] += 1
elif length <= 8500:
distribution["8000-8500"] += 1
elif length <= 9000:
distribution["8500-9000"] += 1
elif length <= 9500:
distribution["9000-9500"] += 1
elif length <= 10000:
distribution["9500-10000"] += 1
else:
distribution[">10000"] += 1
print(f"Length distribution of {json_file_path}:")
for key, value in distribution.items():
print(f"{key}: {value}")
# with open(out_file_name, 'w', encoding='utf-8') as file:
# json.dump(long_json*10, file, ensure_ascii=False, indent=4)
# print(f"Done: items longer than {token_thre} tokens have been saved to {out_file_name}")
import json
import math
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
import numpy as np
import torch
import transformers
from PIL import Image
from tqdm import tqdm
import torchaudio
from decord import VideoReader, cpu
from vita import conversation as conversation_lib
from vita.config import *
from vita.config import AudioFolder, FolderDict
from vita.config.dataset_config import *
from vita.constants import (
DEFAULT_AUDIO_TOKEN,
DEFAULT_IMAGE_TOKEN,
DEFAULT_VIDEO_TOKEN,
GLOBAL_WEIGHTS_PATH,
IGNORE_INDEX,
MAX_IMAGE_LENGTH,
MIN_IMAGE_LENGTH,
)
from vita.util.data_utils_video_audio import DataArguments, LazySupervisedDataset
from vita.util.data_utils_video_audio_neg_patch import find_closest_aspect_ratio
from vita.util.mm_utils import tokenizer_image_audio_token, tokenizer_image_token
image_token_num = 256
token_thre = 9500
# datasets = NLP + HumanCentric + VideoQA + NaturalQA + VideoCap + OCRCap + NaturalCap
datasets = NaturalCap + OCRCap + VideoCap + NaturalQA
# datasets = VideoQA + HumanCentric + NLP
# datasets = [SGInternvid0]
# datasets = [TextSFT, TextSFT2_0]
datasets = VideoCap
out_file_name = "debug.json"
parser = transformers.HfArgumentParser((DataArguments))
tokenizer = transformers.AutoTokenizer.from_pretrained(
f"{GLOBAL_WEIGHTS_PATH}/Mixtral-8x7B_New/mg2hg",
cache_dir=None,
model_max_length=8192,
padding_side="right",
use_fast=True,
)
long_json = []
def dynamic_preprocess(image, min_num=1, max_num=12, image_size=448, use_thumbnail=True):
orig_width, orig_height = image.size
aspect_ratio = orig_width / orig_height
# calculate the existing image aspect ratio
target_ratios = set(
(i, j)
for n in range(min_num, max_num + 1)
for i in range(1, n + 1)
for j in range(1, n + 1)
if i * j <= max_num and i * j >= min_num
)
target_ratios = sorted(target_ratios, key=lambda x: x[0] * x[1])
# find the closest aspect ratio to the target
target_aspect_ratio = find_closest_aspect_ratio(
aspect_ratio, target_ratios, orig_width, orig_height, image_size
)
# calculate the target width and height
target_width = image_size * target_aspect_ratio[0]
target_height = image_size * target_aspect_ratio[1]
blocks = target_aspect_ratio[0] * target_aspect_ratio[1]
if use_thumbnail and blocks != 1:
blocks += 1
return blocks
def get_wav_duration(file_path):
waveform, sample_rate = torchaudio.load(file_path)
duration = waveform.size(1) / sample_rate
return duration
def get_video_frame(
video_path,
max_frames=MAX_IMAGE_LENGTH,
min_frames=MIN_IMAGE_LENGTH,
video_framerate=1,
s=None,
e=None,
):
if s is None:
start_time, end_time = None, None
else:
start_time = int(s)
end_time = int(e)
start_time = start_time if start_time >= 0.0 else 0.0
end_time = end_time if end_time >= 0.0 else 0.0
if start_time > end_time:
start_time, end_time = end_time, start_time
elif start_time == end_time:
end_time = start_time + 1
if os.path.exists(video_path):
vreader = VideoReader(video_path, ctx=cpu(0))
else:
print(video_path)
raise FileNotFoundError
fps = vreader.get_avg_fps()
f_start = 0 if start_time is None else int(start_time * fps)
f_end = int(min(1000000000 if end_time is None else end_time * fps, len(vreader) - 1))
num_frames = f_end - f_start + 1
if num_frames > 0:
# T x 3 x H x W
sample_fps = int(video_framerate)
t_stride = int(round(float(fps) / sample_fps))
all_pos = list(range(f_start, f_end + 1, t_stride))
if len(all_pos) > max_frames:
sample_pos = [
all_pos[_] for _ in np.linspace(0, len(all_pos) - 1, num=max_frames, dtype=int)
]
elif len(all_pos) < min_frames:
sample_pos = [
all_pos[_] for _ in np.linspace(0, len(all_pos) - 1, num=min_frames, dtype=int)
]
else:
sample_pos = all_pos
return len(sample_pos)
# no frames fall inside the requested range: contribute zero sampled frames
return 0
def process_item(item, conv, roles, tokenizer):
source = item["conversations"]
conv.messages = []
for j, sentence in enumerate(source):
role = roles[sentence["from"]]
assert role == conv.roles[j % 2], f"{source}"
conv.append_message(role, sentence["value"])
prompt = conv.get_prompt()
input_ids = tokenizer_image_token(prompt, tokenizer, return_tensors="pt")
item_token_num = input_ids.shape[0]
if "image" in item:
image_file = item["image"]
if isinstance(image_file, str):
image_file = [image_file]
set_id = item["set"]
if isinstance(set_id, str):
set_id = [set_id]
for k, img_file in enumerate(image_file):
if set_id[k] not in NoPatchSets:
image_directory = FolderDict[set_id[k]]
image = Image.open(
os.path.join(image_directory, img_file.replace("\\", "/"))
).convert("RGB")
num_patches = dynamic_preprocess(image)
else:
num_patches = 1
item_token_num += num_patches * image_token_num
if "video" in item:
video_file = item["video"]
if isinstance(video_file, str):
video_file = [video_file]
set_id = item["set"]
if isinstance(set_id, str):
set_id = [set_id]
for k, video_file_name in enumerate(video_file):
video_directory = FolderDict[set_id[k]]
video_file_path = os.path.join(video_directory, video_file_name)
num_frame = get_video_frame(video_file_path)
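# Each sampled video frame is budgeted at image_token_num (256) tokens, the same
# per-block cost used for still images above.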
item_token_num += num_frame * image_token_num
total_duration = 0
if "audio" in item:
audio_files = item["audio"]
audio_directory = AudioFolder
if isinstance(audio_files, str):
audio_files = [audio_files]
assert isinstance(audio_files, list)
for audio_file_name in audio_files:
audio_file_path = os.path.join(audio_directory, "audio", audio_file_name)
duration = get_wav_duration(audio_file_path)
duration = (
math.ceil(duration) if math.ceil(duration) % 2 == 0 else math.ceil(duration) + 1
)
total_duration += duration
item_token_num += math.ceil(total_duration * 12.5)
if item_token_num > token_thre:
print(f"item_token_num: {item_token_num}")
if "image" in item and len(item["image"]) >= 16:
print(f"num_patches: {num_patches}")
print(f"total_duration: {total_duration}")
long_json.append(item)
print(item)
return item_token_num
for dataset in datasets:
json_file_path = dataset["chat_path"]
with open(json_file_path, "r", encoding="utf-8") as file:
data = json.load(file)
conv = conversation_lib.default_conversation.copy()
roles = {"human": conv.roles[0], "gpt": conv.roles[1]}
len_list = []
with ThreadPoolExecutor() as executor:
# give each task its own conversation copy so worker threads do not share mutable state
futures = [executor.submit(process_item, item, conv.copy(), roles, tokenizer) for item in data]
for future in tqdm(as_completed(futures), total=len(futures)):
len_list.append(future.result())
assert len(len_list) == len(data)
distribution = {
"0-100": 0,
"100-200": 0,
"200-300": 0,
"300-400": 0,
"400-500": 0,
"500-600": 0,
"600-700": 0,
"700-800": 0,
"800-900": 0,
"900-1000": 0,
"1000-1500": 0,
"1500-2000": 0,
"2000-2500": 0,
"2500-3000": 0,
"3000-3500": 0,
"3500-4000": 0,
"4000-4500": 0,
"4500-5000": 0,
"5000-5500": 0,
"5500-6000": 0,
"6000-6500": 0,
"6500-7000": 0,
"7000-7500": 0,
"7500-8000": 0,
"8000-8500": 0,
"8500-9000": 0,
"9000-9500": 0,
"9500-10000": 0,
">10000": 0,
}
for length in len_list:
if length <= 100:
distribution["0-100"] += 1
elif length <= 200:
distribution["100-200"] += 1
elif length <= 300:
distribution["200-300"] += 1
elif length <= 400:
distribution["300-400"] += 1
elif length <= 500:
distribution["400-500"] += 1
elif length <= 600:
distribution["500-600"] += 1
elif length <= 700:
distribution["600-700"] += 1
elif length <= 800:
distribution["700-800"] += 1
elif length <= 900:
distribution["800-900"] += 1
elif length <= 1000:
distribution["900-1000"] += 1
elif length <= 1500:
distribution["1000-1500"] += 1
elif length <= 2000:
distribution["1500-2000"] += 1
elif length <= 2500:
distribution["2000-2500"] += 1
elif length <= 3000:
distribution["2500-3000"] += 1
elif length <= 3500:
distribution["3000-3500"] += 1
elif length <= 4000:
distribution["3500-4000"] += 1
elif length <= 4500:
distribution["4000-4500"] += 1
elif length <= 5000:
distribution["4500-5000"] += 1
elif length <= 5500:
distribution["5000-5500"] += 1
elif length <= 6000:
distribution["5500-6000"] += 1
elif length <= 6500:
distribution["6000-6500"] += 1
elif length <= 7000:
distribution["6500-7000"] += 1
elif length <= 7500:
distribution["7000-7500"] += 1
elif length <= 8000:
distribution["7500-8000"] += 1
elif length <= 8500:
distribution["8000-8500"] += 1
elif length <= 9000:
distribution["8500-9000"] += 1
elif length <= 9500:
distribution["9000-9500"] += 1
elif length <= 10000:
distribution["9500-10000"] += 1
else:
distribution[">10000"] += 1
print(f"Length distribution of {json_file_path}:")
for key, value in distribution.items():
print(f"{key}: {value}")
# with open(out_file_name, 'w', encoding='utf-8') as file:
# json.dump(long_json*10, file, ensure_ascii=False, indent=4)
# print(f"Done: items longer than {token_thre} tokens have been saved to {out_file_name}")
FROM image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.3.0-ubuntu22.04-dtk24.04.2-py3.10
ENV DEBIAN_FRONTEND=noninteractive
# RUN yum update && yum install -y git cmake wget build-essential
# RUN source /opt/dtk-24.04.2/env.sh
# # Install pip dependencies
COPY requirements.txt requirements.txt
RUN pip3 install -r requirements.txt -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com
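# A minimal usage sketch for this Dockerfile (the image tag, mount path, and shm size
# are illustrative assumptions, not values taken from this repo):
# docker build -t vita-dcu:v1.0 .
# docker run --rm -it --shm-size=16g -v $(pwd):/workspace/VITA vita-dcu:v1.0 bash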
accelerate==0.30.1
decord==0.6.0
#deepspeed==0.9.5
Jinja2==3.1.4
ninja==1.11.1.1
numpy==1.26.4
#torch==2.3.1
#torchaudio==2.3.1
#torchvision
tqdm==4.66.4
transformers==4.41.1
#xformers
timm
soundfile==0.12.1
icon.png (53.8 KB, binary image asset)
# Text query
HIP_VISIBLE_DEVICES=0,1 python video_audio_demo.py --model_path VITA/VITA_ckpt --image_path asset/vita_log2.png --model_type mixtral-8x7b --conv_mode mixtral_two --question "请描述这张图片。"
# Audio query
# HIP_VISIBLE_DEVICES=0,1 python video_audio_demo.py --model_path VITA/VITA_ckpt --image_path asset/vita_log2.png --model_type mixtral-8x7b --conv_mode mixtral_two --audio_path asset/q1.wav
# Noisy audio query
# HIP_VISIBLE_DEVICES=0,1 python video_audio_demo.py --model_path VITA/VITA_ckpt --image_path asset/vita_log2.png --model_type mixtral-8x7b --conv_mode mixtral_two --audio_path asset/q2.wav
# -*- coding: utf-8 -*-
import time
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from vita.constants import GLOBAL_WEIGHTS_PATH
model_dir = f"{GLOBAL_WEIGHTS_PATH}/Mixtral-8x7B_New/mg2hg"
tokenizer = AutoTokenizer.from_pretrained(model_dir)
# The given token ID list
id_list = [
1,
1587,
28747,
29383,
28971,
28518,
32350,
33702,
28944,
13,
28733,
28705,
29383,
28971,
32569,
32730,
32606,
28914,
29050,
35267,
32315,
28944,
29383,
28914,
32626,
39797,
28971,
32311,
29041,
41993,
29958,
46454,
28944,
13,
28733,
28705,
29383,
32585,
32474,
32599,
32683,
28914,
29292,
29824,
35267,
32100,
44797,
33089,
29457,
38038,
32599,
28914,
32509,
28944,
13,
28733,
47068,
32599,
38201,
29383,
37676,
28914,
34559,
35845,
28924,
29383,
29179,
29478,
32599,
41534,
29457,
29551,
32599,
35702,
34415,
28914,
35845,
28944,
2,
28705,
13,
1838,
28747,
]
id_list = [
28991,
34275,
29105,
33216,
30344,
29675,
28914,
46018,
29131,
29086,
28944,
29087,
29960,
28991,
34700,
43072,
28914,
28971,
28518,
29046,
]
id_list = [
28705,
13,
2,
28705,
13,
10093,
28747,
51497,
40994,
30162,
32980,
39944,
29105,
28518,
41772,
28914,
34796,
32703,
28924,
29450,
28991,
34275,
29105,
33216,
30344,
29675,
28914,
46018,
29131,
29086,
28944,
29087,
29960,
28991,
34700,
43072,
28914,
28971,
28518,
29046,
29003,
28835,
4712,
28743,
12673,
28838,
28914,
46018,
28924,
29450,
33778,
31224,
29222,
29146,
33280,
29010,
36599,
28914,
49363,
29054,
28944,
32641,
46018,
29074,
29450,
34526,
28914,
32626,
40497,
28924,
32590,
28518,
30308,
29251,
30912,
29677,
29131,
28518,
35545,
28914,
51009,
29169,
28944,
13,
29010,
33292,
28991,
28924,
32012,
32924,
29450,
29440,
34051,
46018,
28924,
33837,
46018,
33421,
32587,
28914,
33103,
28944,
29450,
28991,
28518,
46018,
28998,
28518,
36101,
28914,
33778,
28924,
29746,
31127,
28518,
29310,
35348,
30163,
32813,
28914,
31249,
31861,
28944,
32663,
46018,
29054,
28914,
33114,
29302,
29010,
32155,
33053,
28924,
41192,
29992,
30163,
42747,
28924,
29746,
41192,
29310,
30150,
29010,
49460,
29169,
49565,
28944,
13,
33238,
33015,
29458,
29366,
29366,
28914,
41261,
29061,
28914,
36599,
38437,
30131,
30631,
28924,
34249,
29065,
48245,
29746,
32850,
28914,
33857,
28944,
33257,
32031,
41772,
28924,
44169,
28969,
29824,
34239,
30266,
28924,
33837,
35115,
29460,
39676,
40016,
29074,
33158,
35523,
29276,
28914,
43604,
28944,
36286,
28991,
28914,
36096,
32557,
28971,
37478,
28914,
28924,
33070,
35155,
49059,
49550,
28914,
36096,
47444,
28924,
29118,
36101,
29131,
32813,
28914,
33778,
28944,
44488,
28914,
29367,
29051,
33151,
33647,
29176,
28971,
28518,
36059,
32710,
28914,
32703,
32854,
28924,
49323,
29010,
32857,
35049,
29276,
32789,
28944,
2,
]
# Convert the ID list to a PyTorch tensor
id_tensor = torch.tensor(id_list)
# Decode the IDs with the tokenizer
decoded_text = tokenizer.decode(id_tensor, skip_special_tokens=True)
print(f"Decoded text: {decoded_text}")
# -*- coding: utf-8 -*-
import time
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from vita.constants import GLOBAL_WEIGHTS_PATH
model_dir = f"{GLOBAL_WEIGHTS_PATH}/Mixtral-8x7B_modVocab/mg2hg"
tokenizer = AutoTokenizer.from_pretrained(model_dir)
system_prompt = "你是一个人工智能机器人。\n- 你是研究社区开发的大语言模型。你的设计宗旨是有益、诚实且无害。\n- 你支持使用用户选择的多种语言流利地进行交流并解答用户的问题。\n- 如果用户更正你生成的错误答案,你会向用户致歉并与用户探讨正确的答案。"
question = "请详细介绍一下火星。"
chat_template = "system:{system_prompt}</s>\nuser:{question}</s>\nbot:"
text = chat_template.format(system_prompt=system_prompt, question=question)
input_ids = tokenizer(text, return_tensors="pt")["input_ids"]
input_ids = input_ids.to("cuda")
model = AutoModelForCausalLM.from_pretrained(
# model_dir, torch_dtype=torch.float16, device_map="auto",attn_implementation="flash_attention_2").eval()
model_dir,
torch_dtype=torch.float16,
device_map="auto",
).eval()
start_time = time.time()
outputs = model.generate(input_ids, max_new_tokens=10)
time_consume = time.time() - start_time
outputs = outputs.cpu().numpy()[0]
outputs = outputs[len(input_ids[0]) :]
output_text = tokenizer.decode(outputs, skip_special_tokens=True)
print(output_text)
print(f"Time consume: {time_consume}")
# Model code
modelCode=1068
# Model name
modelName=vita_pytorch
# Model description
modelDescription=VITA can process video, images, text, and audio, delivering an advanced multimodal interaction experience that can be activated without wake words or buttons.
# Application scenarios
appScenario=inference,dialogue Q&A,manufacturing,broadcast media,finance,energy,healthcare,smart home,education
# Framework type
frameType=pytorch
accelerate==0.30.1
decord==0.6.0
#deepspeed==0.9.5
Jinja2==3.1.4
ninja==1.11.1.1
numpy==1.26.4
#torch==2.3.1
#torchaudio==2.3.1
#torchvision
tqdm==4.66.4
transformers==4.41.1
#xformers
timm
soundfile==0.12.1
{
"fp16": {
"enabled": "auto",
"loss_scale": 0,
"loss_scale_window": 1000,
"initial_scale_power": 32,
"hysteresis": 2,
"min_loss_scale": 1
},
"bf16": {
"enabled": "auto"
},
"optimizer": {
"type": "AdamW",
"params": {
"lr": "auto",
"betas": "auto",
"eps": "auto",
"weight_decay": "auto"
}
},
"scheduler": {
"type": "WarmupDecayLR",
"params": {
"total_num_steps" : "auto",
"warmup_min_lr": "auto",
"warmup_max_lr": "auto",
"warmup_num_steps": "auto"
}
},
"zero_optimization": {
"stage": 3,
"overlap_comm": true,
"contiguous_gradients": true,
"sub_group_size": 1e9,
"reduce_bucket_size": "auto",
"stage3_prefetch_bucket_size": "auto",
"stage3_param_persistence_threshold": "auto",
"stage3_max_live_parameters": 5e8,
"stage3_max_reuse_distance": 5e8,
"stage3_gather_16bit_weights_on_model_save": true
},
"gradient_accumulation_steps": "auto",
"gradient_clipping": "auto",
"steps_per_print": 2000,
"train_batch_size": "auto",
"train_micro_batch_size_per_gpu": "auto",
"wall_clock_breakdown": false
}
{
"fp16": {
"enabled": "auto",
"loss_scale": 0,
"loss_scale_window": 1000,
"initial_scale_power": 32,
"hysteresis": 2,
"min_loss_scale": 1
},
"bf16": {
"enabled": "auto"
},
"optimizer": {
"type": "AdamW",
"params": {
"lr": "auto",
"betas": "auto",
"eps": "auto",
"weight_decay": "auto"
}
},
"scheduler": {
"type": "WarmupDecayLR",
"params": {
"total_num_steps" : "auto",
"warmup_min_lr": "auto",
"warmup_max_lr": "auto",
"warmup_num_steps": "auto"
}
},
"zero_optimization": {
"stage": 3,
"offload_optimizer": {
"device": "cpu",
"pin_memory": true
},
"offload_param": {
"device": "cpu",
"pin_memory": true
},
"overlap_comm": true,
"contiguous_gradients": true,
"sub_group_size": 1e9,
"reduce_bucket_size": "auto",
"stage3_prefetch_bucket_size": "auto",
"stage3_param_persistence_threshold": "auto",
"stage3_max_live_parameters": 5e8,
"stage3_max_reuse_distance": 5e8,
"stage3_gather_16bit_weights_on_model_save": true
},
"gradient_accumulation_steps": "auto",
"gradient_clipping": "auto",
"steps_per_print": 2000,
"train_batch_size": "auto",
"train_micro_batch_size_per_gpu": "auto",
"wall_clock_breakdown": false
}
{
"fp16": {
"enabled": "auto",
"loss_scale": 0,
"loss_scale_window": 1000,
"initial_scale_power": 32,
"hysteresis": 2,
"min_loss_scale": 1
},
"bf16": {
"enabled": "auto"
},
"optimizer": {
"type": "AdamW",
"params": {
"lr": "auto",
"betas": "auto",
"eps": "auto",
"weight_decay": "auto"
}
},
"scheduler": {
"type": "WarmupDecayLR",
"params": {
"total_num_steps" : "auto",
"warmup_min_lr": "auto",
"warmup_max_lr": "auto",
"warmup_num_steps": "auto"
}
},
"zero_optimization": {
"stage": 3,
"offload_optimizer": {
"device": "cpu",
"pin_memory": true
},
"overlap_comm": true,
"contiguous_gradients": true,
"sub_group_size": 1e9,
"reduce_bucket_size": "auto",
"stage3_prefetch_bucket_size": "auto",
"stage3_param_persistence_threshold": "auto",
"stage3_max_live_parameters": 5e8,
"stage3_max_reuse_distance": 5e8,
"stage3_gather_16bit_weights_on_model_save": true
},
"gradient_accumulation_steps": "auto",
"gradient_clipping": "auto",
"steps_per_print": 2000,
"train_batch_size": "auto",
"train_micro_batch_size_per_gpu": "auto",
"wall_clock_breakdown": false
}
{
"fp16": {
"enabled": "auto",
"loss_scale": 0,
"loss_scale_window": 1000,
"initial_scale_power": 16,
"hysteresis": 2,
"min_loss_scale": 1
},
"bf16": {
"enabled": "auto"
},
"train_micro_batch_size_per_gpu": "auto",
"train_batch_size": "auto",
"gradient_accumulation_steps": "auto",
"zero_optimization": {
"stage": 2,
"overlap_comm": true,
"contiguous_gradients": true,
"sub_group_size": 1e9,
"reduce_bucket_size": "auto"
}
}
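# A minimal sketch (not taken from this repo) of wiring a DeepSpeed JSON config such as
# the ones above into a Hugging Face Trainer run; the output_dir and the filename
# "ds_config_zero2.json" are assumptions for illustration. The "auto" entries in the
# JSON are resolved from these training arguments at launch time.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="./checkpoints",        # assumed output path
    per_device_train_batch_size=1,     # fills "train_micro_batch_size_per_gpu": "auto"
    gradient_accumulation_steps=8,     # fills "gradient_accumulation_steps": "auto"
    deepspeed="ds_config_zero2.json",  # assumed filename for one of the JSON configs above
)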