"openmmapi/src/MonteCarloFlexibleBarostatImpl.cpp" did not exist on "b7a24d6cf0304b32a7625ca665f49af71c6088ce"
Commit 876a36a4 authored by raojy's avatar raojy
Browse files

first

parent eda2afb8
accelerate<1
bitsandbytes==0.42.0
decord
deepspeed>=0.13.5
einops==0.6.1
einops-exts==0.0.4
huggingface_hub
imageio
numpy==1.26.4
opencv-python
orjson
peft==0.10.0
pycocoevalcap
pyyaml
scikit-learn>=1.2.2
scipy
sentencepiece==0.1.99
shortuuid
tensorboardX
termcolor
timm==0.9.12
tokenizers==0.15.1
torch>=2
torchvision>=0.15
tqdm
transformers==4.37.2
yacs
datasets:
- path: /path/to/SenseNova-SI-800K/SenseNova-SI-800K_qwen3vl_format.jsonl
data_folder: /path/to/SenseNova-SI-800K/
data_type: jsonl
datasets:
- path: /path/to/SenseNova-SI-8M/SenseNova-SI-8M_qwen3vl_format.jsonl
data_folder: /path/to/SenseNova-SI-8M/
data_type: jsonl
"""Preprocess SenseNova-SI dataset JSONL into lmms-engine compatible format.
This script fixes two schema incompatibilities:
1. `image` mixed types (`str` and `list[str]`) -> normalized to `list[str]`.
2. `conversations` format -> converted to `messages` with structured `content`.
"""
from __future__ import annotations
import argparse
import json
from pathlib import Path
from typing import Any
def normalize_image_field(sample: dict[str, Any]) -> bool:
    """Coerce ``sample["image"]`` to a list in place.

    A bare string is wrapped in a one-element list. A value that is already
    a list, is ``None``, or is absent is left untouched.

    Args:
        sample: One decoded record; mutated in place when a string is wrapped.

    Returns:
        True if the field was rewritten, False if nothing changed.

    Raises:
        ValueError: If ``image`` is present with any other type.
    """
    value = sample.get("image")
    # Nothing to do for a missing field or one that is already a list.
    if value is None or isinstance(value, list):
        return False
    if not isinstance(value, str):
        raise ValueError(f"Unsupported image type: {type(value).__name__}")
    sample["image"] = [value]
    return True
def map_conversations_to_messages(sample: dict[str, Any]) -> bool:
    """Replace ``sample["conversations"]`` with a ``messages`` list, in place.

    Each conversation entry is read through its ``from`` and ``value`` keys
    and becomes ``{"role": ..., "content": [{"type": "text", "text": ...}]}``.
    ``"human"`` maps to ``"user"`` and ``"gpt"`` to ``"assistant"``; a missing
    or ``None`` sender maps to ``"user"``; any other sender is stringified.
    A missing ``value`` yields an empty text.

    Args:
        sample: One decoded record; mutated in place on success.

    Returns:
        True if ``conversations`` was converted, False if the key is absent
        or ``None``.

    Raises:
        ValueError: If ``conversations`` is not a list, or an entry is not a
            dict. The sample is left unmodified in that case.
    """
    turns = sample.get("conversations")
    if turns is None:
        return False
    if not isinstance(turns, list):
        raise ValueError("`conversations` must be a list.")

    # Build the full list first so a bad entry leaves `sample` untouched.
    messages: list[dict[str, Any]] = []
    for turn in turns:
        if not isinstance(turn, dict):
            raise ValueError("Each `conversations` item must be an object.")

        sender = turn.get("from")
        if sender is None or sender == "human":
            role = "user"
        elif sender == "gpt":
            role = "assistant"
        else:
            role = str(sender)

        text_part = {"type": "text", "text": turn.get("value", "")}
        messages.append({"role": role, "content": [text_part]})

    sample["messages"] = messages
    sample.pop("conversations")
    return True
def default_output_path(src_path: Path) -> Path:
    """Return a sibling of ``src_path`` whose stem ends in ``_qwen3vl_format``.

    The source's own suffix is kept; ``.jsonl`` is used when it has none.
    """
    extension = src_path.suffix if src_path.suffix else ".jsonl"
    new_name = src_path.stem + "_qwen3vl_format" + extension
    return src_path.with_name(new_name)
def preprocess_jsonl(src_path: Path, dst_path: Path) -> None:
    """Read a JSONL file, normalize every record, and write the result.

    Blank lines are skipped. Each remaining line must decode to a JSON
    object; it is passed through ``normalize_image_field`` and
    ``map_conversations_to_messages`` and then written as one line of
    ``dst_path``. A one-line summary of the counts is printed at the end.

    Args:
        src_path: Input JSONL file.
        dst_path: Output JSONL file; parent directories are created if
            needed and an existing file is overwritten.

    Raises:
        ValueError: If ``src_path`` and ``dst_path`` resolve to the same
            file, if a line is not valid JSON, or if a line does not decode
            to a JSON object.
    """
    # Opening the destination with "w" truncates it immediately, so writing
    # onto the source would wipe the input before a single line is read.
    if src_path.resolve() == dst_path.resolve():
        raise ValueError(
            f"Source and destination are the same file: '{src_path}'. "
            "Refusing to overwrite the input."
        )

    image_fixed_count = 0
    conversation_fixed_count = 0
    total_count = 0
    dst_path.parent.mkdir(parents=True, exist_ok=True)
    with (
        src_path.open("r", encoding="utf-8") as source,
        dst_path.open("w", encoding="utf-8") as target,
    ):
        for line_number, line in enumerate(source, start=1):
            stripped = line.strip()
            if not stripped:
                continue
            try:
                sample = json.loads(stripped)
            except json.JSONDecodeError as error:
                raise ValueError(
                    f"Invalid JSON at line {line_number}: {error}"
                ) from error
            if not isinstance(sample, dict):
                raise ValueError(f"Line {line_number} is not a JSON object.")
            if normalize_image_field(sample):
                image_fixed_count += 1
            if map_conversations_to_messages(sample):
                conversation_fixed_count += 1
            # ensure_ascii=False keeps non-ASCII text readable in the output.
            target.write(json.dumps(sample, ensure_ascii=False) + "\n")
            total_count += 1
    print(
        "Done."
        f" total={total_count},"
        f" image_fixed={image_fixed_count},"
        f" conversations_mapped={conversation_fixed_count},"
        f" output='{dst_path}'"
    )
def build_args() -> argparse.Namespace:
    """Parse the command line and return the resulting namespace.

    Options:
        --src: Required input JSONL path.
        --dst: Optional output JSONL path; ``None`` when omitted.
    """
    option_specs = (
        (
            "--src",
            {
                "required": True,
                "type": Path,
                "help": "Path to original SenseNova-SI dataset JSONL.",
            },
        ),
        (
            "--dst",
            {
                "type": Path,
                "default": None,
                "help": "Output JSONL path. Default: <src_stem>_qwen3vl_format.jsonl",
            },
        ),
    )
    cli = argparse.ArgumentParser(
        description="Preprocess SenseNova-SI dataset JSONL for lmms-engine training."
    )
    for flag, settings in option_specs:
        cli.add_argument(flag, **settings)
    return cli.parse_args()
def main() -> None:
    """Parse CLI arguments, pick the output path, and run the preprocessing."""
    cli_args = build_args()
    output_path = cli_args.dst
    if output_path is None:
        # No explicit --dst: derive the output name from the source path.
        output_path = default_output_path(cli_args.src)
    preprocess_jsonl(src_path=cli_args.src, dst_path=output_path)


if __name__ == "__main__":
    main()
#!/bin/bash
################################################################################
# Qwen3-VL 8B Training with FSDP2 + Ulysses Sequence Parallel
################################################################################
#
# DESCRIPTION:
# Train Qwen3-VL vision-language model with support for long sequences
# using Ulysses Sequence Parallel and FSDP2 distributed training.
#
# KEY FEATURES:
# - Multi-resolution visual understanding
# - Ulysses SP for 10K+ visual tokens
# - Flash Attention 2 + unpadding (use_rmpad)
# - Sequence packing (35-40% MFU)
# - Liger Kernel fused operations
# - FSDP2 distributed training
#
# REQUIREMENTS:
# - 8x GPUs (A100/H100 recommended, 80GB VRAM)
# - flash-attn: pip install flash-attn --no-build-isolation
# - liger-kernel: pip install liger-kernel
#
# DATASET:
# Prepare your dataset in OpenAI chat format (JSONL/Arrow):
# See: docs/user_guide/data_prep.md
#
# Example dataset YAML (data/video/debug.yaml):
# ```yaml
# datasets:
# - path: /path/to/your/dataset
# data_folder: ""
# data_type: arrow
# ```
#
# CONFIGURATION:
# Edit training/qwen3_vl/train_config_800K.yaml (or train_config_8M.yaml) to customize:
# - Model size (2B/8B/72B): change load_from_pretrained_path
# - Sequence length: adjust packing_length
# - SP degree: set sp_ulysses_degree (1/2/4/8)
# - Batch size: per_device_train_batch_size
# - Max frames: video_max_frames
#
# PERFORMANCE TIPS:
# - Adjust sp_ulysses_degree based on sequence length:
# * Degree 1: < 10K tokens
# * Degree 2: 10K-20K tokens
# * Degree 4: 20K-40K tokens
# * Degree 8: 40K+ tokens
# - Enable packing for better MFU: set packing: true
# - Use gradient_checkpointing for larger models (already enabled)
# - Monitor memory with: watch -n 1 nvidia-smi
#
################################################################################
# Number of GPUs per node. Defaults to 8; override with the NGPUS
# environment variable, e.g. NGPUS=4 bash training/qwen3_vl/run.sh
NGPUS="${NGPUS:-8}"
# Rendezvous port. Defaults to 12355; override with MASTER_PORT if that
# port is already taken on this machine.
MASTER_PORT="${MASTER_PORT:-12355}"

# Dataset scale: first argument 800K or 8M (default: 800K).
# Example: bash training/qwen3_vl/run.sh 8M
DATA_SCALE="${1:-800K}"
case "${DATA_SCALE}" in
    800K|8M)
        TRAIN_CONFIG="training/qwen3_vl/train_config_${DATA_SCALE}.yaml"
        ;;
    *)
        echo "Usage: $0 [800K|8M]" >&2
        echo " 800K SenseNova-SI 800K preset (train_config_800K.yaml + data_800K.yaml)" >&2
        echo " 8M SenseNova-SI 8M preset (train_config_8M.yaml + data_8M.yaml)" >&2
        exit 1
        ;;
esac

# TRAIN_CONFIG is relative to the current working directory; fail here with
# a clear message instead of letting every torchrun worker fail later.
if [[ ! -f "${TRAIN_CONFIG}" ]]; then
    echo "Config not found: ${TRAIN_CONFIG} (path is relative to the current directory)" >&2
    exit 1
fi

# Training command
torchrun --nproc_per_node="${NGPUS}" \
    --nnodes=1 \
    --node_rank=0 \
    --master_addr=127.0.0.1 \
    --master_port="${MASTER_PORT}" \
    -m lmms_engine.launch.cli \
    config_yaml="${TRAIN_CONFIG}"
################################################################################
# MULTI-NODE TRAINING:
#
# On rank 0 node:
# torchrun --nproc_per_node=8 \
# --nnodes=2 \
# --node_rank=0 \
# --master_addr=<RANK_0_IP> \
# --master_port=12355 \
# -m lmms_engine.launch.cli \
# config_yaml=training/qwen3_vl/train_config_800K.yaml
#
# On rank 1 node:
# torchrun --nproc_per_node=8 \
# --nnodes=2 \
# --node_rank=1 \
# --master_addr=<RANK_0_IP> \
# --master_port=12355 \
# -m lmms_engine.launch.cli \
# config_yaml=training/qwen3_vl/train_config_800K.yaml
#
################################################################################
trainer_type: fsdp2_trainer
dataset_config:
extra_kwargs: {}
dataset_type: qwen3_vl_iterable
dataset_format: yaml
processor_config:
processor_name: Qwen/Qwen3-VL-8B-Instruct
processor_type: qwen3_vl
dataset_path: training/qwen3_vl/data_800K.yaml
datasets: null
shuffle: true
eval_dataset_path: null
object_storage: none
bucket_name: null
packing: false
packing_strategy: first_fit
packing_length: 40000
filter_overlong: true
filter_overlong_workers: 8
max_length: null
video_sampling_strategy: fps
video_max_pixels: 50176
video_max_frames: 512
frame_num: 64
fps: 1
video_backend: qwen_vl_utils
trainer_args:
output_dir: ./results/qwen3_vl/sensenova_si_800K
overwrite_output_dir: false
do_train: false
do_eval: false
do_predict: false
eval_strategy: 'no'
prediction_loss_only: false
per_device_train_batch_size: 1
per_device_eval_batch_size: 1
per_gpu_train_batch_size: null
per_gpu_eval_batch_size: null
gradient_accumulation_steps: 1
eval_accumulation_steps: null
eval_delay: 0
torch_empty_cache_steps: null
learning_rate: 1.0e-05
weight_decay: 0.0
adam_beta1: 0.9
adam_beta2: 0.999
adam_epsilon: 1.0e-08
max_grad_norm: 1.0
num_train_epochs: 1
max_steps: 1000
lr_scheduler_type: cosine
lr_scheduler_kwargs: {}
warmup_ratio: 0.01
warmup_steps: 0
log_level: passive
log_level_replica: warning
log_on_each_node: true
logging_dir: ./output/qwen3_vl_training/runs
logging_strategy: steps
logging_first_step: false
logging_steps: 1
logging_nan_inf_filter: true
save_strategy: steps
save_steps: 200
save_total_limit: 1
save_safetensors: true
save_on_each_node: false
save_only_model: false
restore_callback_states_from_checkpoint: false
no_cuda: false
use_cpu: false
use_mps_device: false
seed: 42
data_seed: null
jit_mode_eval: false
bf16: true
fp16: false
fp16_opt_level: O1
half_precision_backend: auto
bf16_full_eval: false
fp16_full_eval: false
tf32: null
local_rank: 0
ddp_backend: null
tpu_num_cores: null
tpu_metrics_debug: false
debug: []
dataloader_drop_last: false
eval_steps: null
dataloader_num_workers: 0
dataloader_prefetch_factor: null
past_index: -1
run_name: video_debug
disable_tqdm: false
remove_unused_columns: true
label_names: null
load_best_model_at_end: false
metric_for_best_model: null
greater_is_better: null
ignore_data_skip: false
fsdp: []
fsdp_min_num_params: 0
fsdp_config:
transformer_layer_cls_to_wrap:
- Qwen3VLTextDecoderLayer
reshard_after_forward: false
min_num_params: 0
xla: false
xla_fsdp_v2: false
xla_fsdp_grad_ckpt: false
fsdp_transformer_layer_cls_to_wrap: null
accelerator_config:
split_batches: false
dispatch_batches: null
even_batches: true
use_seedable_sampler: true
non_blocking: false
gradient_accumulation_kwargs: null
parallelism_config: null
deepspeed: null
label_smoothing_factor: 0.0
optim: adamw_torch_fused
optim_args: null
adafactor: false
group_by_length: false
length_column_name: length
report_to: []
project: huggingface
trackio_space_id: trackio
ddp_find_unused_parameters: null
ddp_bucket_cap_mb: null
ddp_broadcast_buffers: null
dataloader_pin_memory: true
dataloader_persistent_workers: false
skip_memory_metrics: true
use_legacy_prediction_loop: false
push_to_hub: false
resume_from_checkpoint: null
hub_model_id: null
hub_strategy: every_save
hub_token: <HUB_TOKEN>
hub_private_repo: null
hub_always_push: false
hub_revision: null
gradient_checkpointing: true
gradient_checkpointing_kwargs: null
include_inputs_for_metrics: false
include_for_metrics: []
eval_do_concat_batches: true
fp16_backend: auto
push_to_hub_model_id: null
push_to_hub_organization: null
mp_parameters: ''
auto_find_batch_size: false
full_determinism: false
torchdynamo: null
ray_scope: last
ddp_timeout: 1800
torch_compile: false
torch_compile_backend: null
torch_compile_mode: null
include_tokens_per_second: false
include_num_input_tokens_seen: 'no'
neftune_noise_alpha: null
optim_target_modules: null
batch_eval_metrics: false
eval_on_start: false
use_liger_kernel: true
liger_kernel_config: null
eval_use_gather_object: false
average_tokens_across_devices: true
use_muon: false
freeze_modules: null
use_rmpad: true
fsdp2: true
sp_ulysses_degree: 1
reduce_dtype: bfloat16
output_dtype: bfloat16
print_batch_input_steps: 5
enable_profiler: false
profiler_config:
start_step: 1
end_step: 3
model_config:
extra_kwargs: {}
load_from_pretrained_path: training/pretrained_models/Qwen/Qwen3-VL-8B-Instruct
load_from_config: null
attn_implementation: flash_attention_2
overwrite_config: null
monkey_patch_kwargs: null
extra_kwargs: null
\ No newline at end of file
trainer_type: fsdp2_trainer
dataset_config:
extra_kwargs: {}
dataset_type: qwen3_vl_iterable
dataset_format: yaml
processor_config:
processor_name: Qwen/Qwen3-VL-8B-Instruct
processor_type: qwen3_vl
dataset_path: training/qwen3_vl/data_8M.yaml
datasets: null
shuffle: true
eval_dataset_path: null
object_storage: none
bucket_name: null
packing: false
packing_strategy: first_fit
packing_length: 40000
filter_overlong: true
filter_overlong_workers: 8
max_length: null
video_sampling_strategy: fps
video_max_pixels: 50176
video_max_frames: 512
frame_num: 64
fps: 1
video_backend: qwen_vl_utils
trainer_args:
output_dir: ./results/qwen3_vl/sensenova_si_8M
overwrite_output_dir: false
do_train: false
do_eval: false
do_predict: false
eval_strategy: 'no'
prediction_loss_only: false
per_device_train_batch_size: 1
per_device_eval_batch_size: 1
per_gpu_train_batch_size: null
per_gpu_eval_batch_size: null
gradient_accumulation_steps: 1
eval_accumulation_steps: null
eval_delay: 0
torch_empty_cache_steps: null
learning_rate: 1.0e-05
weight_decay: 0.0
adam_beta1: 0.9
adam_beta2: 0.999
adam_epsilon: 1.0e-08
max_grad_norm: 1.0
num_train_epochs: 1
max_steps: 1000
lr_scheduler_type: cosine
lr_scheduler_kwargs: {}
warmup_ratio: 0.01
warmup_steps: 0
log_level: passive
log_level_replica: warning
log_on_each_node: true
logging_dir: ./output/qwen3_vl_training/runs
logging_strategy: steps
logging_first_step: false
logging_steps: 1
logging_nan_inf_filter: true
save_strategy: steps
save_steps: 200
save_total_limit: 1
save_safetensors: true
save_on_each_node: false
save_only_model: false
restore_callback_states_from_checkpoint: false
no_cuda: false
use_cpu: false
use_mps_device: false
seed: 42
data_seed: null
jit_mode_eval: false
bf16: true
fp16: false
fp16_opt_level: O1
half_precision_backend: auto
bf16_full_eval: false
fp16_full_eval: false
tf32: null
local_rank: 0
ddp_backend: null
tpu_num_cores: null
tpu_metrics_debug: false
debug: []
dataloader_drop_last: false
eval_steps: null
dataloader_num_workers: 0
dataloader_prefetch_factor: null
past_index: -1
run_name: video_debug
disable_tqdm: false
remove_unused_columns: true
label_names: null
load_best_model_at_end: false
metric_for_best_model: null
greater_is_better: null
ignore_data_skip: false
fsdp: []
fsdp_min_num_params: 0
fsdp_config:
transformer_layer_cls_to_wrap:
- Qwen3VLTextDecoderLayer
reshard_after_forward: false
min_num_params: 0
xla: false
xla_fsdp_v2: false
xla_fsdp_grad_ckpt: false
fsdp_transformer_layer_cls_to_wrap: null
accelerator_config:
split_batches: false
dispatch_batches: null
even_batches: true
use_seedable_sampler: true
non_blocking: false
gradient_accumulation_kwargs: null
parallelism_config: null
deepspeed: null
label_smoothing_factor: 0.0
optim: adamw_torch_fused
optim_args: null
adafactor: false
group_by_length: false
length_column_name: length
report_to: []
project: huggingface
trackio_space_id: trackio
ddp_find_unused_parameters: null
ddp_bucket_cap_mb: null
ddp_broadcast_buffers: null
dataloader_pin_memory: true
dataloader_persistent_workers: false
skip_memory_metrics: true
use_legacy_prediction_loop: false
push_to_hub: false
resume_from_checkpoint: null
hub_model_id: null
hub_strategy: every_save
hub_token: <HUB_TOKEN>
hub_private_repo: null
hub_always_push: false
hub_revision: null
gradient_checkpointing: true
gradient_checkpointing_kwargs: null
include_inputs_for_metrics: false
include_for_metrics: []
eval_do_concat_batches: true
fp16_backend: auto
push_to_hub_model_id: null
push_to_hub_organization: null
mp_parameters: ''
auto_find_batch_size: false
full_determinism: false
torchdynamo: null
ray_scope: last
ddp_timeout: 1800
torch_compile: false
torch_compile_backend: null
torch_compile_mode: null
include_tokens_per_second: false
include_num_input_tokens_seen: 'no'
neftune_noise_alpha: null
optim_target_modules: null
batch_eval_metrics: false
eval_on_start: false
use_liger_kernel: true
liger_kernel_config: null
eval_use_gather_object: false
average_tokens_across_devices: true
use_muon: false
freeze_modules: null
use_rmpad: true
fsdp2: true
sp_ulysses_degree: 1
reduce_dtype: bfloat16
output_dtype: bfloat16
print_batch_input_steps: 5
enable_profiler: false
profiler_config:
start_step: 1
end_step: 3
model_config:
extra_kwargs: {}
load_from_pretrained_path: training/pretrained_models/Qwen/Qwen3-VL-8B-Instruct
load_from_config: null
attn_implementation: flash_attention_2
overwrite_config: null
monkey_patch_kwargs: null
extra_kwargs: null
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
doc/1.png

439 KB

doc/2.jpg

85.6 KB

doc/3.png

59.1 KB

doc/4.png

54.2 KB

doc/5.png

65.2 KB

icon.png

50.3 KB

# 模型唯一标识
modelCode=15311
# 模型名称
modelName=SenseNova-U1
# 模型描述
modelDescription=SenseNova-U1 是一款基于 NEO-unify 架构的原生统一多模态大模型,摒弃传统视觉编码器与 VAE 结构,端到端融合图文信息。模型拥有 8B 密集型与 A3B 混合专家两种规格,可实现图文理解、图像生成、图片编辑及图文交错创作,多项任务性能达开源顶尖水平,支持量化压缩与低显存部署,适配多样应用场景。
# 运行过程
processType=推理
# 算法类别
appCategory=多模态
# 框架类型
frameType=pytorch
# 加速卡类型
accelerateType=BW1000
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment