Commit 317a82e2 authored by chenych
Browse files

Add QWQ-32B

parent 37b0ad9f
...@@ -7,6 +7,7 @@ trust_remote_code: true ...@@ -7,6 +7,7 @@ trust_remote_code: true
stage: ppo stage: ppo
do_train: true do_train: true
finetuning_type: lora finetuning_type: lora
lora_rank: 8
lora_target: all lora_target: all
### dataset ### dataset
......
### model
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
adapter_name_or_path: saves/llama3-8b/lora/sft
### method
stage: sft
do_predict: true
finetuning_type: lora
### dataset
eval_dataset: identity,alpaca_en_demo
template: llama3
cutoff_len: 2048
max_samples: 50
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: saves/llama3-8b/lora/predict
overwrite_output_dir: true
### eval
per_device_eval_batch_size: 1
predict_with_generate: true
ddp_timeout: 180000000
...@@ -6,6 +6,7 @@ trust_remote_code: true ...@@ -6,6 +6,7 @@ trust_remote_code: true
stage: pt stage: pt
do_train: true do_train: true
finetuning_type: lora finetuning_type: lora
lora_rank: 8
lora_target: all lora_target: all
### dataset ### dataset
...@@ -14,6 +15,7 @@ cutoff_len: 2048 ...@@ -14,6 +15,7 @@ cutoff_len: 2048
max_samples: 1000 max_samples: 1000
overwrite_cache: true overwrite_cache: true
preprocessing_num_workers: 16 preprocessing_num_workers: 16
dataloader_num_workers: 4
### output ### output
output_dir: saves/llama3-8b/lora/pretrain output_dir: saves/llama3-8b/lora/pretrain
...@@ -21,6 +23,7 @@ logging_steps: 10 ...@@ -21,6 +23,7 @@ logging_steps: 10
save_steps: 500 save_steps: 500
plot_loss: true plot_loss: true
overwrite_output_dir: true overwrite_output_dir: true
save_only_model: false
### train ### train
per_device_train_batch_size: 1 per_device_train_batch_size: 1
...@@ -31,9 +34,11 @@ lr_scheduler_type: cosine ...@@ -31,9 +34,11 @@ lr_scheduler_type: cosine
warmup_ratio: 0.1 warmup_ratio: 0.1
bf16: true bf16: true
ddp_timeout: 180000000 ddp_timeout: 180000000
resume_from_checkpoint: null
### eval ### eval
val_size: 0.1 # eval_dataset: c4_demo
per_device_eval_batch_size: 1 # val_size: 0.1
eval_strategy: steps # per_device_eval_batch_size: 1
eval_steps: 500 # eval_strategy: steps
# eval_steps: 500
...@@ -6,6 +6,7 @@ trust_remote_code: true ...@@ -6,6 +6,7 @@ trust_remote_code: true
stage: rm stage: rm
do_train: true do_train: true
finetuning_type: lora finetuning_type: lora
lora_rank: 8
lora_target: all lora_target: all
### dataset ### dataset
...@@ -15,6 +16,7 @@ cutoff_len: 2048 ...@@ -15,6 +16,7 @@ cutoff_len: 2048
max_samples: 1000 max_samples: 1000
overwrite_cache: true overwrite_cache: true
preprocessing_num_workers: 16 preprocessing_num_workers: 16
dataloader_num_workers: 4
### output ### output
output_dir: saves/llama3-8b/lora/reward output_dir: saves/llama3-8b/lora/reward
...@@ -22,6 +24,7 @@ logging_steps: 10 ...@@ -22,6 +24,7 @@ logging_steps: 10
save_steps: 500 save_steps: 500
plot_loss: true plot_loss: true
overwrite_output_dir: true overwrite_output_dir: true
save_only_model: false
### train ### train
per_device_train_batch_size: 1 per_device_train_batch_size: 1
...@@ -32,9 +35,11 @@ lr_scheduler_type: cosine ...@@ -32,9 +35,11 @@ lr_scheduler_type: cosine
warmup_ratio: 0.1 warmup_ratio: 0.1
bf16: true bf16: true
ddp_timeout: 180000000 ddp_timeout: 180000000
resume_from_checkpoint: null
### eval ### eval
val_size: 0.1 # eval_dataset: dpo_en_demo
per_device_eval_batch_size: 1 # val_size: 0.1
eval_strategy: steps # per_device_eval_batch_size: 1
eval_steps: 500 # eval_strategy: steps
# eval_steps: 500
...@@ -6,6 +6,7 @@ trust_remote_code: true ...@@ -6,6 +6,7 @@ trust_remote_code: true
stage: sft stage: sft
do_train: true do_train: true
finetuning_type: lora finetuning_type: lora
lora_rank: 8
lora_target: all lora_target: all
### dataset ### dataset
...@@ -15,6 +16,7 @@ cutoff_len: 2048 ...@@ -15,6 +16,7 @@ cutoff_len: 2048
max_samples: 1000 max_samples: 1000
overwrite_cache: true overwrite_cache: true
preprocessing_num_workers: 16 preprocessing_num_workers: 16
dataloader_num_workers: 4
### output ### output
output_dir: saves/llama3-8b/lora/sft output_dir: saves/llama3-8b/lora/sft
...@@ -22,6 +24,7 @@ logging_steps: 10 ...@@ -22,6 +24,7 @@ logging_steps: 10
save_steps: 500 save_steps: 500
plot_loss: true plot_loss: true
overwrite_output_dir: true overwrite_output_dir: true
save_only_model: false
### train ### train
per_device_train_batch_size: 1 per_device_train_batch_size: 1
...@@ -32,9 +35,11 @@ lr_scheduler_type: cosine ...@@ -32,9 +35,11 @@ lr_scheduler_type: cosine
warmup_ratio: 0.1 warmup_ratio: 0.1
bf16: true bf16: true
ddp_timeout: 180000000 ddp_timeout: 180000000
resume_from_checkpoint: null
### eval ### eval
val_size: 0.1 # eval_dataset: alpaca_en_demo
per_device_eval_batch_size: 1 # val_size: 0.1
eval_strategy: steps # per_device_eval_batch_size: 1
eval_steps: 500 # eval_strategy: steps
# eval_steps: 500
### model
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all
deepspeed: examples/deepspeed/ds_z0_config.json
### dataset
dataset: identity,alpaca_en_demo
template: llama3
cutoff_len: 2048
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: saves/llama3-8b/lora/sft
logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true
### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 2
learning_rate: 1.0e-4
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000
### eval
val_size: 0.1
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 500
...@@ -6,6 +6,7 @@ trust_remote_code: true ...@@ -6,6 +6,7 @@ trust_remote_code: true
stage: sft stage: sft
do_train: true do_train: true
finetuning_type: lora finetuning_type: lora
lora_rank: 8
lora_target: all lora_target: all
deepspeed: examples/deepspeed/ds_z3_config.json # choices: [ds_z0_config.json, ds_z2_config.json, ds_z3_config.json] deepspeed: examples/deepspeed/ds_z3_config.json # choices: [ds_z0_config.json, ds_z2_config.json, ds_z3_config.json]
...@@ -16,6 +17,7 @@ cutoff_len: 2048 ...@@ -16,6 +17,7 @@ cutoff_len: 2048
max_samples: 1000 max_samples: 1000
overwrite_cache: true overwrite_cache: true
preprocessing_num_workers: 16 preprocessing_num_workers: 16
dataloader_num_workers: 4
### output ### output
output_dir: saves/llama3-8b/lora/sft output_dir: saves/llama3-8b/lora/sft
...@@ -23,6 +25,7 @@ logging_steps: 10 ...@@ -23,6 +25,7 @@ logging_steps: 10
save_steps: 500 save_steps: 500
plot_loss: true plot_loss: true
overwrite_output_dir: true overwrite_output_dir: true
save_only_model: false
### train ### train
per_device_train_batch_size: 1 per_device_train_batch_size: 1
...@@ -33,9 +36,11 @@ lr_scheduler_type: cosine ...@@ -33,9 +36,11 @@ lr_scheduler_type: cosine
warmup_ratio: 0.1 warmup_ratio: 0.1
bf16: true bf16: true
ddp_timeout: 180000000 ddp_timeout: 180000000
resume_from_checkpoint: null
### eval ### eval
val_size: 0.1 # eval_dataset: alpaca_en_demo
per_device_eval_batch_size: 1 # val_size: 0.1
eval_strategy: steps # per_device_eval_batch_size: 1
eval_steps: 500 # eval_strategy: steps
# eval_steps: 500
...@@ -6,6 +6,7 @@ trust_remote_code: true ...@@ -6,6 +6,7 @@ trust_remote_code: true
stage: sft stage: sft
do_train: true do_train: true
finetuning_type: lora finetuning_type: lora
lora_rank: 8
lora_target: all lora_target: all
### dataset ### dataset
...@@ -16,6 +17,7 @@ cutoff_len: 2048 ...@@ -16,6 +17,7 @@ cutoff_len: 2048
max_samples: 1000 max_samples: 1000
overwrite_cache: true overwrite_cache: true
preprocessing_num_workers: 16 preprocessing_num_workers: 16
dataloader_num_workers: 4
### output ### output
output_dir: tmp_dir output_dir: tmp_dir
...@@ -23,6 +25,15 @@ logging_steps: 10 ...@@ -23,6 +25,15 @@ logging_steps: 10
save_steps: 500 save_steps: 500
plot_loss: true plot_loss: true
overwrite_output_dir: true overwrite_output_dir: true
save_only_model: false
### ray
ray_run_name: llama3_8b_sft_lora
ray_storage_path: ./saves
ray_num_workers: 4 # number of GPUs to use
resources_per_worker:
GPU: 1
placement_strategy: PACK
### train ### train
per_device_train_batch_size: 1 per_device_train_batch_size: 1
...@@ -33,16 +44,11 @@ lr_scheduler_type: cosine ...@@ -33,16 +44,11 @@ lr_scheduler_type: cosine
warmup_ratio: 0.1 warmup_ratio: 0.1
bf16: true bf16: true
ddp_timeout: 180000000 ddp_timeout: 180000000
resume_from_checkpoint: null
### eval ### eval
val_size: 0.1 # eval_dataset: alpaca_en_demo
per_device_eval_batch_size: 1 # val_size: 0.1
eval_strategy: steps # per_device_eval_batch_size: 1
eval_steps: 500 # eval_strategy: steps
# eval_steps: 500
### ray
ray_run_name: llama3_8b_sft_lora
ray_num_workers: 4 # number of GPUs to use
resources_per_worker:
GPU: 1
placement_strategy: PACK
...@@ -6,6 +6,7 @@ trust_remote_code: true ...@@ -6,6 +6,7 @@ trust_remote_code: true
stage: sft stage: sft
do_train: true do_train: true
finetuning_type: lora finetuning_type: lora
lora_rank: 8
lora_target: all lora_target: all
### dataset ### dataset
......
...@@ -6,6 +6,7 @@ trust_remote_code: true ...@@ -6,6 +6,7 @@ trust_remote_code: true
stage: sft stage: sft
do_train: true do_train: true
finetuning_type: lora finetuning_type: lora
lora_rank: 8
lora_target: all lora_target: all
### dataset ### dataset
...@@ -15,6 +16,7 @@ cutoff_len: 2048 ...@@ -15,6 +16,7 @@ cutoff_len: 2048
max_samples: 1000 max_samples: 1000
overwrite_cache: true overwrite_cache: true
preprocessing_num_workers: 16 preprocessing_num_workers: 16
dataloader_num_workers: 4
### output ### output
output_dir: saves/llava1_5-7b/lora/sft output_dir: saves/llava1_5-7b/lora/sft
...@@ -22,6 +24,7 @@ logging_steps: 10 ...@@ -22,6 +24,7 @@ logging_steps: 10
save_steps: 500 save_steps: 500
plot_loss: true plot_loss: true
overwrite_output_dir: true overwrite_output_dir: true
save_only_model: false
### train ### train
per_device_train_batch_size: 1 per_device_train_batch_size: 1
...@@ -32,9 +35,10 @@ lr_scheduler_type: cosine ...@@ -32,9 +35,10 @@ lr_scheduler_type: cosine
warmup_ratio: 0.1 warmup_ratio: 0.1
bf16: true bf16: true
ddp_timeout: 180000000 ddp_timeout: 180000000
resume_from_checkpoint: null
### eval ### eval
val_size: 0.1 # val_size: 0.1
per_device_eval_batch_size: 1 # per_device_eval_batch_size: 1
eval_strategy: steps # eval_strategy: steps
eval_steps: 500 # eval_steps: 500
### model ### model
model_name_or_path: Qwen/Qwen2-VL-7B-Instruct model_name_or_path: Qwen/Qwen2-VL-7B-Instruct
image_max_pixels: 262144
video_max_pixels: 16384
trust_remote_code: true trust_remote_code: true
### method ### method
stage: dpo stage: dpo
do_train: true do_train: true
finetuning_type: lora finetuning_type: lora
lora_rank: 8
lora_target: all lora_target: all
pref_beta: 0.1 pref_beta: 0.1
pref_loss: sigmoid # choices: [sigmoid (dpo), orpo, simpo] pref_loss: sigmoid # choices: [sigmoid (dpo), orpo, simpo]
...@@ -17,6 +20,7 @@ cutoff_len: 2048 ...@@ -17,6 +20,7 @@ cutoff_len: 2048
max_samples: 1000 max_samples: 1000
overwrite_cache: true overwrite_cache: true
preprocessing_num_workers: 16 preprocessing_num_workers: 16
dataloader_num_workers: 4
### output ### output
output_dir: saves/qwen2_vl-7b/lora/dpo output_dir: saves/qwen2_vl-7b/lora/dpo
...@@ -24,6 +28,7 @@ logging_steps: 10 ...@@ -24,6 +28,7 @@ logging_steps: 10
save_steps: 500 save_steps: 500
plot_loss: true plot_loss: true
overwrite_output_dir: true overwrite_output_dir: true
save_only_model: false
### train ### train
per_device_train_batch_size: 1 per_device_train_batch_size: 1
...@@ -34,9 +39,10 @@ lr_scheduler_type: cosine ...@@ -34,9 +39,10 @@ lr_scheduler_type: cosine
warmup_ratio: 0.1 warmup_ratio: 0.1
bf16: true bf16: true
ddp_timeout: 180000000 ddp_timeout: 180000000
resume_from_checkpoint: null
### eval ### eval
val_size: 0.1 # val_size: 0.1
per_device_eval_batch_size: 1 # per_device_eval_batch_size: 1
eval_strategy: steps # eval_strategy: steps
eval_steps: 500 # eval_steps: 500
### model ### model
model_name_or_path: Qwen/Qwen2-VL-7B-Instruct model_name_or_path: Qwen/Qwen2-VL-7B-Instruct
image_max_pixels: 262144
video_max_pixels: 16384
trust_remote_code: true trust_remote_code: true
### method ### method
stage: sft stage: sft
do_train: true do_train: true
finetuning_type: lora finetuning_type: lora
lora_rank: 8
lora_target: all lora_target: all
### dataset ### dataset
...@@ -15,6 +18,7 @@ cutoff_len: 2048 ...@@ -15,6 +18,7 @@ cutoff_len: 2048
max_samples: 1000 max_samples: 1000
overwrite_cache: true overwrite_cache: true
preprocessing_num_workers: 16 preprocessing_num_workers: 16
dataloader_num_workers: 4
### output ### output
output_dir: saves/qwen2_vl-7b/lora/sft output_dir: saves/qwen2_vl-7b/lora/sft
...@@ -22,6 +26,7 @@ logging_steps: 10 ...@@ -22,6 +26,7 @@ logging_steps: 10
save_steps: 500 save_steps: 500
plot_loss: true plot_loss: true
overwrite_output_dir: true overwrite_output_dir: true
save_only_model: false
### train ### train
per_device_train_batch_size: 1 per_device_train_batch_size: 1
...@@ -32,9 +37,10 @@ lr_scheduler_type: cosine ...@@ -32,9 +37,10 @@ lr_scheduler_type: cosine
warmup_ratio: 0.1 warmup_ratio: 0.1
bf16: true bf16: true
ddp_timeout: 180000000 ddp_timeout: 180000000
resume_from_checkpoint: null
### eval ### eval
val_size: 0.1 # val_size: 0.1
per_device_eval_batch_size: 1 # per_device_eval_batch_size: 1
eval_strategy: steps # eval_strategy: steps
eval_steps: 500 # eval_steps: 500
...@@ -6,6 +6,7 @@ trust_remote_code: true ...@@ -6,6 +6,7 @@ trust_remote_code: true
stage: sft stage: sft
do_train: true do_train: true
finetuning_type: lora finetuning_type: lora
lora_rank: 8
lora_target: all lora_target: all
### dataset ### dataset
...@@ -34,7 +35,7 @@ bf16: true ...@@ -34,7 +35,7 @@ bf16: true
ddp_timeout: 180000000 ddp_timeout: 180000000
### eval ### eval
val_size: 0.1 # val_size: 0.1
per_device_eval_batch_size: 1 # per_device_eval_batch_size: 1
eval_strategy: steps # eval_strategy: steps
eval_steps: 500 # eval_steps: 500
...@@ -6,6 +6,7 @@ trust_remote_code: true ...@@ -6,6 +6,7 @@ trust_remote_code: true
stage: sft stage: sft
do_train: true do_train: true
finetuning_type: lora finetuning_type: lora
lora_rank: 8
lora_target: all lora_target: all
### dataset ### dataset
...@@ -34,7 +35,7 @@ bf16: true ...@@ -34,7 +35,7 @@ bf16: true
ddp_timeout: 180000000 ddp_timeout: 180000000
### eval ### eval
val_size: 0.1 # val_size: 0.1
per_device_eval_batch_size: 1 # per_device_eval_batch_size: 1
eval_strategy: steps # eval_strategy: steps
eval_steps: 500 # eval_steps: 500
### model
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
quantization_bit: 4
### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all
### dataset
dataset: identity,alpaca_en_demo
template: llama3
cutoff_len: 1024
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16
### output
output_dir: saves/llama3-8b/lora/sft
logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true
### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
fp16: true
ddp_timeout: 180000000
### eval
val_size: 0.1
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 500
...@@ -9,6 +9,7 @@ trust_remote_code: true ...@@ -9,6 +9,7 @@ trust_remote_code: true
stage: sft stage: sft
do_train: true do_train: true
finetuning_type: lora finetuning_type: lora
lora_rank: 8
lora_target: all lora_target: all
### dataset ### dataset
...@@ -37,7 +38,7 @@ bf16: true ...@@ -37,7 +38,7 @@ bf16: true
ddp_timeout: 180000000 ddp_timeout: 180000000
### eval ### eval
val_size: 0.1 # val_size: 0.1
per_device_eval_batch_size: 1 # per_device_eval_batch_size: 1
eval_strategy: steps # eval_strategy: steps
eval_steps: 500 # eval_steps: 500
...@@ -6,6 +6,7 @@ trust_remote_code: true ...@@ -6,6 +6,7 @@ trust_remote_code: true
stage: sft stage: sft
do_train: true do_train: true
finetuning_type: lora finetuning_type: lora
lora_rank: 8
lora_target: all lora_target: all
### dataset ### dataset
...@@ -34,7 +35,7 @@ bf16: true ...@@ -34,7 +35,7 @@ bf16: true
ddp_timeout: 180000000 ddp_timeout: 180000000
### eval ### eval
val_size: 0.1 # val_size: 0.1
per_device_eval_batch_size: 1 # per_device_eval_batch_size: 1
eval_strategy: steps # eval_strategy: steps
eval_steps: 500 # eval_steps: 500
...@@ -8,6 +8,7 @@ trust_remote_code: true ...@@ -8,6 +8,7 @@ trust_remote_code: true
stage: sft stage: sft
do_train: true do_train: true
finetuning_type: lora finetuning_type: lora
lora_rank: 8
lora_target: all lora_target: all
### dataset ### dataset
...@@ -36,7 +37,7 @@ bf16: true ...@@ -36,7 +37,7 @@ bf16: true
ddp_timeout: 180000000 ddp_timeout: 180000000
### eval ### eval
val_size: 0.1 # val_size: 0.1
per_device_eval_batch_size: 1 # per_device_eval_batch_size: 1
eval_strategy: steps # eval_strategy: steps
eval_steps: 500 # eval_steps: 500
...@@ -2,6 +2,22 @@ ...@@ -2,6 +2,22 @@
requires = ["setuptools>=61.0"] requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta" build-backend = "setuptools.build_meta"
[project]
name = "llamafactory"
dynamic = [
"version",
"dependencies",
"optional-dependencies",
"requires-python",
"scripts",
"authors",
"description",
"readme",
"license",
"keywords",
"classifiers"
]
[tool.ruff] [tool.ruff]
target-version = "py38" target-version = "py38"
line-length = 119 line-length = 119
...@@ -31,3 +47,19 @@ indent-style = "space" ...@@ -31,3 +47,19 @@ indent-style = "space"
docstring-code-format = true docstring-code-format = true
skip-magic-trailing-comma = false skip-magic-trailing-comma = false
line-ending = "auto" line-ending = "auto"
[tool.uv]
conflicts = [
[
{ extra = "torch-npu" },
{ extra = "aqlm" },
],
[
{ extra = "torch-npu" },
{ extra = "liger-kernel" },
],
[
{ extra = "torch-npu" },
{ extra = "vllm" },
]
]
transformers>=4.41.2,<=4.46.1 transformers>=4.41.2,<=4.49.0,!=4.46.*,!=4.47.*,!=4.48.*;python_version<'3.10'
datasets>=2.16.0,<=3.1.0 transformers>=4.41.2,<=4.49.0,!=4.46.*,!=4.47.*,!=4.48.0;python_version>='3.10'
accelerate>=0.34.0,<=1.0.1 datasets>=2.16.0,<=3.2.0
accelerate>=0.34.0,<=1.2.1
peft>=0.11.1,<=0.12.0 peft>=0.11.1,<=0.12.0
trl>=0.8.6,<=0.9.6 trl>=0.8.6,<=0.9.6
tokenizers>=0.19.0,<0.20.4 tokenizers>=0.19.0,<=0.21.0
gradio>=4.0.0,<5.0.0 gradio>=4.38.0,<=5.18.0
pandas>=2.0.0 pandas>=2.0.0
scipy scipy
einops einops
...@@ -21,4 +22,5 @@ packaging ...@@ -21,4 +22,5 @@ packaging
pyyaml pyyaml
numpy<2.0.0 numpy<2.0.0
av av
librosa
tyro<0.9.0 tyro<0.9.0
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment