Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenych
llama-grpo
Commits
c7c477c7
Commit
c7c477c7
authored
Sep 24, 2025
by
chenych
Browse files
add grpo
parents
Pipeline
#2942
failed with stages
in 0 seconds
Changes
282
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
594 additions
and
0 deletions
+594
-0
examples/inference/llama3.yaml
examples/inference/llama3.yaml
+4
-0
examples/inference/llama3_full_sft.yaml
examples/inference/llama3_full_sft.yaml
+4
-0
examples/inference/llama3_lora_sft.yaml
examples/inference/llama3_lora_sft.yaml
+5
-0
examples/inference/qwen2_5vl.yaml
examples/inference/qwen2_5vl.yaml
+4
-0
examples/merge_lora/llama3_full_sft.yaml
examples/merge_lora/llama3_full_sft.yaml
+10
-0
examples/merge_lora/llama3_gptq.yaml
examples/merge_lora/llama3_gptq.yaml
+12
-0
examples/merge_lora/llama3_lora_sft.yaml
examples/merge_lora/llama3_lora_sft.yaml
+13
-0
examples/merge_lora/qwen2_5vl_lora_sft.yaml
examples/merge_lora/qwen2_5vl_lora_sft.yaml
+13
-0
examples/train_full/llama3_full_sft.yaml
examples/train_full/llama3_full_sft.yaml
+45
-0
examples/train_full/qwen2_5vl_full_sft.yaml
examples/train_full/qwen2_5vl_full_sft.yaml
+49
-0
examples/train_lora/llama3_lora_dpo.yaml
examples/train_lora/llama3_lora_dpo.yaml
+48
-0
examples/train_lora/llama3_lora_eval.yaml
examples/train_lora/llama3_lora_eval.yaml
+19
-0
examples/train_lora/llama3_lora_kto.yaml
examples/train_lora/llama3_lora_kto.yaml
+44
-0
examples/train_lora/llama3_lora_ppo.yaml
examples/train_lora/llama3_lora_ppo.yaml
+43
-0
examples/train_lora/llama3_lora_pretrain.yaml
examples/train_lora/llama3_lora_pretrain.yaml
+45
-0
examples/train_lora/llama3_lora_reward.yaml
examples/train_lora/llama3_lora_reward.yaml
+46
-0
examples/train_lora/llama3_lora_sft.sh
examples/train_lora/llama3_lora_sft.sh
+36
-0
examples/train_lora/llama3_lora_sft.yaml
examples/train_lora/llama3_lora_sft.yaml
+46
-0
examples/train_lora/llama3_lora_sft_ds3.yaml
examples/train_lora/llama3_lora_sft_ds3.yaml
+47
-0
examples/train_lora/llama3_lora_sft_ray.yaml
examples/train_lora/llama3_lora_sft_ray.yaml
+61
-0
No files found.
examples/inference/llama3.yaml
0 → 100644
View file @
c7c477c7
model_name_or_path
:
meta-llama/Meta-Llama-3-8B-Instruct
template
:
llama3
infer_backend
:
huggingface
# choices: [huggingface, vllm, sglang]
trust_remote_code
:
true
examples/inference/llama3_full_sft.yaml
0 → 100644
View file @
c7c477c7
model_name_or_path
:
saves/llama3-8b/full/sft
template
:
llama3
infer_backend
:
huggingface
# choices: [huggingface, vllm, sglang]
trust_remote_code
:
true
examples/inference/llama3_lora_sft.yaml
0 → 100644
View file @
c7c477c7
model_name_or_path
:
meta-llama/Meta-Llama-3-8B-Instruct
adapter_name_or_path
:
saves/llama3-8b/lora/sft
template
:
llama3
infer_backend
:
huggingface
# choices: [huggingface, vllm, sglang]
trust_remote_code
:
true
examples/inference/qwen2_5vl.yaml
0 → 100644
View file @
c7c477c7
model_name_or_path
:
Qwen/Qwen2.5-VL-7B-Instruct
template
:
qwen2_vl
infer_backend
:
huggingface
# choices: [huggingface, vllm, sglang]
trust_remote_code
:
true
examples/merge_lora/llama3_full_sft.yaml
0 → 100644
View file @
c7c477c7
### model
model_name_or_path
:
saves/llama3-8b/full/sft
template
:
llama3
trust_remote_code
:
true
### export
export_dir
:
output/llama3_full_sft
export_size
:
5
export_device
:
cpu
# choices: [cpu, auto]
export_legacy_format
:
false
examples/merge_lora/llama3_gptq.yaml
0 → 100644
View file @
c7c477c7
### model
model_name_or_path
:
meta-llama/Meta-Llama-3-8B-Instruct
template
:
llama3
trust_remote_code
:
true
### export
export_dir
:
output/llama3_gptq
export_quantization_bit
:
4
export_quantization_dataset
:
data/c4_demo.jsonl
export_size
:
5
export_device
:
cpu
# choices: [cpu, auto]
export_legacy_format
:
false
examples/merge_lora/llama3_lora_sft.yaml
0 → 100644
View file @
c7c477c7
### Note: DO NOT use quantized model or quantization_bit when merging lora adapters
### model
model_name_or_path
:
meta-llama/Meta-Llama-3-8B-Instruct
adapter_name_or_path
:
saves/llama3-8b/lora/sft
template
:
llama3
trust_remote_code
:
true
### export
export_dir
:
output/llama3_lora_sft
export_size
:
5
export_device
:
cpu
# choices: [cpu, auto]
export_legacy_format
:
false
examples/merge_lora/qwen2_5vl_lora_sft.yaml
0 → 100644
View file @
c7c477c7
### Note: DO NOT use quantized model or quantization_bit when merging lora adapters
### model
model_name_or_path
:
Qwen/Qwen2.5-VL-7B-Instruct
adapter_name_or_path
:
saves/qwen2_5vl-7b/lora/sft
template
:
qwen2_vl
trust_remote_code
:
true
### export
export_dir
:
output/qwen2_5vl_lora_sft
export_size
:
5
export_device
:
cpu
# choices: [cpu, auto]
export_legacy_format
:
false
examples/train_full/llama3_full_sft.yaml
0 → 100644
View file @
c7c477c7
### model
model_name_or_path
:
meta-llama/Meta-Llama-3-8B-Instruct
trust_remote_code
:
true
### method
stage
:
sft
do_train
:
true
finetuning_type
:
full
deepspeed
:
examples/deepspeed/ds_z3_config.json
# choices: [ds_z0_config.json, ds_z2_config.json, ds_z3_config.json]
### dataset
dataset
:
identity,alpaca_en_demo
template
:
llama3
cutoff_len
:
2048
max_samples
:
1000
overwrite_cache
:
true
preprocessing_num_workers
:
16
dataloader_num_workers
:
4
### output
output_dir
:
saves/llama3-8b/full/sft
logging_steps
:
10
save_steps
:
500
plot_loss
:
true
overwrite_output_dir
:
true
save_only_model
:
false
report_to
:
none
# choices: [none, wandb, tensorboard, swanlab, mlflow]
### train
per_device_train_batch_size
:
1
gradient_accumulation_steps
:
2
learning_rate
:
1.0e-5
num_train_epochs
:
3.0
lr_scheduler_type
:
cosine
warmup_ratio
:
0.1
bf16
:
true
ddp_timeout
:
180000000
resume_from_checkpoint
:
null
### eval
# eval_dataset: alpaca_en_demo
# val_size: 0.1
# per_device_eval_batch_size: 1
# eval_strategy: steps
# eval_steps: 500
examples/train_full/qwen2_5vl_full_sft.yaml
0 → 100644
View file @
c7c477c7
### model
model_name_or_path
:
Qwen/Qwen2.5-VL-7B-Instruct
image_max_pixels
:
262144
video_max_pixels
:
16384
trust_remote_code
:
true
### method
stage
:
sft
do_train
:
true
finetuning_type
:
full
freeze_vision_tower
:
true
freeze_multi_modal_projector
:
true
freeze_language_model
:
false
deepspeed
:
examples/deepspeed/ds_z3_config.json
### dataset
dataset
:
mllm_demo,identity,alpaca_en_demo
template
:
qwen2_vl
cutoff_len
:
2048
max_samples
:
1000
overwrite_cache
:
true
preprocessing_num_workers
:
16
dataloader_num_workers
:
4
### output
output_dir
:
saves/qwen2_5vl-7b/full/sft
logging_steps
:
10
save_steps
:
500
plot_loss
:
true
overwrite_output_dir
:
true
save_only_model
:
false
report_to
:
none
# choices: [none, wandb, tensorboard, swanlab, mlflow]
### train
per_device_train_batch_size
:
1
gradient_accumulation_steps
:
2
learning_rate
:
1.0e-5
num_train_epochs
:
3.0
lr_scheduler_type
:
cosine
warmup_ratio
:
0.1
bf16
:
true
ddp_timeout
:
180000000
resume_from_checkpoint
:
null
### eval
# val_size: 0.1
# per_device_eval_batch_size: 1
# eval_strategy: steps
# eval_steps: 500
examples/train_lora/llama3_lora_dpo.yaml
0 → 100644
View file @
c7c477c7
### model
model_name_or_path
:
meta-llama/Meta-Llama-3-8B-Instruct
trust_remote_code
:
true
### method
stage
:
dpo
do_train
:
true
finetuning_type
:
lora
lora_rank
:
8
lora_target
:
all
pref_beta
:
0.1
pref_loss
:
sigmoid
# choices: [sigmoid (dpo), orpo, simpo]
### dataset
dataset
:
dpo_en_demo
template
:
llama3
cutoff_len
:
2048
max_samples
:
1000
overwrite_cache
:
true
preprocessing_num_workers
:
16
dataloader_num_workers
:
4
### output
output_dir
:
saves/llama3-8b/lora/dpo
logging_steps
:
10
save_steps
:
500
plot_loss
:
true
overwrite_output_dir
:
true
save_only_model
:
false
report_to
:
none
# choices: [none, wandb, tensorboard, swanlab, mlflow]
### train
per_device_train_batch_size
:
1
gradient_accumulation_steps
:
8
learning_rate
:
5.0e-6
num_train_epochs
:
3.0
lr_scheduler_type
:
cosine
warmup_ratio
:
0.1
bf16
:
true
ddp_timeout
:
180000000
resume_from_checkpoint
:
null
### eval
# eval_dataset: dpo_en_demo
# val_size: 0.1
# per_device_eval_batch_size: 1
# eval_strategy: steps
# eval_steps: 500
examples/train_lora/llama3_lora_eval.yaml
0 → 100644
View file @
c7c477c7
### model
model_name_or_path
:
meta-llama/Meta-Llama-3-8B-Instruct
adapter_name_or_path
:
saves/llama3-8b/lora/sft
trust_remote_code
:
true
### method
finetuning_type
:
lora
### dataset
task
:
mmlu_test
# choices: [mmlu_test, ceval_validation, cmmlu_test]
template
:
fewshot
lang
:
en
n_shot
:
5
### output
save_dir
:
saves/llama3-8b/lora/eval
### eval
batch_size
:
4
examples/train_lora/llama3_lora_kto.yaml
0 → 100644
View file @
c7c477c7
### model
model_name_or_path
:
meta-llama/Meta-Llama-3-8B-Instruct
trust_remote_code
:
true
### method
stage
:
kto
do_train
:
true
finetuning_type
:
lora
lora_rank
:
8
lora_target
:
all
pref_beta
:
0.1
### dataset
dataset
:
kto_en_demo
template
:
llama3
cutoff_len
:
2048
max_samples
:
1000
overwrite_cache
:
true
preprocessing_num_workers
:
16
dataloader_num_workers
:
4
### output
output_dir
:
saves/llama3-8b/lora/kto
logging_steps
:
10
save_steps
:
500
plot_loss
:
true
overwrite_output_dir
:
true
report_to
:
none
# choices: [none, wandb, tensorboard, swanlab, mlflow]
### train
per_device_train_batch_size
:
1
gradient_accumulation_steps
:
8
learning_rate
:
5.0e-6
num_train_epochs
:
3.0
lr_scheduler_type
:
cosine
warmup_ratio
:
0.1
bf16
:
true
ddp_timeout
:
180000000
### eval
# val_size: 0.1
# per_device_eval_batch_size: 1
# eval_strategy: steps
# eval_steps: 500
examples/train_lora/llama3_lora_ppo.yaml
0 → 100644
View file @
c7c477c7
### model
model_name_or_path
:
meta-llama/Meta-Llama-3-8B-Instruct
reward_model
:
saves/llama3-8b/lora/reward
trust_remote_code
:
true
### method
stage
:
ppo
do_train
:
true
finetuning_type
:
lora
lora_rank
:
8
lora_target
:
all
### dataset
dataset
:
identity,alpaca_en_demo
template
:
llama3
cutoff_len
:
2048
max_samples
:
1000
overwrite_cache
:
true
preprocessing_num_workers
:
16
dataloader_num_workers
:
4
### output
output_dir
:
saves/llama3-8b/lora/ppo
logging_steps
:
10
save_steps
:
500
plot_loss
:
true
overwrite_output_dir
:
true
report_to
:
none
# choices: [none, wandb, tensorboard, swanlab, mlflow]
### train
per_device_train_batch_size
:
1
gradient_accumulation_steps
:
8
learning_rate
:
1.0e-5
num_train_epochs
:
3.0
lr_scheduler_type
:
cosine
warmup_ratio
:
0.1
bf16
:
true
ddp_timeout
:
180000000
### generate
max_new_tokens
:
512
top_k
:
0
top_p
:
0.9
examples/train_lora/llama3_lora_pretrain.yaml
0 → 100644
View file @
c7c477c7
### model
model_name_or_path
:
meta-llama/Meta-Llama-3-8B-Instruct
trust_remote_code
:
true
### method
stage
:
pt
do_train
:
true
finetuning_type
:
lora
lora_rank
:
8
lora_target
:
all
### dataset
dataset
:
c4_demo
cutoff_len
:
2048
max_samples
:
1000
overwrite_cache
:
true
preprocessing_num_workers
:
16
dataloader_num_workers
:
4
### output
output_dir
:
saves/llama3-8b/lora/pretrain
logging_steps
:
10
save_steps
:
500
plot_loss
:
true
overwrite_output_dir
:
true
save_only_model
:
false
report_to
:
none
# choices: [none, wandb, tensorboard, swanlab, mlflow]
### train
per_device_train_batch_size
:
1
gradient_accumulation_steps
:
8
learning_rate
:
1.0e-4
num_train_epochs
:
3.0
lr_scheduler_type
:
cosine
warmup_ratio
:
0.1
bf16
:
true
ddp_timeout
:
180000000
resume_from_checkpoint
:
null
### eval
# eval_dataset: c4_demo
# val_size: 0.1
# per_device_eval_batch_size: 1
# eval_strategy: steps
# eval_steps: 500
examples/train_lora/llama3_lora_reward.yaml
0 → 100644
View file @
c7c477c7
### model
model_name_or_path
:
meta-llama/Meta-Llama-3-8B-Instruct
trust_remote_code
:
true
### method
stage
:
rm
do_train
:
true
finetuning_type
:
lora
lora_rank
:
8
lora_target
:
all
### dataset
dataset
:
dpo_en_demo
template
:
llama3
cutoff_len
:
2048
max_samples
:
1000
overwrite_cache
:
true
preprocessing_num_workers
:
16
dataloader_num_workers
:
4
### output
output_dir
:
saves/llama3-8b/lora/reward
logging_steps
:
10
save_steps
:
500
plot_loss
:
true
overwrite_output_dir
:
true
save_only_model
:
false
report_to
:
none
# choices: [none, wandb, tensorboard, swanlab, mlflow]
### train
per_device_train_batch_size
:
1
gradient_accumulation_steps
:
8
learning_rate
:
1.0e-4
num_train_epochs
:
3.0
lr_scheduler_type
:
cosine
warmup_ratio
:
0.1
bf16
:
true
ddp_timeout
:
180000000
resume_from_checkpoint
:
null
### eval
# eval_dataset: dpo_en_demo
# val_size: 0.1
# per_device_eval_batch_size: 1
# eval_strategy: steps
# eval_steps: 500
examples/train_lora/llama3_lora_sft.sh
0 → 100644
View file @
c7c477c7
#!/bin/bash
set
-x
MODEL_PATH
=
meta-llama/Meta-Llama-3-8B-Instruct
llamafactory-cli train
\
--model_name_or_path
${
MODEL_PATH
}
\
--trust_remote_code
\
--stage
sft
\
--do_train
\
--finetuning_type
lora
\
--lora_rank
8
\
--lora_target
all
\
--dataset
identity,alpaca_en_demo
\
--template
llama3
\
--cutoff_len
2048
\
--max_samples
1000
\
--overwrite_cache
\
--preprocessing_num_workers
16
\
--dataloader_num_workers
4
\
--output_dir
saves/llama3-8b/lora/sft
\
--logging_steps
10
\
--save_steps
500
\
--plot_loss
\
--overwrite_output_dir
\
--save_only_model
false
\
--report_to
none
\
--per_device_train_batch_size
1
\
--gradient_accumulation_steps
8
\
--learning_rate
1e-4
\
--num_train_epochs
3.0
\
--lr_scheduler_type
cosine
\
--warmup_ratio
0.1
\
--bf16
\
--ddp_timeout
180000000
examples/train_lora/llama3_lora_sft.yaml
0 → 100644
View file @
c7c477c7
### model
model_name_or_path
:
meta-llama/Meta-Llama-3-8B-Instruct
trust_remote_code
:
true
### method
stage
:
sft
do_train
:
true
finetuning_type
:
lora
lora_rank
:
8
lora_target
:
all
### dataset
dataset
:
identity,alpaca_en_demo
template
:
llama3
cutoff_len
:
2048
max_samples
:
1000
overwrite_cache
:
true
preprocessing_num_workers
:
16
dataloader_num_workers
:
4
### output
output_dir
:
saves/llama3-8b/lora/sft
logging_steps
:
10
save_steps
:
500
plot_loss
:
true
overwrite_output_dir
:
true
save_only_model
:
false
report_to
:
none
# choices: [none, wandb, tensorboard, swanlab, mlflow]
### train
per_device_train_batch_size
:
1
gradient_accumulation_steps
:
8
learning_rate
:
1.0e-4
num_train_epochs
:
3.0
lr_scheduler_type
:
cosine
warmup_ratio
:
0.1
bf16
:
true
ddp_timeout
:
180000000
resume_from_checkpoint
:
null
### eval
# eval_dataset: alpaca_en_demo
# val_size: 0.1
# per_device_eval_batch_size: 1
# eval_strategy: steps
# eval_steps: 500
examples/train_lora/llama3_lora_sft_ds3.yaml
0 → 100644
View file @
c7c477c7
### model
model_name_or_path
:
meta-llama/Meta-Llama-3-8B-Instruct
trust_remote_code
:
true
### method
stage
:
sft
do_train
:
true
finetuning_type
:
lora
lora_rank
:
8
lora_target
:
all
deepspeed
:
examples/deepspeed/ds_z3_config.json
# choices: [ds_z0_config.json, ds_z2_config.json, ds_z3_config.json]
### dataset
dataset
:
identity,alpaca_en_demo
template
:
llama3
cutoff_len
:
2048
max_samples
:
1000
overwrite_cache
:
true
preprocessing_num_workers
:
16
dataloader_num_workers
:
4
### output
output_dir
:
saves/llama3-8b/lora/sft
logging_steps
:
10
save_steps
:
500
plot_loss
:
true
overwrite_output_dir
:
true
save_only_model
:
false
report_to
:
none
# choices: [none, wandb, tensorboard, swanlab, mlflow]
### train
per_device_train_batch_size
:
1
gradient_accumulation_steps
:
2
learning_rate
:
1.0e-4
num_train_epochs
:
3.0
lr_scheduler_type
:
cosine
warmup_ratio
:
0.1
bf16
:
true
ddp_timeout
:
180000000
resume_from_checkpoint
:
null
### eval
# eval_dataset: alpaca_en_demo
# val_size: 0.1
# per_device_eval_batch_size: 1
# eval_strategy: steps
# eval_steps: 500
examples/train_lora/llama3_lora_sft_ray.yaml
0 → 100644
View file @
c7c477c7
### model
model_name_or_path
:
meta-llama/Meta-Llama-3-8B-Instruct
# or use local absolute path
trust_remote_code
:
true
### method
stage
:
sft
do_train
:
true
finetuning_type
:
lora
lora_rank
:
8
lora_target
:
all
### dataset
dataset
:
identity,alpaca_en_demo
dataset_dir
:
REMOTE:llamafactory/demo_data
# or use local absolute path
template
:
llama3
cutoff_len
:
2048
max_samples
:
1000
overwrite_cache
:
true
preprocessing_num_workers
:
16
dataloader_num_workers
:
4
### output
output_dir
:
tmp_dir
logging_steps
:
10
save_steps
:
500
plot_loss
:
true
overwrite_output_dir
:
true
save_only_model
:
false
report_to
:
none
# choices: [none, wandb, tensorboard, swanlab, mlflow]
### ray
ray_run_name
:
llama3_8b_sft_lora
ray_storage_path
:
./saves
ray_num_workers
:
4
# Number of GPUs to use.
placement_strategy
:
PACK
resources_per_worker
:
GPU
:
1
# ray_init_kwargs:
# runtime_env:
# env_vars:
# <YOUR-ENV-VAR-HERE>: "<YOUR-ENV-VAR-HERE>"
# pip:
# - emoji
### train
per_device_train_batch_size
:
1
gradient_accumulation_steps
:
8
learning_rate
:
1.0e-4
num_train_epochs
:
3.0
lr_scheduler_type
:
cosine
warmup_ratio
:
0.1
bf16
:
true
ddp_timeout
:
180000000
resume_from_checkpoint
:
null
### eval
# eval_dataset: alpaca_en_demo
# val_size: 0.1
# per_device_eval_batch_size: 1
# eval_strategy: steps
# eval_steps: 500
Prev
1
2
3
4
5
6
7
8
9
…
15
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment