OpenDAS / LLaMA-Factory

Commit 581d366d, authored Apr 15, 2025 by chenych
Parent commit: 428c5813

    Support GLM-4/GLM-4-0414/GLM-Z1

Changes: 107 files in total; the diff is paginated, and this first page shows 20 changed files with 57 additions and 4 deletions.
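The commit's headline change is support for GLM-4, GLM-4-0414, and GLM-Z1; the template and model-registry edits presumably sit on later pages of this paginated diff, since this first page contains only Docker and example-config updates. As a hedged sketch (not taken from this page: the Hugging Face model id and the template key are assumptions), a LoRA SFT config for one of the new models would follow the same conventions the hunks below establish:

### model
model_name_or_path: THUDM/GLM-4-9B-0414  # assumption: HF repo id for GLM-4-0414
trust_remote_code: true

### method
stage: sft
do_train: true
finetuning_type: lora
lora_rank: 8
lora_target: all

### dataset
dataset: identity,alpaca_en_demo
template: glm4  # assumption: GLM-4-0414/GLM-Z1 may register their own template keys
cutoff_len: 2048
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16
dataloader_num_workers: 4

### output
output_dir: saves/glm4-9b/lora/sft
logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true
save_only_model: false
report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]

### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true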
docker/docker-rocm/Dockerfile                       +12  -0
docker/docker-rocm/docker-compose.yml               +2   -0
examples/extras/adam_mini/qwen2_full_sft.yaml       +3   -0
examples/extras/apollo/llama3_full_sft.yaml         +3   -0
examples/extras/badam/llama3_full_sft.yaml          +3   -0
examples/extras/fsdp_qlora/llama3_lora_sft.yaml     +3   -0
examples/extras/galore/llama3_full_sft.yaml         +3   -0
examples/extras/llama_pro/llama3_freeze_sft.yaml    +3   -0
examples/extras/loraplus/llama3_lora_sft.yaml       +3   -0
examples/extras/mod/llama3_full_sft.yaml            +3   -0
examples/extras/nlg_eval/llama3_lora_predict.yaml   +2   -0
examples/extras/pissa/llama3_lora_sft.yaml          +3   -0
examples/train_full/llama3_full_sft.yaml            +1   -0
examples/train_full/qwen2vl_full_sft.yaml           +5   -4
examples/train_lora/llama3_lora_dpo.yaml            +1   -0
examples/train_lora/llama3_lora_kto.yaml            +2   -0
examples/train_lora/llama3_lora_ppo.yaml            +2   -0
examples/train_lora/llama3_lora_pretrain.yaml       +1   -0
examples/train_lora/llama3_lora_reward.yaml         +1   -0
examples/train_lora/llama3_lora_sft.yaml            +1   -0
docker/docker-rocm/Dockerfile

@@ -12,8 +12,13 @@ ARG INSTALL_DEEPSPEED=false
 ARG INSTALL_FLASHATTN=false
 ARG INSTALL_LIGER_KERNEL=false
 ARG INSTALL_HQQ=false
+ARG INSTALL_PYTORCH=true
 ARG PIP_INDEX=https://pypi.org/simple
 ARG HTTP_PROXY=
+ARG PYTORCH_INDEX=https://download.pytorch.org/whl/nightly/rocm6.3
+
+# Use Bash instead of default /bin/sh
+SHELL ["/bin/bash", "-c"]

 # Set the working directory
 WORKDIR /app

@@ -62,6 +67,13 @@ RUN EXTRA_PACKAGES="metrics"; \
     pip install -e ".[$EXTRA_PACKAGES]"; \
     fi

+# Reinstall pytorch
+# This is necessary to ensure that the correct version of PyTorch is installed
+RUN if [ "$INSTALL_PYTORCH" == "true" ]; then \
+    pip uninstall -y torch torchvision torchaudio && \
+    pip install --pre torch torchvision torchaudio --index-url "$PYTORCH_INDEX"; \
+    fi
+
 # Rebuild flash attention
 RUN pip uninstall -y transformer-engine flash-attn && \
     if [ "$INSTALL_FLASHATTN" == "true" ]; then \
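The new INSTALL_PYTORCH / PYTORCH_INDEX arguments make the nightly ROCm PyTorch reinstall configurable at build time. A minimal sketch of a manual build exercising them (the image tag, and the repository root as build context, are assumptions inferred from the compose file's relative paths):

# Sketch: build the ROCm image with a nightly PyTorch wheel; run from the repo root
docker build \
  -f docker/docker-rocm/Dockerfile \
  --build-arg INSTALL_PYTORCH=true \
  --build-arg PYTORCH_INDEX=https://download.pytorch.org/whl/nightly/rocm6.3 \
  -t llamafactory:rocm .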
docker/docker-rocm/docker-compose.yml

@@ -9,8 +9,10 @@ services:
         INSTALL_DEEPSPEED: "false"
         INSTALL_FLASHATTN: "false"
         INSTALL_LIGER_KERNEL: "false"
+        INSTALL_PYTORCH: "true"
         INSTALL_HQQ: "false"
         PIP_INDEX: https://pypi.org/simple
+        PYTORCH_INDEX: https://download.pytorch.org/whl/nightly/rocm6.3
     container_name: llamafactory
     volumes:
       - ../../hf_cache:/root/.cache/huggingface
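Since the compose file now passes the same defaults as the Dockerfile, the usual workflow picks up the nightly wheel with no extra flags; setting INSTALL_PYTORCH to "false" keeps whatever PyTorch the base image ships. For example:

# Sketch: build and start the container with the defaults above
cd docker/docker-rocm
docker compose up -d --build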
examples/extras/adam_mini/qwen2_full_sft.yaml

@@ -15,6 +15,7 @@ cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16
+dataloader_num_workers: 4

 ### output
 output_dir: saves/qwen2-1_5b/full/sft

@@ -22,6 +23,8 @@ logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
+save_only_model: false
+report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]

 ### train
 per_device_train_batch_size: 1
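This pair of hunks is the template for most files below. The three new keys map onto standard Hugging Face TrainingArguments fields: dataloader_num_workers sets the DataLoader worker count, save_only_model drops optimizer and scheduler state from checkpoints, and report_to selects the experiment tracker. Pointing a config at a tracker is then a one-line edit (assumes the tracker package, e.g. wandb, is installed and authenticated):

report_to: wandb  # instead of none; tensorboard, swanlab, and mlflow work the same way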
examples/extras/apollo/llama3_full_sft.yaml

@@ -20,6 +20,7 @@ cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16
+dataloader_num_workers: 4

 ### output
 output_dir: saves/llama3-8b/full/sft

@@ -27,6 +28,8 @@ logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
+save_only_model: false
+report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]

 ### train
 per_device_train_batch_size: 1
examples/extras/badam/llama3_full_sft.yaml

@@ -20,6 +20,7 @@ cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16
+dataloader_num_workers: 4

 ### output
 output_dir: saves/llama3-8b/full/sft

@@ -27,6 +28,8 @@ logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
+save_only_model: false
+report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]

 ### train
 per_device_train_batch_size: 1
examples/extras/fsdp_qlora/llama3_lora_sft.yaml

@@ -17,6 +17,7 @@ cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16
+dataloader_num_workers: 4

 ### output
 output_dir: saves/llama3-8b/lora/sft

@@ -24,6 +25,8 @@ logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
+save_only_model: false
+report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]

 ### train
 per_device_train_batch_size: 1
examples/extras/galore/llama3_full_sft.yaml

@@ -19,6 +19,7 @@ cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16
+dataloader_num_workers: 4

 ### output
 output_dir: saves/llama3-8b/full/sft

@@ -26,6 +27,8 @@ logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
+save_only_model: false
+report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]

 ### train
 per_device_train_batch_size: 1
examples/extras/llama_pro/llama3_freeze_sft.yaml

@@ -17,6 +17,7 @@ cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16
+dataloader_num_workers: 4

 ### output
 output_dir: saves/llama3-8b-pro/freeze/sft

@@ -24,6 +25,8 @@ logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
+save_only_model: false
+report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]

 ### train
 per_device_train_batch_size: 1
examples/extras/loraplus/llama3_lora_sft.yaml

@@ -17,6 +17,7 @@ cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16
+dataloader_num_workers: 4

 ### output
 output_dir: saves/llama3-8b/lora/sft

@@ -24,6 +25,8 @@ logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
+save_only_model: false
+report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]

 ### train
 per_device_train_batch_size: 1
examples/extras/mod/llama3_full_sft.yaml

@@ -15,6 +15,7 @@ cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16
+dataloader_num_workers: 4

 ### output
 output_dir: saves/llama3-8b-mod/full/sft

@@ -22,6 +23,8 @@ logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
+save_only_model: false
+report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]

 ### train
 per_device_train_batch_size: 1
examples/extras/nlg_eval/llama3_lora_predict.yaml

@@ -18,10 +18,12 @@ cutoff_len: 2048
 max_samples: 50
 overwrite_cache: true
 preprocessing_num_workers: 16
+dataloader_num_workers: 4

 ### output
 output_dir: saves/llama3-8b/lora/predict
 overwrite_output_dir: true
+report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]

 ### eval
 per_device_eval_batch_size: 1
examples/extras/pissa/llama3_lora_sft.yaml

@@ -19,6 +19,7 @@ cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16
+dataloader_num_workers: 4

 ### output
 output_dir: saves/llama3-8b/lora/sft

@@ -26,6 +27,8 @@ logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
+save_only_model: false
+report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]

 ### train
 per_device_train_batch_size: 1
examples/train_full/llama3_full_sft.yaml

@@ -24,6 +24,7 @@ save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
 save_only_model: false
+report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]

 ### train
 per_device_train_batch_size: 1
examples/train_full/qwen2vl_full_sft.yaml

@@ -8,10 +8,10 @@ trust_remote_code: true
 stage: sft
 do_train: true
 finetuning_type: full
-freeze_vision_tower: true  # choices: [true, false]
-freeze_multi_modal_projector: true  # choices: [true, false]
-freeze_language_model: false  # choices: [true, false]
-deepspeed: examples/deepspeed/ds_z3_config.json  # choices: [ds_z0_config.json, ds_z2_config.json, ds_z3_config.json]
+freeze_vision_tower: true
+freeze_multi_modal_projector: true
+freeze_language_model: false
+deepspeed: examples/deepspeed/ds_z3_config.json

 ### dataset
 dataset: mllm_demo,identity,alpaca_en_demo

@@ -29,6 +29,7 @@ save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
 save_only_model: false
+report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]

 ### train
 per_device_train_batch_size: 1
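The only deletions on this page are these inline `# choices: ...` comments; the flag values themselves are unchanged. For reference, full multimodal tuning would flip all three freeze flags (a sketch only, and substantially more memory-hungry):

freeze_vision_tower: false
freeze_multi_modal_projector: false
freeze_language_model: false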
examples/train_lora/llama3_lora_dpo.yaml

@@ -27,6 +27,7 @@ save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
 save_only_model: false
+report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]

 ### train
 per_device_train_batch_size: 1
examples/train_lora/llama3_lora_kto.yaml

@@ -17,6 +17,7 @@ cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16
+dataloader_num_workers: 4

 ### output
 output_dir: saves/llama3-8b/lora/kto

@@ -24,6 +25,7 @@ logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
+report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]

 ### train
 per_device_train_batch_size: 1
examples/train_lora/llama3_lora_ppo.yaml

@@ -17,6 +17,7 @@ cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16
+dataloader_num_workers: 4

 ### output
 output_dir: saves/llama3-8b/lora/ppo

@@ -24,6 +25,7 @@ logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
+report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]

 ### train
 per_device_train_batch_size: 1
examples/train_lora/llama3_lora_pretrain.yaml

@@ -24,6 +24,7 @@ save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
 save_only_model: false
+report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]

 ### train
 per_device_train_batch_size: 1
examples/train_lora/llama3_lora_reward.yaml

@@ -25,6 +25,7 @@ save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
 save_only_model: false
+report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]

 ### train
 per_device_train_batch_size: 1
examples/train_lora/llama3_lora_sft.yaml

@@ -25,6 +25,7 @@ save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
 save_only_model: false
+report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]

 ### train
 per_device_train_batch_size: 1
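Any of the updated example configs runs through the project CLI; for instance:

llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml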