Commit 581d366d authored by chenych

Support GLM-4/GLM-4-0414/GLM-Z1

parent 428c5813
@@ -12,8 +12,13 @@ ARG INSTALL_DEEPSPEED=false
 ARG INSTALL_FLASHATTN=false
 ARG INSTALL_LIGER_KERNEL=false
 ARG INSTALL_HQQ=false
+ARG INSTALL_PYTORCH=true
 ARG PIP_INDEX=https://pypi.org/simple
 ARG HTTP_PROXY=
+ARG PYTORCH_INDEX=https://download.pytorch.org/whl/nightly/rocm6.3
+
+# Use Bash instead of default /bin/sh
+SHELL ["/bin/bash", "-c"]
 
 # Set the working directory
 WORKDIR /app
@@ -62,6 +67,13 @@ RUN EXTRA_PACKAGES="metrics"; \
     pip install -e ".[$EXTRA_PACKAGES]"; \
     fi
 
+# Reinstall pytorch
+# This is necessary to ensure that the correct version of PyTorch is installed
+RUN if [ "$INSTALL_PYTORCH" == "true" ]; then \
+    pip uninstall -y torch torchvision torchaudio && \
+    pip install --pre torch torchvision torchaudio --index-url "$PYTORCH_INDEX"; \
+    fi
+
 # Rebuild flash attention
 RUN pip uninstall -y transformer-engine flash-attn && \
     if [ "$INSTALL_FLASHATTN" == "true" ]; then \
......
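For reference, these build arguments can be exercised when building the ROCm image directly. A minimal sketch, run from the repository root; the Dockerfile path and image tag are assumptions, not part of this commit:

    # Paths and tag are assumed; substitute your actual layout
    docker build \
      --build-arg INSTALL_PYTORCH=true \
      --build-arg PYTORCH_INDEX=https://download.pytorch.org/whl/nightly/rocm6.3 \
      -f docker/docker-rocm/Dockerfile \
      -t llamafactory:rocm .

Setting INSTALL_PYTORCH=false skips the uninstall/reinstall step and keeps whatever torch build the base image ships.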
@@ -9,8 +9,10 @@ services:
         INSTALL_DEEPSPEED: "false"
         INSTALL_FLASHATTN: "false"
         INSTALL_LIGER_KERNEL: "false"
+        INSTALL_PYTORCH: "true"
         INSTALL_HQQ: "false"
         PIP_INDEX: https://pypi.org/simple
+        PYTORCH_INDEX: https://download.pytorch.org/whl/nightly/rocm6.3
     container_name: llamafactory
     volumes:
       - ../../hf_cache:/root/.cache/huggingface
......
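The same toggle is reachable at compose build time, since docker compose accepts --build-arg overrides. A sketch, assuming the compose file sits next to the ROCm Dockerfile:

    cd docker/docker-rocm                                      # assumed location of this compose file
    docker compose build --build-arg INSTALL_PYTORCH=false     # skip the nightly torch reinstall
    docker compose up -d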
@@ -15,6 +15,7 @@ cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16
+dataloader_num_workers: 4
 
 ### output
 output_dir: saves/qwen2-1_5b/full/sft
@@ -22,6 +23,8 @@ logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
+save_only_model: false
+report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
 
 ### train
 per_device_train_batch_size: 1
......
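The same three additions (dataloader_num_workers, save_only_model, and report_to with its documented choices) are applied to the remaining example configs below. A config like this is launched through the LLaMA-Factory CLI; the yaml path here is an assumption, since file names are not visible in this view:

    # yaml path is hypothetical; substitute the actual example file
    llamafactory-cli train examples/train_full/qwen2_full_sft.yaml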
@@ -20,6 +20,7 @@ cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16
+dataloader_num_workers: 4
 
 ### output
 output_dir: saves/llama3-8b/full/sft
@@ -27,6 +28,8 @@ logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
+save_only_model: false
+report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
 
 ### train
 per_device_train_batch_size: 1
......
@@ -20,6 +20,7 @@ cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16
+dataloader_num_workers: 4
 
 ### output
 output_dir: saves/llama3-8b/full/sft
@@ -27,6 +28,8 @@ logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
+save_only_model: false
+report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
 
 ### train
 per_device_train_batch_size: 1
......
@@ -17,6 +17,7 @@ cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16
+dataloader_num_workers: 4
 
 ### output
 output_dir: saves/llama3-8b/lora/sft
@@ -24,6 +25,8 @@ logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
+save_only_model: false
+report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
 
 ### train
 per_device_train_batch_size: 1
......
@@ -19,6 +19,7 @@ cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16
+dataloader_num_workers: 4
 
 ### output
 output_dir: saves/llama3-8b/full/sft
@@ -26,6 +27,8 @@ logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
+save_only_model: false
+report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
 
 ### train
 per_device_train_batch_size: 1
......
@@ -17,6 +17,7 @@ cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16
+dataloader_num_workers: 4
 
 ### output
 output_dir: saves/llama3-8b-pro/freeze/sft
@@ -24,6 +25,8 @@ logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
+save_only_model: false
+report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
 
 ### train
 per_device_train_batch_size: 1
......
@@ -17,6 +17,7 @@ cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16
+dataloader_num_workers: 4
 
 ### output
 output_dir: saves/llama3-8b/lora/sft
@@ -24,6 +25,8 @@ logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
+save_only_model: false
+report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
 
 ### train
 per_device_train_batch_size: 1
......
@@ -15,6 +15,7 @@ cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16
+dataloader_num_workers: 4
 
 ### output
 output_dir: saves/llama3-8b-mod/full/sft
@@ -22,6 +23,8 @@ logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
+save_only_model: false
+report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
 
 ### train
 per_device_train_batch_size: 1
......
@@ -18,10 +18,12 @@ cutoff_len: 2048
 max_samples: 50
 overwrite_cache: true
 preprocessing_num_workers: 16
+dataloader_num_workers: 4
 
 ### output
 output_dir: saves/llama3-8b/lora/predict
 overwrite_output_dir: true
+report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
 
 ### eval
 per_device_eval_batch_size: 1
......
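The prediction config goes through the same entry point, with the trainer running evaluation/prediction rather than an optimizer loop (presumably this config sets do_predict: true above the lines shown here). A sketch with an assumed file name:

    llamafactory-cli train examples/train_lora/llama3_lora_predict.yaml  # path assumed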
@@ -19,6 +19,7 @@ cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16
+dataloader_num_workers: 4
 
 ### output
 output_dir: saves/llama3-8b/lora/sft
@@ -26,6 +27,8 @@ logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
+save_only_model: false
+report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
 
 ### train
 per_device_train_batch_size: 1
......
@@ -24,6 +24,7 @@ save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
 save_only_model: false
+report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
 
 ### train
 per_device_train_batch_size: 1
......
@@ -8,10 +8,10 @@ trust_remote_code: true
 stage: sft
 do_train: true
 finetuning_type: full
-freeze_vision_tower: true  # choices: [true, false]
-freeze_multi_modal_projector: true  # choices: [true, false]
-freeze_language_model: false  # choices: [true, false]
-deepspeed: examples/deepspeed/ds_z3_config.json  # choices: [ds_z0_config.json, ds_z2_config.json, ds_z3_config.json]
+freeze_vision_tower: true
+freeze_multi_modal_projector: true
+freeze_language_model: false
+deepspeed: examples/deepspeed/ds_z3_config.json
 
 ### dataset
 dataset: mllm_demo,identity,alpaca_en_demo
@@ -29,6 +29,7 @@ save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
 save_only_model: false
+report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
 
 ### train
 per_device_train_batch_size: 1
......
@@ -27,6 +27,7 @@ save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
 save_only_model: false
+report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
 
 ### train
 per_device_train_batch_size: 1
......
@@ -17,6 +17,7 @@ cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16
+dataloader_num_workers: 4
 
 ### output
 output_dir: saves/llama3-8b/lora/kto
@@ -24,6 +25,7 @@ logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
+report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
 
 ### train
 per_device_train_batch_size: 1
......
@@ -17,6 +17,7 @@ cutoff_len: 2048
 max_samples: 1000
 overwrite_cache: true
 preprocessing_num_workers: 16
+dataloader_num_workers: 4
 
 ### output
 output_dir: saves/llama3-8b/lora/ppo
@@ -24,6 +25,7 @@ logging_steps: 10
 save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
+report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
 
 ### train
 per_device_train_batch_size: 1
......
@@ -24,6 +24,7 @@ save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
 save_only_model: false
+report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
 
 ### train
 per_device_train_batch_size: 1
......
@@ -25,6 +25,7 @@ save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
 save_only_model: false
+report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
 
 ### train
 per_device_train_batch_size: 1
......
@@ -25,6 +25,7 @@ save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
 save_only_model: false
+report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]
 
 ### train
 per_device_train_batch_size: 1
......
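report_to: none keeps these examples from silently depending on an experiment tracker. Opting in to one of the listed backends means installing it before editing the yaml; a sketch, assuming Weights & Biases is the desired backend:

    pip install wandb     # then set report_to: wandb in the training yaml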