ModelZoo / Qwen2.5_pytorch · Commits
Commit 802ef8b7, authored Oct 11, 2024 by luopl

init

Pipeline #1743 failed in 0 seconds
Changes: 263 · Pipelines: 1

Showing 20 changed files with 472 additions and 0 deletions (+472 −0)
LLaMA-Factory/examples/deepspeed/ds_z2_offload_config.json  (+33 −0)
LLaMA-Factory/examples/deepspeed/ds_z3_config.json  (+31 −0)
LLaMA-Factory/examples/deepspeed/ds_z3_offload_config.json  (+39 −0)
LLaMA-Factory/examples/extras/adam_mini/qwen2_full_sft.yaml  (+39 −0)
LLaMA-Factory/examples/extras/badam/llama3_full_sft.yaml  (+42 −0)
LLaMA-Factory/examples/extras/fsdp_qlora/llama3_lora_sft.yaml  (+40 −0)
LLaMA-Factory/examples/extras/fsdp_qlora/train.sh  (+6 −0)
LLaMA-Factory/examples/extras/galore/llama3_full_sft.yaml  (+43 −0)
LLaMA-Factory/examples/extras/llama_pro/expand.sh  (+6 −0)
LLaMA-Factory/examples/extras/llama_pro/llama3_freeze_sft.yaml  (+41 −0)
LLaMA-Factory/examples/extras/loraplus/llama3_lora_sft.yaml  (+40 −0)
LLaMA-Factory/examples/extras/mod/llama3_full_sft.yaml  (+40 −0)
LLaMA-Factory/examples/extras/pissa/init.sh  (+5 −0)
LLaMA-Factory/examples/extras/pissa/llama3_lora_sft.yaml  (+42 −0)
LLaMA-Factory/examples/inference/llama3.yaml  (+2 −0)
LLaMA-Factory/examples/inference/llama3_lora_sft.yaml  (+4 −0)
LLaMA-Factory/examples/inference/llama3_vllm.yaml  (+4 −0)
LLaMA-Factory/examples/inference/llava1_5.yaml  (+2 −0)
LLaMA-Factory/examples/inference/qwen2_vl.yaml  (+2 −0)
LLaMA-Factory/examples/merge_lora/llama3_gptq.yaml  (+11 −0)
LLaMA-Factory/examples/deepspeed/ds_z2_offload_config.json  (new file, mode 100644)

{
  "train_batch_size": "auto",
  "train_micro_batch_size_per_gpu": "auto",
  "gradient_accumulation_steps": "auto",
  "gradient_clipping": "auto",
  "zero_allow_untested_optimizer": true,
  "fp16": {
    "enabled": "auto",
    "loss_scale": 0,
    "loss_scale_window": 1000,
    "initial_scale_power": 16,
    "hysteresis": 2,
    "min_loss_scale": 1
  },
  "bf16": {
    "enabled": "auto"
  },
  "zero_optimization": {
    "stage": 2,
    "offload_optimizer": {
      "device": "cpu",
      "pin_memory": true
    },
    "allgather_partitions": true,
    "allgather_bucket_size": 5e8,
    "overlap_comm": true,
    "reduce_scatter": true,
    "reduce_bucket_size": 5e8,
    "contiguous_gradients": true,
    "round_robin_gradients": true
  }
}
\ No newline at end of file
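These DeepSpeed JSONs are not launched on their own: a LLaMA-Factory training YAML points at one of them through a deepspeed: entry (the BAdam recipe later in this commit carries exactly that line, commented out). A minimal launch sketch, assuming the upstream llamafactory-cli entry point and a hypothetical recipe my_full_sft.yaml that sets deepspeed: examples/deepspeed/ds_z2_offload_config.json; neither is part of this commit:

# Sketch only: two-GPU full SFT run whose YAML references the ZeRO-2 CPU-offload config.
cd LLaMA-Factory
CUDA_VISIBLE_DEVICES=0,1 llamafactory-cli train my_full_sft.yaml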
LLaMA-Factory/examples/deepspeed/ds_z3_config.json  (new file, mode 100644)

{
  "train_batch_size": "auto",
  "train_micro_batch_size_per_gpu": "auto",
  "gradient_accumulation_steps": "auto",
  "gradient_clipping": "auto",
  "zero_allow_untested_optimizer": true,
  "fp16": {
    "enabled": "auto",
    "loss_scale": 0,
    "loss_scale_window": 1000,
    "initial_scale_power": 16,
    "hysteresis": 2,
    "min_loss_scale": 1
  },
  "bf16": {
    "enabled": "auto"
  },
  "zero_optimization": {
    "stage": 3,
    "overlap_comm": true,
    "contiguous_gradients": true,
    "sub_group_size": 1e9,
    "reduce_bucket_size": "auto",
    "stage3_prefetch_bucket_size": "auto",
    "stage3_param_persistence_threshold": "auto",
    "stage3_max_live_parameters": 1e9,
    "stage3_max_reuse_distance": 1e9,
    "stage3_gather_16bit_weights_on_model_save": true
  }
}
\ No newline at end of file
LLaMA-Factory/examples/deepspeed/ds_z3_offload_config.json  (new file, mode 100644)

{
  "train_batch_size": "auto",
  "train_micro_batch_size_per_gpu": "auto",
  "gradient_accumulation_steps": "auto",
  "gradient_clipping": "auto",
  "zero_allow_untested_optimizer": true,
  "fp16": {
    "enabled": "auto",
    "loss_scale": 0,
    "loss_scale_window": 1000,
    "initial_scale_power": 16,
    "hysteresis": 2,
    "min_loss_scale": 1
  },
  "bf16": {
    "enabled": "auto"
  },
  "zero_optimization": {
    "stage": 3,
    "offload_optimizer": {
      "device": "cpu",
      "pin_memory": true
    },
    "offload_param": {
      "device": "cpu",
      "pin_memory": true
    },
    "overlap_comm": true,
    "contiguous_gradients": true,
    "sub_group_size": 1e9,
    "reduce_bucket_size": "auto",
    "stage3_prefetch_bucket_size": "auto",
    "stage3_param_persistence_threshold": "auto",
    "stage3_max_live_parameters": 1e9,
    "stage3_max_reuse_distance": 1e9,
    "stage3_gather_16bit_weights_on_model_save": true
  }
}
\ No newline at end of file
LLaMA-Factory/examples/extras/adam_mini/qwen2_full_sft.yaml  (new file, mode 100644)

### model
model_name_or_path: Qwen/Qwen2-1.5B-Instruct

### method
stage: sft
do_train: true
finetuning_type: full
use_adam_mini: true

### dataset
dataset: identity,alpaca_en_demo
template: qwen
cutoff_len: 1024
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16

### output
output_dir: saves/qwen2-1_5b/full/sft
logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true

### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 8
learning_rate: 1.0e-5
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000

### eval
val_size: 0.1
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 500
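The recipe above is self-contained, so it can be passed straight to the trainer. A usage sketch, assuming the llamafactory-cli entry point from upstream LLaMA-Factory (this commit only adds the YAML):

# Sketch only: single-GPU full-parameter SFT of Qwen2-1.5B-Instruct with the Adam-mini optimizer.
cd LLaMA-Factory
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/extras/adam_mini/qwen2_full_sft.yaml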
LLaMA-Factory/examples/extras/badam/llama3_full_sft.yaml  (new file, mode 100644)

### model
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct

### method
stage: sft
do_train: true
finetuning_type: full
use_badam: true
badam_mode: layer
badam_switch_mode: ascending
badam_switch_interval: 50
badam_verbose: 2
# deepspeed: examples/deepspeed/ds_z3_config.json

### dataset
dataset: identity,alpaca_en_demo
template: llama3
cutoff_len: 1024
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16

### output
output_dir: saves/llama3-8b/full/sft
logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true

### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 8
learning_rate: 1.0e-5
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1

### eval
val_size: 0.1
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 500
LLaMA-Factory/examples/extras/fsdp_qlora/llama3_lora_sft.yaml  (new file, mode 100644)

### model
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
quantization_bit: 4

### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all

### dataset
dataset: identity,alpaca_en_demo
template: llama3
cutoff_len: 1024
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16

### output
output_dir: saves/llama3-8b/lora/sft
logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true

### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000

### eval
val_size: 0.1
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 500
LLaMA-Factory/examples/extras/fsdp_qlora/train.sh  (new file, mode 100644)

#!/bin/bash
# DO NOT use GPTQ/AWQ model in FSDP+QLoRA

CUDA_VISIBLE_DEVICES=0,1 accelerate launch \
    --config_file examples/accelerate/fsdp_config.yaml \
    src/train.py examples/extras/fsdp_qlora/llama3_lora_sft.yaml
LLaMA-Factory/examples/extras/galore/llama3_full_sft.yaml  (new file, mode 100644)

### model
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct

### method
stage: sft
do_train: true
finetuning_type: full
use_galore: true
galore_layerwise: true
galore_target: mlp,self_attn
galore_rank: 128
galore_scale: 2.0

### dataset
dataset: identity,alpaca_en_demo
template: llama3
cutoff_len: 1024
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16

### output
output_dir: saves/llama3-8b/full/sft
logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true

### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 1
learning_rate: 1.0e-5
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
pure_bf16: true
ddp_timeout: 180000000

### eval
val_size: 0.1
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 500
LLaMA-Factory/examples/extras/llama_pro/expand.sh  (new file, mode 100644)

#!/bin/bash

python scripts/llama_pro.py \
    --model_name_or_path meta-llama/Meta-Llama-3-8B-Instruct \
    --output_dir models/llama3-8b-pro \
    --num_expand 8
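The expanded checkpoint written to models/llama3-8b-pro is the model_name_or_path consumed by the freeze-tuning recipe below, so the two files are meant to run in sequence. A sketch of that sequence, assuming the llamafactory-cli entry point (not part of this commit):

# Sketch only: expand Llama-3-8B-Instruct by 8 layers, then freeze-tune the new blocks.
cd LLaMA-Factory
bash examples/extras/llama_pro/expand.sh
llamafactory-cli train examples/extras/llama_pro/llama3_freeze_sft.yaml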
LLaMA-Factory/examples/extras/llama_pro/llama3_freeze_sft.yaml  (new file, mode 100644)

### model
model_name_or_path: models/llama3-8b-pro

### method
stage: sft
do_train: true
finetuning_type: freeze
freeze_trainable_layers: 8
freeze_trainable_modules: all
use_llama_pro: true

### dataset
dataset: identity,alpaca_en_demo
template: llama3
cutoff_len: 1024
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16

### output
output_dir: saves/llama3-8b-pro/freeze/sft
logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true

### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000

### eval
val_size: 0.1
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 500
LLaMA-Factory/examples/extras/loraplus/llama3_lora_sft.yaml  (new file, mode 100644)

### model
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct

### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all
loraplus_lr_ratio: 16.0

### dataset
dataset: identity,alpaca_en_demo
template: llama3
cutoff_len: 1024
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16

### output
output_dir: saves/llama3-8b/lora/sft
logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true

### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000

### eval
val_size: 0.1
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 500
LLaMA-Factory/examples/extras/mod/llama3_full_sft.yaml  (new file, mode 100644)

### model
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct

### method
stage: sft
do_train: true
finetuning_type: full
mixture_of_depths: convert

### dataset
dataset: identity,alpaca_en_demo
template: llama3
cutoff_len: 1024
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16

### output
output_dir: saves/llama3-8b-mod/full/sft
logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true

### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 8
optim: paged_adamw_8bit
learning_rate: 1.0e-5
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
pure_bf16: true
ddp_timeout: 180000000

### eval
val_size: 0.1
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 500
LLaMA-Factory/examples/extras/pissa/init.sh  (new file, mode 100644)

#!/bin/bash

python scripts/pissa_init.py \
    --model_name_or_path meta-llama/Meta-Llama-3-8B-Instruct \
    --output_dir models/llama3-8b-pissa
LLaMA-Factory/examples/extras/pissa/llama3_lora_sft.yaml  (new file, mode 100644)

### model
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct

### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all
pissa_init: true
pissa_iter: 16
pissa_convert: true

### dataset
dataset: identity,alpaca_en_demo
template: llama3
cutoff_len: 1024
max_samples: 1000
overwrite_cache: true
preprocessing_num_workers: 16

### output
output_dir: saves/llama3-8b/lora/sft
logging_steps: 10
save_steps: 500
plot_loss: true
overwrite_output_dir: true

### train
per_device_train_batch_size: 1
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000

### eval
val_size: 0.1
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 500
LLaMA-Factory/examples/inference/llama3.yaml  (new file, mode 100644)

model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
template: llama3
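The inference YAMLs pair a checkpoint with its chat template. A usage sketch for the file above, assuming the llamafactory-cli chat command from upstream LLaMA-Factory (not part of this commit):

# Sketch only: interactive chat with the base Llama-3-8B-Instruct recipe.
cd LLaMA-Factory
llamafactory-cli chat examples/inference/llama3.yaml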
LLaMA-Factory/examples/inference/llama3_lora_sft.yaml  (new file, mode 100644)

model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
adapter_name_or_path: saves/llama3-8b/lora/sft
template: llama3
finetuning_type: lora
LLaMA-Factory/examples/inference/llama3_vllm.yaml  (new file, mode 100644)

model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
template: llama3
infer_backend: vllm
vllm_enforce_eager: true
LLaMA-Factory/examples/inference/llava1_5.yaml  (new file, mode 100644)

model_name_or_path: llava-hf/llava-1.5-7b-hf
template: llava
LLaMA-Factory/examples/inference/qwen2_vl.yaml  (new file, mode 100644)

model_name_or_path: Qwen/Qwen2-VL-7B-Instruct
template: qwen2_vl
LLaMA-Factory/examples/merge_lora/llama3_gptq.yaml  (new file, mode 100644)

### model
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
template: llama3

### export
export_dir: models/llama3_gptq
export_quantization_bit: 4
export_quantization_dataset: data/c4_demo.json
export_size: 2
export_device: cpu
export_legacy_format: false
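This last recipe exports the base Llama-3 model as a 4-bit GPTQ checkpoint calibrated on data/c4_demo.json. A usage sketch, assuming the llamafactory-cli export command from upstream LLaMA-Factory (not part of this commit):

# Sketch only: write the quantized export to models/llama3_gptq, quantizing on CPU.
cd LLaMA-Factory
llamafactory-cli export examples/merge_lora/llama3_gptq.yaml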