ModelZoo / DISC-FinLLM_pytorch · Commits

Commit afe180a6, authored May 21, 2024 by wanglch

Initial commit

Pipeline #1006: canceled with stages · Changes: 258 · Pipelines: 1
Showing 20 changed files with 486 additions and 0 deletions (+486 / -0); this is page 1 of 13 of the full diff.
LLaMA-Factory/examples/inference/api_demo.sh  +7 -0
LLaMA-Factory/examples/inference/cli_demo.sh  +7 -0
LLaMA-Factory/examples/inference/evaluate.sh  +12 -0
LLaMA-Factory/examples/inference/web_demo.sh  +8 -0
LLaMA-Factory/examples/lora_multi_gpu/ds_zero3.sh  +33 -0
LLaMA-Factory/examples/lora_multi_gpu/multi_node.sh  +36 -0
LLaMA-Factory/examples/lora_multi_gpu/single_node.sh  +35 -0
LLaMA-Factory/examples/lora_single_gpu/dpo.sh  +35 -0
LLaMA-Factory/examples/lora_single_gpu/orpo.sh  +32 -0
LLaMA-Factory/examples/lora_single_gpu/ppo.sh  +32 -0
LLaMA-Factory/examples/lora_single_gpu/predict.sh  +19 -0
LLaMA-Factory/examples/lora_single_gpu/prepare.sh  +18 -0
LLaMA-Factory/examples/lora_single_gpu/pretrain.sh  +31 -0
LLaMA-Factory/examples/lora_single_gpu/reward.sh  +33 -0
LLaMA-Factory/examples/lora_single_gpu/sft.sh  +32 -0
LLaMA-Factory/examples/lora_single_gpu/sft_mllm.sh  +33 -0
LLaMA-Factory/examples/merge_lora/merge.sh  +12 -0
LLaMA-Factory/examples/merge_lora/quantize.sh  +11 -0
LLaMA-Factory/examples/qlora_single_gpu/aqlm.sh  +30 -0
LLaMA-Factory/examples/qlora_single_gpu/awq.sh  +30 -0
LLaMA-Factory/examples/inference/api_demo.sh (new file, 0 → 100644)

#!/bin/bash

CUDA_VISIBLE_DEVICES=0 API_PORT=8000 python ../../src/api_demo.py \
    --model_name_or_path meta-llama/Llama-2-7b-hf \
    --adapter_name_or_path ../../saves/LLaMA2-7B/lora/sft \
    --template default \
    --finetuning_type lora
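For reference, a minimal way to exercise the server once it is up, assuming the demo exposes an OpenAI-compatible /v1/chat/completions endpoint on the configured API_PORT (the endpoint path and model field are assumptions, not shown in this commit):

# hypothetical smoke test against the API demo started above
curl http://localhost:8000/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{"model": "llama2-7b-sft", "messages": [{"role": "user", "content": "Hello"}]}'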
LLaMA-Factory/examples/inference/cli_demo.sh (new file, 0 → 100644)

#!/bin/bash

CUDA_VISIBLE_DEVICES=0 python ../../src/cli_demo.py \
    --model_name_or_path meta-llama/Llama-2-7b-hf \
    --adapter_name_or_path ../../saves/LLaMA2-7B/lora/sft \
    --template default \
    --finetuning_type lora
LLaMA-Factory/examples/inference/evaluate.sh (new file, 0 → 100644)

#!/bin/bash

CUDA_VISIBLE_DEVICES=0 python ../../src/evaluate.py \
    --model_name_or_path meta-llama/Llama-2-7b-hf \
    --adapter_name_or_path ../../saves/LLaMA2-7B/lora/sft \
    --template fewshot \
    --finetuning_type lora \
    --task mmlu \
    --split test \
    --lang en \
    --n_shot 5 \
    --batch_size 4
LLaMA-Factory/examples/inference/web_demo.sh (new file, 0 → 100644)

#!/bin/bash
# add `--visual_inputs True` to load MLLM

CUDA_VISIBLE_DEVICES=0 python ../../src/web_demo.py \
    --model_name_or_path meta-llama/Llama-2-7b-hf \
    --adapter_name_or_path ../../saves/LLaMA2-7B/lora/sft \
    --template default \
    --finetuning_type lora
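Following the comment in the script, a sketch of the multimodal variant; the checkpoint, template, and adapter path below are borrowed from sft_mllm.sh later in this commit and are illustrative, not prescribed here:

# illustrative MLLM web demo; model/template taken from sft_mllm.sh in this commit
CUDA_VISIBLE_DEVICES=0 python ../../src/web_demo.py \
    --model_name_or_path llava-hf/llava-1.5-7b-hf \
    --visual_inputs True \
    --adapter_name_or_path ../../saves/LLaMA2-7B/lora/sft_mllm \
    --template vicuna \
    --finetuning_type lora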
LLaMA-Factory/examples/lora_multi_gpu/ds_zero3.sh (new file, 0 → 100644)

#!/bin/bash

deepspeed --num_gpus 4 ../../src/train_bash.py \
    --deepspeed ../deepspeed/ds_z3_config.json \
    --stage sft \
    --do_train \
    --model_name_or_path meta-llama/Llama-2-7b-hf \
    --dataset alpaca_gpt4_en,glaive_toolcall \
    --dataset_dir ../../data \
    --template default \
    --finetuning_type lora \
    --lora_target q_proj,v_proj \
    --output_dir ../../saves/LLaMA2-7B/lora/sft \
    --overwrite_cache \
    --overwrite_output_dir \
    --cutoff_len 1024 \
    --preprocessing_num_workers 16 \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 1 \
    --gradient_accumulation_steps 2 \
    --lr_scheduler_type cosine \
    --logging_steps 10 \
    --warmup_steps 20 \
    --save_steps 100 \
    --eval_steps 100 \
    --evaluation_strategy steps \
    --learning_rate 5e-5 \
    --num_train_epochs 3.0 \
    --max_samples 3000 \
    --val_size 0.1 \
    --ddp_timeout 180000000 \
    --plot_loss \
    --fp16
LLaMA-Factory/examples/lora_multi_gpu/multi_node.sh (new file, 0 → 100644)

#!/bin/bash
# also launch it on slave machine using slave_config.yaml

CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch \
    --config_file ../accelerate/master_config.yaml \
    ../../src/train_bash.py \
    --stage sft \
    --do_train \
    --model_name_or_path meta-llama/Llama-2-7b-hf \
    --dataset alpaca_gpt4_en,glaive_toolcall \
    --dataset_dir ../../data \
    --template default \
    --finetuning_type lora \
    --lora_target q_proj,v_proj \
    --output_dir ../../saves/LLaMA2-7B/lora/sft \
    --overwrite_cache \
    --overwrite_output_dir \
    --cutoff_len 1024 \
    --preprocessing_num_workers 16 \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 1 \
    --gradient_accumulation_steps 2 \
    --lr_scheduler_type cosine \
    --logging_steps 10 \
    --warmup_steps 20 \
    --save_steps 100 \
    --eval_steps 100 \
    --evaluation_strategy steps \
    --load_best_model_at_end \
    --learning_rate 5e-5 \
    --num_train_epochs 3.0 \
    --max_samples 3000 \
    --val_size 0.1 \
    --ddp_timeout 180000000 \
    --plot_loss \
    --fp16
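Per the comment at the top of the script, the same job is launched on the second machine with the slave accelerate config. A sketch of that counterpart invocation, assuming the repository ships ../accelerate/slave_config.yaml alongside master_config.yaml (the training flags are identical to the master launch and are elided here):

# sketch of the slave-node launch; pass the same training flags as the master launch above
CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch \
    --config_file ../accelerate/slave_config.yaml \
    ../../src/train_bash.py \
    --stage sft --do_train   # ...followed by the same remaining arguments as the master launch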
LLaMA-Factory/examples/lora_multi_gpu/single_node.sh (new file, 0 → 100644)

#!/bin/bash

CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch \
    --config_file ../accelerate/single_config.yaml \
    ../../src/train_bash.py \
    --stage sft \
    --do_train \
    --model_name_or_path meta-llama/Llama-2-7b-hf \
    --dataset alpaca_gpt4_en,glaive_toolcall \
    --dataset_dir ../../data \
    --template default \
    --finetuning_type lora \
    --lora_target q_proj,v_proj \
    --output_dir ../../saves/LLaMA2-7B/lora/sft \
    --overwrite_cache \
    --overwrite_output_dir \
    --cutoff_len 1024 \
    --preprocessing_num_workers 16 \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 1 \
    --gradient_accumulation_steps 2 \
    --lr_scheduler_type cosine \
    --logging_steps 10 \
    --warmup_steps 20 \
    --save_steps 100 \
    --eval_steps 100 \
    --evaluation_strategy steps \
    --load_best_model_at_end \
    --learning_rate 5e-5 \
    --num_train_epochs 3.0 \
    --max_samples 3000 \
    --val_size 0.1 \
    --ddp_timeout 180000000 \
    --plot_loss \
    --fp16
LLaMA-Factory/examples/lora_single_gpu/dpo.sh (new file, 0 → 100644)

#!/bin/bash

CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \
    --stage dpo \
    --do_train \
    --model_name_or_path meta-llama/Llama-2-7b-hf \
    --adapter_name_or_path ../../saves/LLaMA2-7B/lora/sft \
    --create_new_adapter \
    --dataset orca_rlhf \
    --dataset_dir ../../data \
    --template default \
    --finetuning_type lora \
    --lora_target q_proj,v_proj \
    --output_dir ../../saves/LLaMA2-7B/lora/dpo \
    --overwrite_cache \
    --overwrite_output_dir \
    --cutoff_len 1024 \
    --preprocessing_num_workers 16 \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 1 \
    --gradient_accumulation_steps 8 \
    --lr_scheduler_type cosine \
    --logging_steps 10 \
    --warmup_steps 20 \
    --save_steps 100 \
    --eval_steps 100 \
    --evaluation_strategy steps \
    --load_best_model_at_end \
    --learning_rate 1e-5 \
    --num_train_epochs 1.0 \
    --max_samples 1000 \
    --val_size 0.1 \
    --dpo_ftx 1.0 \
    --plot_loss \
    --fp16
LLaMA-Factory/examples/lora_single_gpu/orpo.sh (new file, 0 → 100644)

#!/bin/bash

CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \
    --stage orpo \
    --do_train \
    --model_name_or_path meta-llama/Llama-2-7b-hf \
    --dataset orca_rlhf \
    --dataset_dir ../../data \
    --template default \
    --finetuning_type lora \
    --lora_target q_proj,v_proj \
    --output_dir ../../saves/LLaMA2-7B/lora/orpo \
    --overwrite_cache \
    --overwrite_output_dir \
    --cutoff_len 1024 \
    --preprocessing_num_workers 16 \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 1 \
    --gradient_accumulation_steps 8 \
    --lr_scheduler_type cosine \
    --logging_steps 10 \
    --warmup_steps 20 \
    --save_steps 100 \
    --eval_steps 100 \
    --evaluation_strategy steps \
    --load_best_model_at_end \
    --learning_rate 1e-5 \
    --num_train_epochs 1.0 \
    --max_samples 1000 \
    --val_size 0.1 \
    --plot_loss \
    --fp16
LLaMA-Factory/examples/lora_single_gpu/ppo.sh (new file, 0 → 100644)

#!/bin/bash

CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \
    --stage ppo \
    --do_train \
    --model_name_or_path meta-llama/Llama-2-7b-hf \
    --adapter_name_or_path ../../saves/LLaMA2-7B/lora/sft \
    --create_new_adapter \
    --dataset alpaca_gpt4_en \
    --dataset_dir ../../data \
    --template default \
    --finetuning_type lora \
    --lora_target q_proj,v_proj \
    --reward_model ../../saves/LLaMA2-7B/lora/reward \
    --output_dir ../../saves/LLaMA2-7B/lora/ppo \
    --overwrite_cache \
    --overwrite_output_dir \
    --cutoff_len 512 \
    --preprocessing_num_workers 16 \
    --per_device_train_batch_size 1 \
    --gradient_accumulation_steps 8 \
    --lr_scheduler_type cosine \
    --logging_steps 10 \
    --save_steps 100 \
    --learning_rate 1e-5 \
    --num_train_epochs 1.0 \
    --max_samples 1000 \
    --top_k 0 \
    --top_p 0.9 \
    --max_new_tokens 256 \
    --plot_loss \
    --fp16
LLaMA-Factory/examples/lora_single_gpu/predict.sh (new file, 0 → 100644)

#!/bin/bash

CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \
    --stage sft \
    --do_predict \
    --model_name_or_path meta-llama/Llama-2-7b-hf \
    --adapter_name_or_path ../../saves/LLaMA2-7B/lora/sft,../../saves/LLaMA2-7B/lora/dpo \
    --dataset alpaca_gpt4_en,glaive_toolcall \
    --dataset_dir ../../data \
    --template default \
    --finetuning_type lora \
    --output_dir ../../saves/LLaMA2-7B/lora/predict \
    --overwrite_cache \
    --overwrite_output_dir \
    --cutoff_len 1024 \
    --preprocessing_num_workers 16 \
    --per_device_eval_batch_size 1 \
    --max_samples 20 \
    --predict_with_generate
LLaMA-Factory/examples/lora_single_gpu/prepare.sh (new file, 0 → 100644)

#!/bin/bash

CUDA_VISIBLE_DEVICES= python ../../src/train_bash.py \
    --stage sft \
    --do_train \
    --model_name_or_path meta-llama/Llama-2-7b-hf \
    --dataset alpaca_gpt4_en,glaive_toolcall \
    --dataset_dir ../../data \
    --template default \
    --finetuning_type lora \
    --lora_target q_proj,v_proj \
    --output_dir ../../saves/LLaMA2-7B/lora/sft \
    --overwrite_cache \
    --overwrite_output_dir \
    --cutoff_len 1024 \
    --preprocessing_num_workers 16 \
    --max_samples 3000 \
    --tokenized_path ../../saves/datasets/sft
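Note that CUDA_VISIBLE_DEVICES is deliberately left empty here, so the run only preprocesses the dataset on CPU and writes the tokenized cache to --tokenized_path. A later training run can point at the same path to reuse that cache; the sketch below assumes LLaMA-Factory loads rather than rebuilds the cache when the path already exists, which is my reading and not stated in this commit:

# sketch: reuse the pre-tokenized dataset in a subsequent training run (loading behavior assumed)
CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \
    --stage sft \
    --do_train \
    --model_name_or_path meta-llama/Llama-2-7b-hf \
    --tokenized_path ../../saves/datasets/sft \
    --finetuning_type lora \
    --output_dir ../../saves/LLaMA2-7B/lora/sft   # plus the remaining hyperparameters from sft.sh below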
LLaMA-Factory/examples/lora_single_gpu/pretrain.sh (new file, 0 → 100644)

#!/bin/bash

CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \
    --stage pt \
    --do_train \
    --model_name_or_path meta-llama/Llama-2-7b-hf \
    --dataset c4_demo \
    --dataset_dir ../../data \
    --finetuning_type lora \
    --lora_target q_proj,v_proj \
    --output_dir ../../saves/LLaMA2-7B/lora/pretrain \
    --overwrite_cache \
    --overwrite_output_dir \
    --cutoff_len 1024 \
    --preprocessing_num_workers 16 \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 1 \
    --gradient_accumulation_steps 8 \
    --lr_scheduler_type cosine \
    --logging_steps 10 \
    --warmup_steps 20 \
    --save_steps 100 \
    --eval_steps 100 \
    --evaluation_strategy steps \
    --load_best_model_at_end \
    --learning_rate 5e-5 \
    --num_train_epochs 3.0 \
    --max_samples 10000 \
    --val_size 0.1 \
    --plot_loss \
    --fp16
LLaMA-Factory/examples/lora_single_gpu/reward.sh (new file, 0 → 100644)

#!/bin/bash

CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \
    --stage rm \
    --do_train \
    --model_name_or_path meta-llama/Llama-2-7b-hf \
    --adapter_name_or_path ../../saves/LLaMA2-7B/lora/sft \
    --create_new_adapter \
    --dataset orca_rlhf \
    --dataset_dir ../../data \
    --template default \
    --finetuning_type lora \
    --lora_target q_proj,v_proj \
    --output_dir ../../saves/LLaMA2-7B/lora/reward \
    --overwrite_cache \
    --overwrite_output_dir \
    --cutoff_len 1024 \
    --preprocessing_num_workers 16 \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 1 \
    --gradient_accumulation_steps 8 \
    --lr_scheduler_type cosine \
    --logging_steps 10 \
    --warmup_steps 20 \
    --save_steps 100 \
    --eval_steps 100 \
    --evaluation_strategy steps \
    --learning_rate 1e-5 \
    --num_train_epochs 1.0 \
    --max_samples 5000 \
    --val_size 0.1 \
    --plot_loss \
    --fp16
LLaMA-Factory/examples/lora_single_gpu/sft.sh (new file, 0 → 100644)

#!/bin/bash

CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \
    --stage sft \
    --do_train \
    --model_name_or_path meta-llama/Llama-2-7b-hf \
    --dataset alpaca_gpt4_en,glaive_toolcall \
    --dataset_dir ../../data \
    --template default \
    --finetuning_type lora \
    --lora_target q_proj,v_proj \
    --output_dir ../../saves/LLaMA2-7B/lora/sft \
    --overwrite_cache \
    --overwrite_output_dir \
    --cutoff_len 1024 \
    --preprocessing_num_workers 16 \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 1 \
    --gradient_accumulation_steps 8 \
    --lr_scheduler_type cosine \
    --logging_steps 10 \
    --warmup_steps 20 \
    --save_steps 100 \
    --eval_steps 100 \
    --evaluation_strategy steps \
    --load_best_model_at_end \
    --learning_rate 5e-5 \
    --num_train_epochs 3.0 \
    --max_samples 3000 \
    --val_size 0.1 \
    --plot_loss \
    --fp16
LLaMA-Factory/examples/lora_single_gpu/sft_mllm.sh (new file, 0 → 100644)

#!/bin/bash

CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \
    --stage sft \
    --do_train \
    --model_name_or_path llava-hf/llava-1.5-7b-hf \
    --visual_inputs \
    --dataset mllm_demo \
    --dataset_dir ../../data \
    --template vicuna \
    --finetuning_type lora \
    --lora_target q_proj,v_proj \
    --output_dir ../../saves/LLaMA2-7B/lora/sft_mllm \
    --overwrite_cache \
    --overwrite_output_dir \
    --cutoff_len 1024 \
    --preprocessing_num_workers 16 \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 1 \
    --gradient_accumulation_steps 8 \
    --lr_scheduler_type cosine \
    --logging_steps 10 \
    --warmup_steps 20 \
    --save_steps 100 \
    --eval_steps 100 \
    --evaluation_strategy steps \
    --load_best_model_at_end \
    --learning_rate 5e-5 \
    --num_train_epochs 100.0 \
    --max_samples 3000 \
    --val_size 0.1 \
    --plot_loss \
    --fp16
LLaMA-Factory/examples/merge_lora/merge.sh (new file, 0 → 100644)

#!/bin/bash
# DO NOT use quantized model or quantization_bit when merging lora weights

CUDA_VISIBLE_DEVICES=0 python ../../src/export_model.py \
    --model_name_or_path meta-llama/Llama-2-7b-hf \
    --adapter_name_or_path ../../saves/LLaMA2-7B/lora/sft \
    --template default \
    --finetuning_type lora \
    --export_dir ../../models/llama2-7b-sft \
    --export_size 2 \
    --export_device cpu \
    --export_legacy_format False
LLaMA-Factory/examples/merge_lora/quantize.sh (new file, 0 → 100644)

#!/bin/bash
# NEED TO run `merge.sh` before using this script

CUDA_VISIBLE_DEVICES=0 python ../../src/export_model.py \
    --model_name_or_path ../../models/llama2-7b-sft \
    --template default \
    --export_dir ../../models/llama2-7b-sft-int4 \
    --export_quantization_bit 4 \
    --export_quantization_dataset ../../data/c4_demo.json \
    --export_size 2 \
    --export_legacy_format False
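Taken together with the comment in quantize.sh, the intended workflow is to merge the LoRA adapter first and only then quantize the merged model; a minimal way to run the two steps in order:

# run from LLaMA-Factory/examples/merge_lora: merge the adapter, then produce the 4-bit export
bash merge.sh && bash quantize.sh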
LLaMA-Factory/examples/qlora_single_gpu/aqlm.sh (new file, 0 → 100644)

#!/bin/bash

CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \
    --stage sft \
    --do_train \
    --model_name_or_path BlackSamorez/Llama-2-7b-AQLM-2Bit-1x16-hf \
    --dataset alpaca_gpt4_en,glaive_toolcall \
    --dataset_dir ../../data \
    --template default \
    --finetuning_type lora \
    --lora_target q_proj,v_proj \
    --output_dir ../../saves/LLaMA2-7B/lora/sft \
    --overwrite_cache \
    --overwrite_output_dir \
    --cutoff_len 1024 \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 1 \
    --gradient_accumulation_steps 8 \
    --lr_scheduler_type cosine \
    --logging_steps 10 \
    --save_steps 100 \
    --eval_steps 100 \
    --evaluation_strategy steps \
    --load_best_model_at_end \
    --learning_rate 5e-5 \
    --num_train_epochs 3.0 \
    --max_samples 3000 \
    --val_size 0.1 \
    --plot_loss \
    --fp16
LLaMA-Factory/examples/qlora_single_gpu/awq.sh (new file, 0 → 100644)

#!/bin/bash

CUDA_VISIBLE_DEVICES=0 python ../../src/train_bash.py \
    --stage sft \
    --do_train \
    --model_name_or_path TheBloke/Llama-2-7B-AWQ \
    --dataset alpaca_gpt4_en,glaive_toolcall \
    --dataset_dir ../../data \
    --template default \
    --finetuning_type lora \
    --lora_target q_proj,v_proj \
    --output_dir ../../saves/LLaMA2-7B/lora/sft \
    --overwrite_cache \
    --overwrite_output_dir \
    --cutoff_len 1024 \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 1 \
    --gradient_accumulation_steps 8 \
    --lr_scheduler_type cosine \
    --logging_steps 10 \
    --save_steps 100 \
    --eval_steps 100 \
    --evaluation_strategy steps \
    --load_best_model_at_end \
    --learning_rate 5e-5 \
    --num_train_epochs 3.0 \
    --max_samples 3000 \
    --val_size 0.1 \
    --plot_loss \
    --fp16