ModelZoo / Firefly-Llama3_unsloth / Commits

Commit d3e0fa63, authored Jul 31, 2024 by chenzk

Commit message: v1.0.3

Pipeline #1452: canceled with stages
Changes: 204 files | Pipelines: 1
Showing 20 changed files with 514 additions and 0 deletions (+514 / -0). The commit touches 204 files in total; this page lists the first 20.

train_args/sft/qlora/baichuan-13b-sft-qlora.json        +32  -0
train_args/sft/qlora/baichuan-7b-sft-qlora.json         +32  -0
train_args/sft/qlora/baichuan2-13b-sft-qlora.json       +32  -0
train_args/sft/qlora/baichuan2-7b-sft-qlora.json        +32  -0
train_args/sft/qlora/bloom-7b1-sft-qlora.json           +32  -0
train_args/sft/qlora/chatglm2-6b-sft-qlora.json         +32  -0
train_args/sft/qlora/chatglm3-6b-sft-qlora.json         +32  -0
train_args/sft/qlora/deepseek-7b-sft-qlora.json         +32  -0
train_args/sft/qlora/deepseek-moe-16b-sft-qlora.json    +32  -0
train_args/sft/qlora/gemma-2b-sft-qlora.json            +33  -0
train_args/sft/qlora/gemma-7b-sft-qlora.json            +33  -0
train_args/sft/qlora/internlm-20b-sft-qlora.json        +32  -0
train_args/sft/qlora/internlm-7b-sft-qlora.json         +32  -0
train_args/sft/qlora/internlm2-20b-sft-qlora.json       +32  -0
train_args/sft/qlora/internlm2-7b-sft-qlora.json        +32  -0
train_args/sft/qlora/llama2-13b-sft-qlora.json          +32  -0
train_args/sft/qlora/llama2-7b-sft-qlora.json           +0   -0
train_args/sft/qlora/llama3-8b-sft-qlora.json           +0   -0
train_args/sft/qlora/minicpm-2b-sft-qlora.json          +0   -0
train_args/sft/qlora/mistral-7b-sft-qlora.json          +0   -0
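All 20 files are JSON argument sets for QLoRA supervised fine-tuning, one per base model. A quick, illustrative way to confirm that every new config parses and carries the keys the trainer needs (the required-key list here is an assumption, not taken from the Firefly code):

import json
from pathlib import Path

# Illustrative sanity check over the new config directory: every file must
# parse as JSON and contain a minimal set of expected keys.
REQUIRED = {"output_dir", "model_name_or_path", "train_file",
            "template_name", "learning_rate", "lora_rank"}

for path in sorted(Path("train_args/sft/qlora").glob("*.json")):
    cfg = json.loads(path.read_text())
    missing = REQUIRED - cfg.keys()
    status = "OK" if not missing else f"missing {sorted(missing)}"
    print(f"{path.name}: {status}")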
train_args/sft/qlora/baichuan-13b-sft-qlora.json (new file, mode 100644)

{
    "output_dir": "output/firefly-baichuan-13b-sft-qlora",
    "model_name_or_path": "baichuan-inc/Baichuan-13B-Chat",
    "train_file": "./data/dummy_data.jsonl",
    "template_name": "baichuan",
    "num_train_epochs": 1,
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 16,
    "learning_rate": 1e-4,
    "max_seq_length": 1024,
    "logging_steps": 100,
    "save_steps": 100,
    "save_total_limit": 1,
    "lr_scheduler_type": "constant_with_warmup",
    "warmup_steps": 100,
    "lora_rank": 64,
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    "gradient_checkpointing": true,
    "disable_tqdm": false,
    "optim": "paged_adamw_32bit",
    "seed": 42,
    "fp16": true,
    "report_to": "tensorboard",
    "dataloader_num_workers": 5,
    "save_strategy": "steps",
    "weight_decay": 0,
    "max_grad_norm": 0.3,
    "remove_unused_columns": false
}
\ No newline at end of file
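The fields above split into Firefly-specific arguments (model_name_or_path, train_file, template_name, max_seq_length, and the lora_* values) and standard Hugging Face TrainingArguments. The sketch below shows how they could be consumed in a generic transformers/peft QLoRA setup; it is an illustration, not the Firefly trainer's actual code, and the 4-bit quantization settings are assumptions since they do not appear in the JSON.

import json

import torch
from peft import LoraConfig
from transformers import BitsAndBytesConfig, TrainingArguments

with open("train_args/sft/qlora/baichuan-13b-sft-qlora.json") as f:
    cfg = json.load(f)

# Keys handled by the training framework itself rather than TrainingArguments.
CUSTOM_KEYS = {"model_name_or_path", "train_file", "template_name",
               "max_seq_length", "lora_rank", "lora_alpha", "lora_dropout"}

# Everything else maps one-to-one onto TrainingArguments fields.
training_args = TrainingArguments(
    **{k: v for k, v in cfg.items() if k not in CUSTOM_KEYS}
)

# LoRA adapter settings taken from the config.
lora_config = LoraConfig(
    r=cfg["lora_rank"],                # 64
    lora_alpha=cfg["lora_alpha"],      # 16
    lora_dropout=cfg["lora_dropout"],  # 0.05
    task_type="CAUSAL_LM",
)

# 4-bit NF4 loading is what makes this QLoRA; these exact bitsandbytes
# settings are an assumption and are not part of the JSON file.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)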
train_args/sft/qlora/baichuan-7b-sft-qlora.json (new file, mode 100644)

{
    "output_dir": "output/firefly-baichuan-7b-sft-qlora",
    "model_name_or_path": "baichuan-inc/baichuan-7B",
    "train_file": "./data/dummy_data.jsonl",
    "template_name": "baichuan",
    "num_train_epochs": 1,
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 16,
    "learning_rate": 2e-4,
    "max_seq_length": 1024,
    "logging_steps": 100,
    "save_steps": 100,
    "save_total_limit": 1,
    "lr_scheduler_type": "constant_with_warmup",
    "warmup_steps": 100,
    "lora_rank": 64,
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    "gradient_checkpointing": true,
    "disable_tqdm": false,
    "optim": "paged_adamw_32bit",
    "seed": 42,
    "fp16": true,
    "report_to": "tensorboard",
    "dataloader_num_workers": 0,
    "save_strategy": "steps",
    "weight_decay": 0,
    "max_grad_norm": 0.3,
    "remove_unused_columns": false
}
\ No newline at end of file
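Across these configs, per_device_train_batch_size 1 with gradient_accumulation_steps 16 gives an effective batch of 16 sequences per optimizer step on each GPU, or at most 16 * 1024 = 16,384 tokens per step at max_seq_length 1024. A small worked example (the single-GPU assumption is mine):

# Effective batch size implied by the shared settings (per optimizer step).
per_device_bs = 1
grad_accum = 16
max_seq_len = 1024
num_gpus = 1  # assumption; multiply up for multi-GPU launches

effective_batch = per_device_bs * grad_accum * num_gpus  # 16 sequences
max_tokens_per_step = effective_batch * max_seq_len      # 16384 tokens
print(effective_batch, max_tokens_per_step)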
train_args/sft/qlora/baichuan2-13b-sft-qlora.json (new file, mode 100644)

{
    "output_dir": "output/firefly-baichuan2-13b-sft-qlora",
    "model_name_or_path": "baichuan-inc/Baichuan2-13B-Chat",
    "train_file": "./data/dummy_data.jsonl",
    "template_name": "baichuan2",
    "num_train_epochs": 1,
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 16,
    "learning_rate": 1e-4,
    "max_seq_length": 1024,
    "logging_steps": 100,
    "save_steps": 100,
    "save_total_limit": 1,
    "lr_scheduler_type": "constant_with_warmup",
    "warmup_steps": 100,
    "lora_rank": 64,
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    "gradient_checkpointing": true,
    "disable_tqdm": false,
    "optim": "paged_adamw_32bit",
    "seed": 42,
    "fp16": true,
    "report_to": "tensorboard",
    "dataloader_num_workers": 0,
    "save_strategy": "steps",
    "weight_decay": 0,
    "max_grad_norm": 0.3,
    "remove_unused_columns": false
}
\ No newline at end of file
train_args/sft/qlora/baichuan2-7b-sft-qlora.json (new file, mode 100644)

{
    "output_dir": "output/firefly-baichuan2-7b-sft-qlora",
    "model_name_or_path": "baichuan-inc/Baichuan2-7B-Chat",
    "train_file": "./data/dummy_data.jsonl",
    "template_name": "baichuan2",
    "num_train_epochs": 1,
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 16,
    "learning_rate": 2e-4,
    "max_seq_length": 1024,
    "logging_steps": 100,
    "save_steps": 100,
    "save_total_limit": 1,
    "lr_scheduler_type": "constant_with_warmup",
    "warmup_steps": 100,
    "lora_rank": 64,
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    "gradient_checkpointing": true,
    "disable_tqdm": false,
    "optim": "paged_adamw_32bit",
    "seed": 42,
    "fp16": true,
    "report_to": "tensorboard",
    "dataloader_num_workers": 0,
    "save_strategy": "steps",
    "weight_decay": 0,
    "max_grad_norm": 0.3,
    "remove_unused_columns": false
}
\ No newline at end of file
train_args/sft/qlora/bloom-7b1-sft-qlora.json (new file, mode 100644)

{
    "output_dir": "output/firefly-bloom-7b1-sft-qlora",
    "model_name_or_path": "bigscience/bloom-7b1",
    "train_file": "./data/dummy_data.jsonl",
    "template_name": "default",
    "num_train_epochs": 1,
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 16,
    "learning_rate": 2e-4,
    "max_seq_length": 1024,
    "logging_steps": 100,
    "save_steps": 100,
    "save_total_limit": 1,
    "lr_scheduler_type": "constant_with_warmup",
    "warmup_steps": 100,
    "lora_rank": 64,
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    "gradient_checkpointing": true,
    "disable_tqdm": false,
    "optim": "paged_adamw_32bit",
    "seed": 42,
    "fp16": true,
    "report_to": "tensorboard",
    "dataloader_num_workers": 0,
    "save_strategy": "steps",
    "weight_decay": 0,
    "max_grad_norm": 0.3,
    "remove_unused_columns": false
}
\ No newline at end of file
train_args/sft/qlora/chatglm2-6b-sft-qlora.json (new file, mode 100644)

{
    "output_dir": "output/firefly-chatglm2-6b-sft-qlora",
    "model_name_or_path": "THUDM/chatglm2-6b",
    "train_file": "./data/dummy_data.jsonl",
    "template_name": "chatglm2",
    "num_train_epochs": 1,
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 16,
    "learning_rate": 2e-4,
    "max_seq_length": 1024,
    "logging_steps": 100,
    "save_steps": 100,
    "save_total_limit": 1,
    "lr_scheduler_type": "constant_with_warmup",
    "warmup_steps": 100,
    "lora_rank": 64,
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    "gradient_checkpointing": true,
    "disable_tqdm": false,
    "optim": "paged_adamw_32bit",
    "seed": 42,
    "fp16": true,
    "report_to": "tensorboard",
    "dataloader_num_workers": 0,
    "save_strategy": "steps",
    "weight_decay": 0,
    "max_grad_norm": 0.3,
    "remove_unused_columns": false
}
\ No newline at end of file
train_args/sft/qlora/chatglm3-6b-sft-qlora.json (new file, mode 100644)

{
    "output_dir": "output/firefly-chatglm3-6b-sft-qlora",
    "model_name_or_path": "THUDM/chatglm3-6b",
    "train_file": "./data/dummy_data_chatglm3.jsonl",
    "template_name": "chatglm3",
    "num_train_epochs": 1,
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 16,
    "learning_rate": 2e-4,
    "max_seq_length": 1024,
    "logging_steps": 100,
    "save_steps": 100,
    "save_total_limit": 1,
    "lr_scheduler_type": "constant_with_warmup",
    "warmup_steps": 100,
    "lora_rank": 64,
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    "gradient_checkpointing": true,
    "disable_tqdm": false,
    "optim": "paged_adamw_32bit",
    "seed": 42,
    "fp16": true,
    "report_to": "tensorboard",
    "dataloader_num_workers": 0,
    "save_strategy": "steps",
    "weight_decay": 0,
    "max_grad_norm": 0.3,
    "remove_unused_columns": false
}
\ No newline at end of file
train_args/sft/qlora/deepseek-7b-sft-qlora.json (new file, mode 100644)

{
    "output_dir": "output/firefly-deepseek-7b-sft-qlora",
    "model_name_or_path": "deepseek-ai/deepseek-llm-7b-chat",
    "train_file": "./data/dummy_data.jsonl",
    "template_name": "deepseek",
    "num_train_epochs": 1,
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 16,
    "learning_rate": 2e-4,
    "max_seq_length": 1024,
    "logging_steps": 100,
    "save_steps": 100,
    "save_total_limit": 1,
    "lr_scheduler_type": "constant_with_warmup",
    "warmup_steps": 100,
    "lora_rank": 64,
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    "gradient_checkpointing": true,
    "disable_tqdm": false,
    "optim": "paged_adamw_32bit",
    "seed": 42,
    "fp16": true,
    "report_to": "tensorboard",
    "dataloader_num_workers": 0,
    "save_strategy": "steps",
    "weight_decay": 0,
    "max_grad_norm": 0.3,
    "remove_unused_columns": false
}
\ No newline at end of file
train_args/sft/qlora/deepseek-moe-16b-sft-qlora.json (new file, mode 100644)

{
    "output_dir": "output/firefly-deepseek-moe-16b-sft-qlora",
    "model_name_or_path": "deepseek-ai/deepseek-moe-16b-chat",
    "train_file": "./data/dummy_data.jsonl",
    "template_name": "deepseek",
    "num_train_epochs": 1,
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 16,
    "learning_rate": 1e-4,
    "max_seq_length": 1024,
    "logging_steps": 100,
    "save_steps": 100,
    "save_total_limit": 1,
    "lr_scheduler_type": "constant_with_warmup",
    "warmup_steps": 100,
    "lora_rank": 64,
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    "gradient_checkpointing": true,
    "disable_tqdm": false,
    "optim": "paged_adamw_32bit",
    "seed": 42,
    "fp16": true,
    "report_to": "tensorboard",
    "dataloader_num_workers": 0,
    "save_strategy": "steps",
    "weight_decay": 0,
    "max_grad_norm": 0.3,
    "remove_unused_columns": false
}
\ No newline at end of file
train_args/sft/qlora/gemma-2b-sft-qlora.json (new file, mode 100644)

{
    "output_dir": "output/firefly-gemma-2b-sft-qlora",
    "model_name_or_path": "google/gemma-2b-it",
    "train_file": "./data/dummy_data.jsonl",
    "template_name": "gemma",
    "num_train_epochs": 1,
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 16,
    "learning_rate": 2e-4,
    "max_seq_length": 1024,
    "logging_steps": 100,
    "save_steps": 100,
    "save_total_limit": 1,
    "lr_scheduler_type": "constant_with_warmup",
    "warmup_steps": 100,
    "lora_rank": 64,
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    "use_unsloth": true,
    "gradient_checkpointing": true,
    "disable_tqdm": false,
    "optim": "paged_adamw_32bit",
    "seed": 42,
    "fp16": true,
    "report_to": "tensorboard",
    "dataloader_num_workers": 0,
    "save_strategy": "steps",
    "weight_decay": 0,
    "max_grad_norm": 0.3,
    "remove_unused_columns": false
}
\ No newline at end of file
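gemma-2b and gemma-7b are the only configs in this commit that set "use_unsloth": true (hence +33 lines instead of +32). The sketch below shows the unsloth loading path that flag presumably enables, using the unsloth FastLanguageModel API; the target_modules list is an assumption, since the JSON does not specify one, and the actual wiring inside Firefly may differ.

from unsloth import FastLanguageModel

# 4-bit load of the base model through unsloth's patched implementation.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="google/gemma-2b-it",
    max_seq_length=1024,
    load_in_4bit=True,
)

# Attach LoRA adapters with the hyperparameters from the config.
model = FastLanguageModel.get_peft_model(
    model,
    r=64,
    lora_alpha=16,
    lora_dropout=0.05,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],  # assumption
    use_gradient_checkpointing=True,
)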
train_args/sft/qlora/gemma-7b-sft-qlora.json (new file, mode 100644)

{
    "output_dir": "output/firefly-gemma-7b-sft-qlora",
    "model_name_or_path": "google/gemma-7b-it",
    "train_file": "./data/dummy_data.jsonl",
    "template_name": "gemma",
    "num_train_epochs": 1,
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 16,
    "learning_rate": 2e-4,
    "max_seq_length": 1024,
    "logging_steps": 100,
    "save_steps": 100,
    "save_total_limit": 1,
    "lr_scheduler_type": "constant_with_warmup",
    "warmup_steps": 100,
    "lora_rank": 64,
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    "use_unsloth": true,
    "gradient_checkpointing": true,
    "disable_tqdm": false,
    "optim": "paged_adamw_32bit",
    "seed": 42,
    "fp16": true,
    "report_to": "tensorboard",
    "dataloader_num_workers": 0,
    "save_strategy": "steps",
    "weight_decay": 0,
    "max_grad_norm": 0.3,
    "remove_unused_columns": false
}
\ No newline at end of file
train_args/sft/qlora/internlm-20b-sft-qlora.json (new file, mode 100644)

{
    "output_dir": "output/firefly-internlm-20b-sft-qlora",
    "model_name_or_path": "internlm/internlm-chat-20b",
    "train_file": "./data/dummy_data.jsonl",
    "template_name": "internlm",
    "num_train_epochs": 1,
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 16,
    "learning_rate": 1e-4,
    "max_seq_length": 1024,
    "logging_steps": 100,
    "save_steps": 100,
    "save_total_limit": 1,
    "lr_scheduler_type": "constant_with_warmup",
    "warmup_steps": 100,
    "lora_rank": 64,
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    "gradient_checkpointing": true,
    "disable_tqdm": false,
    "optim": "paged_adamw_32bit",
    "seed": 42,
    "fp16": true,
    "report_to": "tensorboard",
    "dataloader_num_workers": 0,
    "save_strategy": "steps",
    "weight_decay": 0,
    "max_grad_norm": 0.3,
    "remove_unused_columns": false
}
\ No newline at end of file
train_args/sft/qlora/internlm-7b-sft-qlora.json (new file, mode 100644)

{
    "output_dir": "output/firefly-internlm-7b-sft-qlora",
    "model_name_or_path": "internlm/internlm-chat-7b",
    "train_file": "./data/dummy_data.jsonl",
    "template_name": "internlm",
    "num_train_epochs": 1,
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 16,
    "learning_rate": 2e-4,
    "max_seq_length": 1024,
    "logging_steps": 100,
    "save_steps": 100,
    "save_total_limit": 1,
    "lr_scheduler_type": "constant_with_warmup",
    "warmup_steps": 100,
    "lora_rank": 64,
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    "gradient_checkpointing": true,
    "disable_tqdm": false,
    "optim": "paged_adamw_32bit",
    "seed": 42,
    "fp16": true,
    "report_to": "tensorboard",
    "dataloader_num_workers": 0,
    "save_strategy": "steps",
    "weight_decay": 0,
    "max_grad_norm": 0.3,
    "remove_unused_columns": false
}
\ No newline at end of file
train_args/sft/qlora/internlm2-20b-sft-qlora.json (new file, mode 100644)

{
    "output_dir": "output/firefly-internlm2-20b-sft-qlora",
    "model_name_or_path": "internlm/internlm2-chat-20b",
    "train_file": "./data/dummy_data.jsonl",
    "template_name": "internlm2",
    "num_train_epochs": 1,
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 16,
    "learning_rate": 1e-4,
    "max_seq_length": 1024,
    "logging_steps": 100,
    "save_steps": 100,
    "save_total_limit": 1,
    "lr_scheduler_type": "constant_with_warmup",
    "warmup_steps": 100,
    "lora_rank": 64,
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    "gradient_checkpointing": true,
    "disable_tqdm": false,
    "optim": "paged_adamw_32bit",
    "seed": 42,
    "fp16": true,
    "report_to": "tensorboard",
    "dataloader_num_workers": 0,
    "save_strategy": "steps",
    "weight_decay": 0,
    "max_grad_norm": 0.3,
    "remove_unused_columns": false
}
\ No newline at end of file
train_args/sft/qlora/internlm2-7b-sft-qlora.json (new file, mode 100644)

{
    "output_dir": "output/firefly-internlm2-7b-sft-qlora",
    "model_name_or_path": "internlm/internlm2-chat-7b",
    "train_file": "./data/dummy_data.jsonl",
    "template_name": "internlm2",
    "num_train_epochs": 1,
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 16,
    "learning_rate": 2e-4,
    "max_seq_length": 1024,
    "logging_steps": 100,
    "save_steps": 100,
    "save_total_limit": 1,
    "lr_scheduler_type": "constant_with_warmup",
    "warmup_steps": 100,
    "lora_rank": 64,
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    "gradient_checkpointing": true,
    "disable_tqdm": false,
    "optim": "paged_adamw_32bit",
    "seed": 42,
    "fp16": true,
    "report_to": "tensorboard",
    "dataloader_num_workers": 0,
    "save_strategy": "steps",
    "weight_decay": 0,
    "max_grad_norm": 0.3,
    "remove_unused_columns": false
}
\ No newline at end of file
train_args/sft/qlora/llama2-13b-sft-qlora.json (new file, mode 100644)

{
    "output_dir": "output/firefly-llama2-13b-sft-qlora",
    "model_name_or_path": "NousResearch/Llama-2-13b-chat-hf",
    "train_file": "./data/dummy_data.jsonl",
    "template_name": "llama2",
    "num_train_epochs": 1,
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 16,
    "learning_rate": 1e-4,
    "max_seq_length": 1024,
    "logging_steps": 100,
    "save_steps": 100,
    "save_total_limit": 1,
    "lr_scheduler_type": "constant_with_warmup",
    "warmup_steps": 100,
    "lora_rank": 64,
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    "gradient_checkpointing": true,
    "disable_tqdm": false,
    "optim": "paged_adamw_32bit",
    "seed": 42,
    "fp16": true,
    "report_to": "tensorboard",
    "dataloader_num_workers": 0,
    "save_strategy": "steps",
    "weight_decay": 0,
    "max_grad_norm": 0.3,
    "remove_unused_columns": false
}
\ No newline at end of file
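For a sense of scale, a rank-64 LoRA adapter on a weight matrix of shape (d_out, d_in) adds r * (d_in + d_out) trainable parameters (the A matrix is r x d_in, the B matrix is d_out x r). None of these configs specify target_modules, so the layer shapes and module list below are purely illustrative:

# Rough LoRA parameter count for rank-64 adapters. The 5120 hidden size and
# 40 layers correspond to a typical 13B model; the attention-only module
# choice is an assumption, not something stated in the configs.
def lora_params(d_in: int, d_out: int, r: int = 64) -> int:
    return r * (d_in + d_out)

hidden = 5120
per_layer = 4 * lora_params(hidden, hidden)   # q/k/v/o projections: 2,621,440
total = 40 * per_layer                        # ~105M trainable parameters
print(f"{per_layer:,} per layer, {total:,} total")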
train_args/sft/qlora/llama2-7b-sft-qlora.json (new file, mode 100644)
(diff collapsed on the original page; contents not shown)
train_args/sft/qlora/llama3-8b-sft-qlora.json (new file, mode 100644)
(diff collapsed on the original page; contents not shown)
train_args/sft/qlora/minicpm-2b-sft-qlora.json (new file, mode 100644)
(diff collapsed on the original page; contents not shown)
train_args/sft/qlora/mistral-7b-sft-qlora.json (new file, mode 100644)
(diff collapsed on the original page; contents not shown)