ModelZoo / Firefly-Llama3_unsloth · Commits

Commit d74a64c4, authored Jul 31, 2024 by chenzk
Commit message: v1.0
Pipeline #1450: canceled with stages
Changes: 98 files across a 5-page diff; this page shows 18 changed files with 516 additions and 0 deletions (+516, -0)
train_args/sft/qlora/orion-14b-sft-qlora.json (+32, -0)
train_args/sft/qlora/phi3-3.8b-sft-qlora.json (+32, -0)
train_args/sft/qlora/qwen-14b-sft-qlora.json (+32, -0)
train_args/sft/qlora/qwen-7b-sft-qlora.json (+32, -0)
train_args/sft/qlora/qwen1.5-7b-sft-qlora.json (+33, -0)
train_args/sft/qlora/qwen1.5-moe-2.7b-sft-qlora.json (+32, -0)
train_args/sft/qlora/qwen2-7b-sft-qlora.json (+33, -0)
train_args/sft/qlora/vicuna-13b-sft-qlora.json (+32, -0)
train_args/sft/qlora/vicuna-7b-sft-qlora.json (+32, -0)
train_args/sft/qlora/xverse-13b-sft-qlora.json (+32, -0)
train_args/sft/qlora/xverse-7b-sft-qlora.json (+32, -0)
train_args/sft/qlora/yi-34b-sft-qlora.json (+32, -0)
train_args/sft/qlora/yi-6b-sft-qlora.json (+32, -0)
train_args/sft/qlora/yi1.5-6b-sft-qlora.json (+32, -0)
train_args/sft/qlora/zephyr-7b-sft-qlora.json (+33, -0)
train_args/sft/qlora/ziya2-13b-sft-qlora.json (+32, -0)
unsloth (submodule) (+1, -0)
unsloth.zip (binary) (+0, -0)
train_args/sft/qlora/orion-14b-sft-qlora.json (new file, mode 100644)

{
    "output_dir": "output/firefly-orion-14b-sft-qlora",
    "model_name_or_path": "OrionStarAI/Orion-14B-Chat",
    "train_file": "./data/dummy_data.jsonl",
    "template_name": "orion",
    "num_train_epochs": 1,
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 16,
    "learning_rate": 1e-4,
    "max_seq_length": 1024,
    "logging_steps": 100,
    "save_steps": 100,
    "save_total_limit": 1,
    "lr_scheduler_type": "constant_with_warmup",
    "warmup_steps": 100,
    "lora_rank": 64,
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    "gradient_checkpointing": true,
    "disable_tqdm": false,
    "optim": "paged_adamw_32bit",
    "seed": 42,
    "fp16": true,
    "report_to": "tensorboard",
    "dataloader_num_workers": 0,
    "save_strategy": "steps",
    "weight_decay": 0,
    "max_grad_norm": 0.3,
    "remove_unused_columns": false
}
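Aside from the model id, chat template, learning rate, and dataloader workers, every config in this commit uses the same QLoRA recipe: rank-64 LoRA with alpha 16 and dropout 0.05, paged_adamw_32bit, fp16, and an effective per-device batch size of 1 * 16 = 16 via gradient accumulation. The 13B/14B/34B models use a learning rate of 1e-4 and the smaller models 2e-4. Below is a minimal sketch of how a Firefly-style train.py typically ingests such a file (upstream Firefly is launched as torchrun --nproc_per_node={num_gpus} train.py --train_args_file <config.json>); the CustomArguments dataclass is a hypothetical stand-in for whatever this repo actually defines.

# Hedged sketch: parse one of these JSON files into Hugging Face argument
# dataclasses. The CustomArguments fields mirror the non-TrainingArguments
# keys in the configs; this repo's own class may differ (assumption).
from dataclasses import dataclass
from typing import Optional

from transformers import HfArgumentParser, TrainingArguments

@dataclass
class CustomArguments:
    model_name_or_path: Optional[str] = None
    train_file: Optional[str] = None
    template_name: str = "default"   # chat template key, e.g. "orion", "qwen"
    max_seq_length: int = 1024
    lora_rank: int = 64
    lora_alpha: int = 16
    lora_dropout: float = 0.05
    use_unsloth: bool = False        # present in some configs below

parser = HfArgumentParser((CustomArguments, TrainingArguments))
custom_args, training_args = parser.parse_json_file(
    json_file="train_args/sft/qlora/orion-14b-sft-qlora.json"
)
# With per_device_train_batch_size=1 and gradient_accumulation_steps=16,
# the optimizer steps on an effective batch of 16 samples per device.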
train_args/sft/qlora/phi3-3.8b-sft-qlora.json (new file, mode 100644)

{
    "output_dir": "output/firefly-phi3-3.8b-sft-qlora",
    "model_name_or_path": "microsoft/Phi-3-mini-128k-instruct",
    "train_file": "./data/dummy_data.jsonl",
    "template_name": "phi3",
    "num_train_epochs": 1,
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 16,
    "learning_rate": 2e-4,
    "max_seq_length": 1024,
    "logging_steps": 100,
    "save_steps": 100,
    "save_total_limit": 1,
    "lr_scheduler_type": "constant_with_warmup",
    "warmup_steps": 100,
    "lora_rank": 64,
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    "gradient_checkpointing": true,
    "disable_tqdm": false,
    "optim": "paged_adamw_32bit",
    "seed": 42,
    "fp16": true,
    "report_to": "tensorboard",
    "dataloader_num_workers": 0,
    "save_strategy": "steps",
    "weight_decay": 0,
    "max_grad_norm": 0.3,
    "remove_unused_columns": false
}
train_args/sft/qlora/qwen-14b-sft-qlora.json (new file, mode 100644)

{
    "output_dir": "output/firefly-qwen-14b-sft-qlora",
    "model_name_or_path": "Qwen/Qwen-14B-Chat",
    "train_file": "./data/dummy_data.jsonl",
    "template_name": "qwen",
    "num_train_epochs": 1,
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 16,
    "learning_rate": 1e-4,
    "max_seq_length": 1024,
    "logging_steps": 100,
    "save_steps": 100,
    "save_total_limit": 1,
    "lr_scheduler_type": "constant_with_warmup",
    "warmup_steps": 100,
    "lora_rank": 64,
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    "gradient_checkpointing": true,
    "disable_tqdm": false,
    "optim": "paged_adamw_32bit",
    "seed": 42,
    "fp16": true,
    "report_to": "tensorboard",
    "dataloader_num_workers": 0,
    "save_strategy": "steps",
    "weight_decay": 0,
    "max_grad_norm": 0.3,
    "remove_unused_columns": false
}
train_args/sft/qlora/qwen-7b-sft-qlora.json (new file, mode 100644)

{
    "output_dir": "output/firefly-qwen-7b-sft-qlora",
    "model_name_or_path": "Qwen/Qwen-7B-Chat",
    "train_file": "./data/dummy_data.jsonl",
    "template_name": "qwen",
    "num_train_epochs": 1,
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 16,
    "learning_rate": 2e-4,
    "max_seq_length": 1024,
    "logging_steps": 100,
    "save_steps": 100,
    "save_total_limit": 1,
    "lr_scheduler_type": "constant_with_warmup",
    "warmup_steps": 100,
    "lora_rank": 64,
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    "gradient_checkpointing": true,
    "disable_tqdm": false,
    "optim": "paged_adamw_32bit",
    "seed": 42,
    "fp16": true,
    "report_to": "tensorboard",
    "dataloader_num_workers": 0,
    "save_strategy": "steps",
    "weight_decay": 0,
    "max_grad_norm": 0.3,
    "remove_unused_columns": false
}
train_args/sft/qlora/qwen1.5-7b-sft-qlora.json (new file, mode 100644)

{
    "output_dir": "output/firefly-qwen1.5-7b-sft-qlora",
    "model_name_or_path": "Qwen/Qwen1.5-7B-Chat",
    "train_file": "./data/dummy_data.jsonl",
    "template_name": "qwen",
    "num_train_epochs": 1,
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 16,
    "learning_rate": 2e-4,
    "max_seq_length": 1024,
    "logging_steps": 100,
    "save_steps": 100,
    "save_total_limit": 1,
    "lr_scheduler_type": "constant_with_warmup",
    "warmup_steps": 100,
    "lora_rank": 64,
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    "use_unsloth": false,
    "gradient_checkpointing": true,
    "disable_tqdm": false,
    "optim": "paged_adamw_32bit",
    "seed": 42,
    "fp16": true,
    "report_to": "tensorboard",
    "dataloader_num_workers": 0,
    "save_strategy": "steps",
    "weight_decay": 0,
    "max_grad_norm": 0.3,
    "remove_unused_columns": false
}
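This config is the first in the commit to carry a "use_unsloth" switch (false here; qwen2-7b and zephyr-7b below also carry it). When the switch is off, the standard QLoRA path applies: the frozen base model is loaded in 4-bit NF4 via bitsandbytes and LoRA adapters are attached with peft. A minimal sketch under those assumptions follows; the exact wiring in this repo's training code is not shown in this commit, and the target_modules choice is illustrative.

# Hedged sketch of the plain (non-Unsloth) QLoRA loading path these
# configs imply. Library calls are real transformers/peft APIs.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model

# 4-bit NF4 quantization of the frozen base weights, with fp16 compute
# to match "fp16": true in the JSON.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen1.5-7B-Chat",
    quantization_config=bnb_config,
)

# LoRA adapter matching lora_rank / lora_alpha / lora_dropout above.
lora_config = LoraConfig(
    r=64,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules="all-linear",  # assumption: the repo may target specific projections
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()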
train_args/sft/qlora/qwen1.5-moe-2.7b-sft-qlora.json (new file, mode 100644)

{
    "output_dir": "output/firefly-qwen1.5-moe-2.7b-sft-qlora",
    "model_name_or_path": "Qwen/Qwen1.5-MoE-A2.7B-Chat",
    "train_file": "./data/dummy_data.jsonl",
    "template_name": "qwen",
    "num_train_epochs": 1,
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 16,
    "learning_rate": 2e-4,
    "max_seq_length": 1024,
    "logging_steps": 100,
    "save_steps": 100,
    "save_total_limit": 1,
    "lr_scheduler_type": "constant_with_warmup",
    "warmup_steps": 100,
    "lora_rank": 64,
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    "gradient_checkpointing": true,
    "disable_tqdm": false,
    "optim": "paged_adamw_32bit",
    "seed": 42,
    "fp16": true,
    "report_to": "tensorboard",
    "dataloader_num_workers": 0,
    "save_strategy": "steps",
    "weight_decay": 0,
    "max_grad_norm": 0.3,
    "remove_unused_columns": false
}
train_args/sft/qlora/qwen2-7b-sft-qlora.json (new file, mode 100644)

{
    "output_dir": "output/firefly-qwen2-7b-sft-qlora",
    "model_name_or_path": "Qwen/Qwen2-7B-Instruct",
    "train_file": "./data/dummy_data.jsonl",
    "template_name": "qwen",
    "num_train_epochs": 1,
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 16,
    "learning_rate": 2e-4,
    "max_seq_length": 1024,
    "logging_steps": 100,
    "save_steps": 100,
    "save_total_limit": 1,
    "lr_scheduler_type": "constant_with_warmup",
    "warmup_steps": 100,
    "lora_rank": 64,
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    "use_unsloth": false,
    "gradient_checkpointing": true,
    "disable_tqdm": false,
    "optim": "paged_adamw_32bit",
    "seed": 42,
    "fp16": true,
    "report_to": "tensorboard",
    "dataloader_num_workers": 0,
    "save_strategy": "steps",
    "weight_decay": 0,
    "max_grad_norm": 0.3,
    "remove_unused_columns": false
}
train_args/sft/qlora/vicuna-13b-sft-qlora.json (new file, mode 100644)

{
    "output_dir": "output/firefly-vicuna-13b-sft-qlora",
    "model_name_or_path": "lmsys/vicuna-13b-v1.5",
    "train_file": "./data/dummy_data.jsonl",
    "template_name": "vicuna",
    "num_train_epochs": 1,
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 16,
    "learning_rate": 1e-4,
    "max_seq_length": 1024,
    "logging_steps": 100,
    "save_steps": 100,
    "save_total_limit": 1,
    "lr_scheduler_type": "constant_with_warmup",
    "warmup_steps": 100,
    "lora_rank": 64,
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    "gradient_checkpointing": true,
    "disable_tqdm": false,
    "optim": "paged_adamw_32bit",
    "seed": 42,
    "fp16": true,
    "report_to": "tensorboard",
    "dataloader_num_workers": 5,
    "save_strategy": "steps",
    "weight_decay": 0,
    "max_grad_norm": 0.3,
    "remove_unused_columns": false
}
train_args/sft/qlora/vicuna-7b-sft-qlora.json (new file, mode 100644)

{
    "output_dir": "output/firefly-vicuna-7b-sft-qlora",
    "model_name_or_path": "lmsys/vicuna-7b-v1.5",
    "train_file": "./data/dummy_data.jsonl",
    "template_name": "vicuna",
    "num_train_epochs": 1,
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 16,
    "learning_rate": 2e-4,
    "max_seq_length": 1024,
    "logging_steps": 100,
    "save_steps": 100,
    "save_total_limit": 1,
    "lr_scheduler_type": "constant_with_warmup",
    "warmup_steps": 100,
    "lora_rank": 64,
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    "gradient_checkpointing": true,
    "disable_tqdm": false,
    "optim": "paged_adamw_32bit",
    "seed": 42,
    "fp16": true,
    "report_to": "tensorboard",
    "dataloader_num_workers": 5,
    "save_strategy": "steps",
    "weight_decay": 0,
    "max_grad_norm": 0.3,
    "remove_unused_columns": false
}
train_args/sft/qlora/xverse-13b-sft-qlora.json (new file, mode 100644)

{
    "output_dir": "output/firefly-xverse-13b-sft-qlora",
    "model_name_or_path": "xverse/XVERSE-13B-Chat",
    "train_file": "./data/dummy_data.jsonl",
    "template_name": "xverse",
    "num_train_epochs": 1,
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 16,
    "learning_rate": 1e-4,
    "max_seq_length": 1024,
    "logging_steps": 100,
    "save_steps": 100,
    "save_total_limit": 1,
    "lr_scheduler_type": "constant_with_warmup",
    "warmup_steps": 100,
    "lora_rank": 64,
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    "gradient_checkpointing": true,
    "disable_tqdm": false,
    "optim": "paged_adamw_32bit",
    "seed": 42,
    "fp16": true,
    "report_to": "tensorboard",
    "dataloader_num_workers": 5,
    "save_strategy": "steps",
    "weight_decay": 0,
    "max_grad_norm": 0.3,
    "remove_unused_columns": false
}
train_args/sft/qlora/xverse-7b-sft-qlora.json (new file, mode 100644)

{
    "output_dir": "output/firefly-xverse-7b-sft-qlora",
    "model_name_or_path": "xverse/XVERSE-7B-Chat",
    "train_file": "./data/dummy_data.jsonl",
    "template_name": "xverse",
    "num_train_epochs": 1,
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 16,
    "learning_rate": 2e-4,
    "max_seq_length": 1024,
    "logging_steps": 100,
    "save_steps": 100,
    "save_total_limit": 1,
    "lr_scheduler_type": "constant_with_warmup",
    "warmup_steps": 100,
    "lora_rank": 64,
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    "gradient_checkpointing": true,
    "disable_tqdm": false,
    "optim": "paged_adamw_32bit",
    "seed": 42,
    "fp16": true,
    "report_to": "tensorboard",
    "dataloader_num_workers": 5,
    "save_strategy": "steps",
    "weight_decay": 0,
    "max_grad_norm": 0.3,
    "remove_unused_columns": false
}
train_args/sft/qlora/yi-34b-sft-qlora.json (new file, mode 100644)

{
    "output_dir": "output/firefly-yi-34b-sft-qlora",
    "model_name_or_path": "01-ai/Yi-34B",
    "train_file": "./data/dummy_data.jsonl",
    "template_name": "yi",
    "num_train_epochs": 1,
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 16,
    "learning_rate": 1e-4,
    "max_seq_length": 1024,
    "logging_steps": 100,
    "save_steps": 100,
    "save_total_limit": 1,
    "lr_scheduler_type": "constant_with_warmup",
    "warmup_steps": 100,
    "lora_rank": 64,
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    "gradient_checkpointing": true,
    "disable_tqdm": false,
    "optim": "paged_adamw_32bit",
    "seed": 42,
    "fp16": true,
    "report_to": "tensorboard",
    "dataloader_num_workers": 0,
    "save_strategy": "steps",
    "weight_decay": 0,
    "max_grad_norm": 0.3,
    "remove_unused_columns": false
}
train_args/sft/qlora/yi-6b-sft-qlora.json (new file, mode 100644)

{
    "output_dir": "output/firefly-yi-6b-sft-qlora",
    "model_name_or_path": "01-ai/Yi-6B-Chat",
    "train_file": "./data/dummy_data.jsonl",
    "template_name": "yi",
    "num_train_epochs": 1,
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 16,
    "learning_rate": 2e-4,
    "max_seq_length": 1024,
    "logging_steps": 100,
    "save_steps": 100,
    "save_total_limit": 1,
    "lr_scheduler_type": "constant_with_warmup",
    "warmup_steps": 100,
    "lora_rank": 64,
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    "gradient_checkpointing": true,
    "disable_tqdm": false,
    "optim": "paged_adamw_32bit",
    "seed": 42,
    "fp16": true,
    "report_to": "tensorboard",
    "dataloader_num_workers": 0,
    "save_strategy": "steps",
    "weight_decay": 0,
    "max_grad_norm": 0.3,
    "remove_unused_columns": false
}
train_args/sft/qlora/yi1.5-6b-sft-qlora.json (new file, mode 100644)

{
    "output_dir": "output/firefly-yi1.5-6b-sft-qlora",
    "model_name_or_path": "01-ai/Yi-1.5-6B-Chat",
    "train_file": "./data/dummy_data.jsonl",
    "template_name": "yi",
    "num_train_epochs": 1,
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 16,
    "learning_rate": 2e-4,
    "max_seq_length": 1024,
    "logging_steps": 100,
    "save_steps": 100,
    "save_total_limit": 1,
    "lr_scheduler_type": "constant_with_warmup",
    "warmup_steps": 100,
    "lora_rank": 64,
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    "gradient_checkpointing": true,
    "disable_tqdm": false,
    "optim": "paged_adamw_32bit",
    "seed": 42,
    "fp16": true,
    "report_to": "tensorboard",
    "dataloader_num_workers": 0,
    "save_strategy": "steps",
    "weight_decay": 0,
    "max_grad_norm": 0.3,
    "remove_unused_columns": false
}
train_args/sft/qlora/zephyr-7b-sft-qlora.json (new file, mode 100644)

{
    "output_dir": "output/firefly-zephyr-7b-sft-qlora",
    "model_name_or_path": "HuggingFaceH4/zephyr-7b-beta",
    "train_file": "./data/dummy_data.jsonl",
    "template_name": "zephyr",
    "num_train_epochs": 1,
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 16,
    "learning_rate": 2e-4,
    "max_seq_length": 1024,
    "logging_steps": 100,
    "save_steps": 100,
    "save_total_limit": 1,
    "lr_scheduler_type": "constant_with_warmup",
    "warmup_steps": 100,
    "lora_rank": 64,
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    "use_unsloth": true,
    "gradient_checkpointing": true,
    "disable_tqdm": false,
    "optim": "paged_adamw_32bit",
    "seed": 42,
    "fp16": true,
    "report_to": "tensorboard",
    "dataloader_num_workers": 0,
    "save_strategy": "steps",
    "weight_decay": 0,
    "max_grad_norm": 0.3,
    "remove_unused_columns": false
}
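zephyr-7b is the only config in this commit with "use_unsloth" set to true, which is presumably what the vendored unsloth submodule and unsloth.zip below are for. A hedged sketch of the usual Unsloth loading path follows; the FastLanguageModel calls are real Unsloth API, but exactly how this repo's train.py branches on the flag is an assumption.

# Hedged sketch: QLoRA loading via Unsloth's patched kernels when
# "use_unsloth" is true.
from unsloth import FastLanguageModel

# 4-bit load with Unsloth's fused attention/MLP implementations.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="HuggingFaceH4/zephyr-7b-beta",
    max_seq_length=1024,
    load_in_4bit=True,
)

# Attach LoRA with the same rank/alpha/dropout as the JSON config.
# Note: Unsloth's fastest patches assume lora_dropout=0; a non-zero value
# like the 0.05 used here falls back to a slower code path.
model = FastLanguageModel.get_peft_model(
    model,
    r=64,
    lora_alpha=16,
    lora_dropout=0.05,
    use_gradient_checkpointing=True,
)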
train_args/sft/qlora/ziya2-13b-sft-qlora.json (new file, mode 100644)

{
    "output_dir": "output/firefly-ziya2-13b-sft-qlora",
    "model_name_or_path": "IDEA-CCNL/Ziya2-13B-Chat",
    "train_file": "./data/dummy_data.jsonl",
    "template_name": "ziya2",
    "num_train_epochs": 1,
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 16,
    "learning_rate": 1e-4,
    "max_seq_length": 1024,
    "logging_steps": 100,
    "save_steps": 100,
    "save_total_limit": 1,
    "lr_scheduler_type": "constant_with_warmup",
    "warmup_steps": 100,
    "lora_rank": 64,
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    "gradient_checkpointing": true,
    "disable_tqdm": false,
    "optim": "paged_adamw_32bit",
    "seed": 42,
    "fp16": true,
    "report_to": "tensorboard",
    "dataloader_num_workers": 10,
    "save_strategy": "steps",
    "weight_decay": 0,
    "max_grad_norm": 0.3,
    "remove_unused_columns": false
}
unsloth (submodule) @ fd753fed
Subproject commit fd753fed99ed5f10ef8a9b7139588d9de9ddecfb
unsloth.zip (new file, mode 100644)
Binary file added.