Commit d74a64c4 authored by chenzk

v1.0

Pipeline #1450 canceled
{
"output_dir": "output/firefly-orion-14b-sft-qlora",
"model_name_or_path": "OrionStarAI/Orion-14B-Chat",
"train_file": "./data/dummy_data.jsonl",
"template_name": "orion",
"num_train_epochs": 1,
"per_device_train_batch_size": 1,
"gradient_accumulation_steps": 16,
"learning_rate": 1e-4,
"max_seq_length": 1024,
"logging_steps": 100,
"save_steps": 100,
"save_total_limit": 1,
"lr_scheduler_type": "constant_with_warmup",
"warmup_steps": 100,
"lora_rank": 64,
"lora_alpha": 16,
"lora_dropout": 0.05,
"gradient_checkpointing": true,
"disable_tqdm": false,
"optim": "paged_adamw_32bit",
"seed": 42,
"fp16": true,
"report_to": "tensorboard",
"dataloader_num_workers": 0,
"save_strategy": "steps",
"weight_decay": 0,
"max_grad_norm": 0.3,
"remove_unused_columns": false
}
\ No newline at end of file
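Each of these JSON files is a flat bag of training arguments: Hugging Face TrainingArguments fields (learning_rate, optim, fp16, ...) mixed with trainer-specific fields (model_name_or_path, template_name, lora_rank, ...). A minimal sketch of how such a file can be split into typed dataclasses with HfArgumentParser; the QLoRAArguments class and the file path below are illustrative assumptions, not the trainer's actual definitions:

# Hedged sketch: parse a flat JSON args file into two dataclasses.
# `QLoRAArguments` and the file path are hypothetical stand-ins.
from dataclasses import dataclass, field
from transformers import HfArgumentParser, TrainingArguments

@dataclass
class QLoRAArguments:
    model_name_or_path: str = field(default=None)
    train_file: str = field(default=None)
    template_name: str = field(default=None)
    max_seq_length: int = field(default=1024)
    lora_rank: int = field(default=64)
    lora_alpha: int = field(default=16)
    lora_dropout: float = field(default=0.05)

# parse_json_file distributes the flat keys across both dataclasses;
# everything not claimed above (learning_rate, optim, ...) lands in
# TrainingArguments.
parser = HfArgumentParser((QLoRAArguments, TrainingArguments))
qlora_args, training_args = parser.parse_json_file("orion-14b-sft-qlora.json")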
{
"output_dir": "output/firefly-phi3-3.8b-sft-qlora",
"model_name_or_path": "microsoft/Phi-3-mini-128k-instruct",
"train_file": "./data/dummy_data.jsonl",
"template_name": "phi3",
"num_train_epochs": 1,
"per_device_train_batch_size": 1,
"gradient_accumulation_steps": 16,
"learning_rate": 2e-4,
"max_seq_length": 1024,
"logging_steps": 100,
"save_steps": 100,
"save_total_limit": 1,
"lr_scheduler_type": "constant_with_warmup",
"warmup_steps": 100,
"lora_rank": 64,
"lora_alpha": 16,
"lora_dropout": 0.05,
"gradient_checkpointing": true,
"disable_tqdm": false,
"optim": "paged_adamw_32bit",
"seed": 42,
"fp16": true,
"report_to": "tensorboard",
"dataloader_num_workers": 0,
"save_strategy": "steps",
"weight_decay": 0,
"max_grad_norm": 0.3,
"remove_unused_columns": false
}
\ No newline at end of file
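The lora_rank / lora_alpha / lora_dropout triplet in every file maps directly onto a PEFT LoraConfig, and QLoRA implies loading the base model 4-bit-quantized. A hedged sketch of that wiring for the Phi-3 config above, using the standard transformers + peft APIs (the target_modules list is an assumption; a real trainer may derive it from the model architecture):

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# 4-bit NF4 quantization is the "Q" in QLoRA; compute dtype float16
# matches "fp16": true above.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3-mini-128k-instruct",
    quantization_config=bnb_config,
    trust_remote_code=True,
)
# Prepares norms/embeddings for k-bit training and enables gradient
# checkpointing, matching "gradient_checkpointing": true.
model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=True)

# Mirrors the JSON: lora_rank=64, lora_alpha=16, lora_dropout=0.05.
# target_modules is an assumed example, not the trainer's actual choice.
lora_config = LoraConfig(
    r=64,
    lora_alpha=16,
    lora_dropout=0.05,
    target_modules=["qkv_proj", "o_proj"],
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)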
{
"output_dir": "output/firefly-qwen-14b-sft-qlora",
"model_name_or_path": "Qwen/Qwen-14B-Chat",
"train_file": "./data/dummy_data.jsonl",
"template_name": "qwen",
"num_train_epochs": 1,
"per_device_train_batch_size": 1,
"gradient_accumulation_steps": 16,
"learning_rate": 1e-4,
"max_seq_length": 1024,
"logging_steps": 100,
"save_steps": 100,
"save_total_limit": 1,
"lr_scheduler_type": "constant_with_warmup",
"warmup_steps": 100,
"lora_rank": 64,
"lora_alpha": 16,
"lora_dropout": 0.05,
"gradient_checkpointing": true,
"disable_tqdm": false,
"optim": "paged_adamw_32bit",
"seed": 42,
"fp16": true,
"report_to": "tensorboard",
"dataloader_num_workers": 0,
"save_strategy": "steps",
"weight_decay": 0,
"max_grad_norm": 0.3,
"remove_unused_columns": false
}
\ No newline at end of file
{
"output_dir": "output/firefly-qwen-7b-sft-qlora",
"model_name_or_path": "Qwen/Qwen-7B-Chat",
"train_file": "./data/dummy_data.jsonl",
"template_name": "qwen",
"num_train_epochs": 1,
"per_device_train_batch_size": 1,
"gradient_accumulation_steps": 16,
"learning_rate": 2e-4,
"max_seq_length": 1024,
"logging_steps": 100,
"save_steps": 100,
"save_total_limit": 1,
"lr_scheduler_type": "constant_with_warmup",
"warmup_steps": 100,
"lora_rank": 64,
"lora_alpha": 16,
"lora_dropout": 0.05,
"gradient_checkpointing": true,
"disable_tqdm": false,
"optim": "paged_adamw_32bit",
"seed": 42,
"fp16": true,
"report_to": "tensorboard",
"dataloader_num_workers": 0,
"save_strategy": "steps",
"weight_decay": 0,
"max_grad_norm": 0.3,
"remove_unused_columns": false
}
\ No newline at end of file
{
"output_dir": "output/firefly-qwen1.5-7b-sft-qlora",
"model_name_or_path": "Qwen/Qwen1.5-7B-Chat",
"train_file": "./data/dummy_data.jsonl",
"template_name": "qwen",
"num_train_epochs": 1,
"per_device_train_batch_size": 1,
"gradient_accumulation_steps": 16,
"learning_rate": 2e-4,
"max_seq_length": 1024,
"logging_steps": 100,
"save_steps": 100,
"save_total_limit": 1,
"lr_scheduler_type": "constant_with_warmup",
"warmup_steps": 100,
"lora_rank": 64,
"lora_alpha": 16,
"lora_dropout": 0.05,
"use_unsloth": false,
"gradient_checkpointing": true,
"disable_tqdm": false,
"optim": "paged_adamw_32bit",
"seed": 42,
"fp16": true,
"report_to": "tensorboard",
"dataloader_num_workers": 0,
"save_strategy": "steps",
"weight_decay": 0,
"max_grad_norm": 0.3,
"remove_unused_columns": false
}
\ No newline at end of file
{
"output_dir": "output/firefly-qwen1.5-moe-2.7b-sft-qlora",
"model_name_or_path": "Qwen/Qwen1.5-MoE-A2.7B-Chat",
"train_file": "./data/dummy_data.jsonl",
"template_name": "qwen",
"num_train_epochs": 1,
"per_device_train_batch_size": 1,
"gradient_accumulation_steps": 16,
"learning_rate": 2e-4,
"max_seq_length": 1024,
"logging_steps": 100,
"save_steps": 100,
"save_total_limit": 1,
"lr_scheduler_type": "constant_with_warmup",
"warmup_steps": 100,
"lora_rank": 64,
"lora_alpha": 16,
"lora_dropout": 0.05,
"gradient_checkpointing": true,
"disable_tqdm": false,
"optim": "paged_adamw_32bit",
"seed": 42,
"fp16": true,
"report_to": "tensorboard",
"dataloader_num_workers": 0,
"save_strategy": "steps",
"weight_decay": 0,
"max_grad_norm": 0.3,
"remove_unused_columns": false
}
\ No newline at end of file
{
"output_dir": "output/firefly-qwen2-7b-sft-qlora",
"model_name_or_path": "Qwen/Qwen2-7B-Instruct",
"train_file": "./data/dummy_data.jsonl",
"template_name": "qwen",
"num_train_epochs": 1,
"per_device_train_batch_size": 1,
"gradient_accumulation_steps": 16,
"learning_rate": 2e-4,
"max_seq_length": 1024,
"logging_steps": 100,
"save_steps": 100,
"save_total_limit": 1,
"lr_scheduler_type": "constant_with_warmup",
"warmup_steps": 100,
"lora_rank": 64,
"lora_alpha": 16,
"lora_dropout": 0.05,
"use_unsloth": false,
"gradient_checkpointing": true,
"disable_tqdm": false,
"optim": "paged_adamw_32bit",
"seed": 42,
"fp16": true,
"report_to": "tensorboard",
"dataloader_num_workers": 0,
"save_strategy": "steps",
"weight_decay": 0,
"max_grad_norm": 0.3,
"remove_unused_columns": false
}
\ No newline at end of file
{
"output_dir": "output/firefly-vicuna-13b-sft-qlora",
"model_name_or_path": "lmsys/vicuna-13b-v1.5",
"train_file": "./data/dummy_data.jsonl",
"template_name": "vicuna",
"num_train_epochs": 1,
"per_device_train_batch_size": 1,
"gradient_accumulation_steps": 16,
"learning_rate": 1e-4,
"max_seq_length": 1024,
"logging_steps": 100,
"save_steps": 100,
"save_total_limit": 1,
"lr_scheduler_type": "constant_with_warmup",
"warmup_steps": 100,
"lora_rank": 64,
"lora_alpha": 16,
"lora_dropout": 0.05,
"gradient_checkpointing": true,
"disable_tqdm": false,
"optim": "paged_adamw_32bit",
"seed": 42,
"fp16": true,
"report_to": "tensorboard",
"dataloader_num_workers": 5,
"save_strategy": "steps",
"weight_decay": 0,
"max_grad_norm": 0.3,
"remove_unused_columns": false
}
\ No newline at end of file
{
"output_dir": "output/firefly-vicuna-7b-sft-qlora",
"model_name_or_path": "lmsys/vicuna-7b-v1.5",
"train_file": "./data/dummy_data.jsonl",
"template_name": "vicuna",
"num_train_epochs": 1,
"per_device_train_batch_size": 1,
"gradient_accumulation_steps": 16,
"learning_rate": 2e-4,
"max_seq_length": 1024,
"logging_steps": 100,
"save_steps": 100,
"save_total_limit": 1,
"lr_scheduler_type": "constant_with_warmup",
"warmup_steps": 100,
"lora_rank": 64,
"lora_alpha": 16,
"lora_dropout": 0.05,
"gradient_checkpointing": true,
"disable_tqdm": false,
"optim": "paged_adamw_32bit",
"seed": 42,
"fp16": true,
"report_to": "tensorboard",
"dataloader_num_workers": 5,
"save_strategy": "steps",
"weight_decay": 0,
"max_grad_norm": 0.3,
"remove_unused_columns": false
}
\ No newline at end of file
{
"output_dir": "output/firefly-xverse-13b-sft-qlora",
"model_name_or_path": "xverse/XVERSE-13B-Chat",
"train_file": "./data/dummy_data.jsonl",
"template_name": "xverse",
"num_train_epochs": 1,
"per_device_train_batch_size": 1,
"gradient_accumulation_steps": 16,
"learning_rate": 1e-4,
"max_seq_length": 1024,
"logging_steps": 100,
"save_steps": 100,
"save_total_limit": 1,
"lr_scheduler_type": "constant_with_warmup",
"warmup_steps": 100,
"lora_rank": 64,
"lora_alpha": 16,
"lora_dropout": 0.05,
"gradient_checkpointing": true,
"disable_tqdm": false,
"optim": "paged_adamw_32bit",
"seed": 42,
"fp16": true,
"report_to": "tensorboard",
"dataloader_num_workers": 5,
"save_strategy": "steps",
"weight_decay": 0,
"max_grad_norm": 0.3,
"remove_unused_columns": false
}
\ No newline at end of file
{
"output_dir": "output/firefly-xverse-7b-sft-qlora",
"model_name_or_path": "xverse/XVERSE-7B-Chat",
"train_file": "./data/dummy_data.jsonl",
"template_name": "xverse",
"num_train_epochs": 1,
"per_device_train_batch_size": 1,
"gradient_accumulation_steps": 16,
"learning_rate": 2e-4,
"max_seq_length": 1024,
"logging_steps": 100,
"save_steps": 100,
"save_total_limit": 1,
"lr_scheduler_type": "constant_with_warmup",
"warmup_steps": 100,
"lora_rank": 64,
"lora_alpha": 16,
"lora_dropout": 0.05,
"gradient_checkpointing": true,
"disable_tqdm": false,
"optim": "paged_adamw_32bit",
"seed": 42,
"fp16": true,
"report_to": "tensorboard",
"dataloader_num_workers": 5,
"save_strategy": "steps",
"weight_decay": 0,
"max_grad_norm": 0.3,
"remove_unused_columns": false
}
\ No newline at end of file
{
"output_dir": "output/firefly-yi-34b-sft-qlora",
"model_name_or_path": "01-ai/Yi-34B",
"train_file": "./data/dummy_data.jsonl",
"template_name": "yi",
"num_train_epochs": 1,
"per_device_train_batch_size": 1,
"gradient_accumulation_steps": 16,
"learning_rate": 1e-4,
"max_seq_length": 1024,
"logging_steps": 100,
"save_steps": 100,
"save_total_limit": 1,
"lr_scheduler_type": "constant_with_warmup",
"warmup_steps": 100,
"lora_rank": 64,
"lora_alpha": 16,
"lora_dropout": 0.05,
"gradient_checkpointing": true,
"disable_tqdm": false,
"optim": "paged_adamw_32bit",
"seed": 42,
"fp16": true,
"report_to": "tensorboard",
"dataloader_num_workers": 0,
"save_strategy": "steps",
"weight_decay": 0,
"max_grad_norm": 0.3,
"remove_unused_columns": false
}
\ No newline at end of file
{
"output_dir": "output/firefly-yi-6b-sft-qlora",
"model_name_or_path": "01-ai/Yi-6B-Chat",
"train_file": "./data/dummy_data.jsonl",
"template_name": "yi",
"num_train_epochs": 1,
"per_device_train_batch_size": 1,
"gradient_accumulation_steps": 16,
"learning_rate": 2e-4,
"max_seq_length": 1024,
"logging_steps": 100,
"save_steps": 100,
"save_total_limit": 1,
"lr_scheduler_type": "constant_with_warmup",
"warmup_steps": 100,
"lora_rank": 64,
"lora_alpha": 16,
"lora_dropout": 0.05,
"gradient_checkpointing": true,
"disable_tqdm": false,
"optim": "paged_adamw_32bit",
"seed": 42,
"fp16": true,
"report_to": "tensorboard",
"dataloader_num_workers": 0,
"save_strategy": "steps",
"weight_decay": 0,
"max_grad_norm": 0.3,
"remove_unused_columns": false
}
\ No newline at end of file
{
"output_dir": "output/firefly-yi1.5-6b-sft-qlora",
"model_name_or_path": "01-ai/Yi-1.5-6B-Chat",
"train_file": "./data/dummy_data.jsonl",
"template_name": "yi",
"num_train_epochs": 1,
"per_device_train_batch_size": 1,
"gradient_accumulation_steps": 16,
"learning_rate": 2e-4,
"max_seq_length": 1024,
"logging_steps": 100,
"save_steps": 100,
"save_total_limit": 1,
"lr_scheduler_type": "constant_with_warmup",
"warmup_steps": 100,
"lora_rank": 64,
"lora_alpha": 16,
"lora_dropout": 0.05,
"gradient_checkpointing": true,
"disable_tqdm": false,
"optim": "paged_adamw_32bit",
"seed": 42,
"fp16": true,
"report_to": "tensorboard",
"dataloader_num_workers": 0,
"save_strategy": "steps",
"weight_decay": 0,
"max_grad_norm": 0.3,
"remove_unused_columns": false
}
\ No newline at end of file
{
"output_dir": "output/firefly-zephyr-7b-sft-qlora",
"model_name_or_path": "HuggingFaceH4/zephyr-7b-beta",
"train_file": "./data/dummy_data.jsonl",
"template_name": "zephyr",
"num_train_epochs": 1,
"per_device_train_batch_size": 1,
"gradient_accumulation_steps": 16,
"learning_rate": 2e-4,
"max_seq_length": 1024,
"logging_steps": 100,
"save_steps": 100,
"save_total_limit": 1,
"lr_scheduler_type": "constant_with_warmup",
"warmup_steps": 100,
"lora_rank": 64,
"lora_alpha": 16,
"lora_dropout": 0.05,
"use_unsloth": true,
"gradient_checkpointing": true,
"disable_tqdm": false,
"optim": "paged_adamw_32bit",
"seed": 42,
"fp16": true,
"report_to": "tensorboard",
"dataloader_num_workers": 0,
"save_strategy": "steps",
"weight_decay": 0,
"max_grad_norm": 0.3,
"remove_unused_columns": false
}
\ No newline at end of file
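This zephyr file is the only one that actually enables "use_unsloth": true (the Qwen1.5 and Qwen2 files carry the flag but leave it false), which matches the unsloth submodule pinned at the end of this commit. Under that flag the model load would plausibly go through Unsloth's fast path rather than plain transformers; a minimal sketch under that assumption:

# Hedged sketch of the use_unsloth=true path: Unsloth's FastLanguageModel
# replaces the plain transformers load and patches in fused kernels.
from unsloth import FastLanguageModel

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="HuggingFaceH4/zephyr-7b-beta",
    max_seq_length=1024,   # "max_seq_length" in the JSON
    load_in_4bit=True,     # QLoRA: 4-bit base weights
)
model = FastLanguageModel.get_peft_model(
    model,
    r=64,                  # lora_rank
    lora_alpha=16,
    lora_dropout=0.05,     # Unsloth warns that nonzero dropout is slower
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
)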
{
"output_dir": "output/firefly-ziya2-13b-sft-qlora",
"model_name_or_path": "IDEA-CCNL/Ziya2-13B-Chat",
"train_file": "./data/dummy_data.jsonl",
"template_name": "ziya2",
"num_train_epochs": 1,
"per_device_train_batch_size": 1,
"gradient_accumulation_steps": 16,
"learning_rate": 1e-4,
"max_seq_length": 1024,
"logging_steps": 100,
"save_steps": 100,
"save_total_limit": 1,
"lr_scheduler_type": "constant_with_warmup",
"warmup_steps": 100,
"lora_rank": 64,
"lora_alpha": 16,
"lora_dropout": 0.05,
"gradient_checkpointing": true,
"disable_tqdm": false,
"optim": "paged_adamw_32bit",
"seed": 42,
"fp16": true,
"report_to": "tensorboard",
"dataloader_num_workers": 10,
"save_strategy": "steps",
"weight_decay": 0,
"max_grad_norm": 0.3,
"remove_unused_columns": false
}
\ No newline at end of file
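Taken together, the sixteen configs differ only where it matters: learning_rate is 2e-4 for the smaller models (roughly 7B and below) and 1e-4 for the 13B-and-larger ones, and dataloader_num_workers varies between 0, 5, and 10. Everything else is shared, including the effective batch size: per_device_train_batch_size 1 × gradient_accumulation_steps 16 = 16 sequences per optimizer step per GPU, with paged_adamw_32bit keeping optimizer state in paged 32-bit buffers to absorb memory spikes. A toy check of the accumulation arithmetic (num_gpus is an assumption; the configs do not pin a GPU count):

# Effective batch per optimizer step, as configured in every file above.
per_device_train_batch_size = 1
gradient_accumulation_steps = 16
num_gpus = 1  # assumption

effective_batch = per_device_train_batch_size * gradient_accumulation_steps * num_gpus
assert effective_batch == 16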
unsloth @ fd753fed
Subproject commit fd753fed99ed5f10ef8a9b7139588d9de9ddecfb