{ "input": { "user_id": "user", "model_id": "llama-test", "run_id": "llama-test", "credentials": { "wandb_api_key": "", "hf_token": "" }, "args": { "base_model": "NousResearch/Meta-Llama-3-8B", "model_type": "LlamaForCausalLM", "tokenizer_type": "AutoTokenizer", "load_in_8bit": true, "load_in_4bit": false, "strict": false, "datasets": [ { "path": "mhenrichsen/alpaca_2k_test", "type": "alpaca" } ], "val_set_size": 0.05, "output_dir": "./outputs/lora-out", "sequence_len": 4096, "sample_packing": true, "eval_sample_packing": false, "pad_to_sequence_len": true, "adapter": "lora", "lora_r": 32, "lora_alpha": 16, "lora_dropout": 0.05, "lora_target_linear": true, "lora_modules_to_save": [ "embed_tokens", "lm_head" ], "gradient_accumulation_steps": 4, "micro_batch_size": 2, "num_epochs": 1, "optimizer": "adamw_bnb_8bit", "lr_scheduler": "cosine", "learning_rate": 0.0002, "train_on_inputs": false, "group_by_length": false, "bf16": "auto", "tf32": false, "gradient_checkpointing": true, "logging_steps": 1, "flash_attention": true, "warmup_steps": 1, "evals_per_epoch": 1, "eval_max_new_tokens": 128, "saves_per_epoch": 1, "weight_decay": 0.0, "special_tokens": { "pad_token": "<|end_of_text|>" } } } }