data_config:
  train_file: train.jsonl
  val_file: dev.jsonl
  test_file: dev.jsonl
  num_proc: 1
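  # Note: the dataset files above are JSON Lines. A minimal sketch of one record,
  # assuming the conversational format used by the upstream GLM-4 finetune scripts
  # (an assumption; verify against your own data loader):
  #   {"messages": [{"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}]}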

combine: True
freezeV: True
max_input_length: 512
max_output_length: 512
# swanlab: "local"  # set to "local" if you don't want to use the cloud logger

training_args:
  # see `transformers.Seq2SeqTrainingArguments`
  output_dir: ./output
  max_steps: 3000
  # adjust to fit your dataset
  learning_rate: 5e-4
  # settings for data loading
  per_device_train_batch_size: 1
  dataloader_num_workers: 16
  remove_unused_columns: false
  # settings for saving checkpoints
  save_strategy: steps
  save_steps: 500
  # settings for logging
  log_level: info
  logging_strategy: steps
  logging_steps: 10
  run_name: "glm4-lora-finetune"
  # settings for evaluation
  per_device_eval_batch_size: 4
  eval_strategy: steps
  eval_steps: 500
  # settings for optimizer
  # adam_epsilon: 1e-6
  # uncomment the following line to detect nan or inf values
  # debug: underflow_overflow
  predict_with_generate: true
  # see `transformers.GenerationConfig`
  generation_config:
    max_new_tokens: 512
  # set the absolute path to your DeepSpeed config file here
  # deepspeed: configs/ds_zero_3.json
  deepspeed: configs/ds_zero_2.json
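  # With a ZeRO config enabled, launch through a distributed launcher. A usage sketch
  # (the entry-point script name and paths are assumptions; adjust to this repo):
  #   OMP_NUM_THREADS=1 torchrun --standalone --nproc_per_node=8 finetune.py data/ THUDM/glm-4-9b-chat configs/lora.yaml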

peft_config:
  peft_type: LORA
  task_type: CAUSAL_LM
  r: 8
  lora_alpha: 32
  lora_dropout: 0.1
  target_modules: ["q_proj", "k_proj", "v_proj"]
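
# A minimal sketch of the peft.LoraConfig this section maps to (assumes the `peft`
# package is what consumes these values; not part of the original file):
#   from peft import LoraConfig, TaskType
#   LoraConfig(task_type=TaskType.CAUSAL_LM, r=8, lora_alpha=32,
#              lora_dropout=0.1, target_modules=["q_proj", "k_proj", "v_proj"])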