# train_full_fsdp2.yaml
# Model selection and prompt template.
# NOTE(review): `model` looks like a Hugging Face Hub repo id (org/name) —
# confirm whether the loader also accepts a local directory here.
model: Qwen/Qwen3-0.6B
# NOTE(review): presumably forwarded to the model loader so that code shipped
# with the checkpoint may be executed; only enable for trusted sources — confirm.
trust_remote_code: true
model_class: llm

# Template identifier; the name suggests a Qwen3 chat format without a
# "thinking" segment — TODO confirm against the template registry.
template: qwen3_nothink

# Kernel selection.
# include_kernels accepts one of: null, true, false, auto, or a comma-separated
# list of kernel ids (e.g. kernel_id1,kernel_id2,kernel_id3). Default is null.
kernel_config:
  name: auto
  include_kernels: auto

# Explicit null: no quantization settings are supplied by this file.
# NOTE(review): presumably this leaves quantization disabled — confirm.
quant_config: null

# Distributed-training backend settings.
# NOTE(review): "fsdp2" presumably selects PyTorch FSDP2 sharding, and
# "dcp" presumably stands for a distributed-checkpoint directory — confirm.
dist_config:
  name: fsdp2
  # Left unset here. Example of a value that could be supplied instead:
  #   /mnt/f/pretrain_models/Qwen3-0.6B-dcp
  dcp_path: null

### data
# Path to a separate YAML file describing the training data.
# NOTE(review): relative path — presumably resolved from the working
# directory the trainer is launched in; confirm.
train_dataset: data/v1_sft_demo.yaml

### training
# NOTE(review): the "test_" output directory together with max_steps: 10
# suggests this is a short smoke-test run rather than a full training
# schedule — confirm before reusing these values.
output_dir: outputs/test_fsdp2
# NOTE(review): presumably the per-device batch size — confirm.
micro_batch_size: 1
# NOTE(review): presumably the maximum sequence length in tokens — confirm.
cutoff_len: 2048
# Keep the decimal point and signed exponent: YAML 1.1 loaders (e.g. PyYAML)
# resolve "1.0e-4" as a float, whereas "1e-4" may be read as a string.
learning_rate: 1.0e-4
# bfloat16 is switched off in this config.
bf16: false
max_steps: 10

### sample
# Settings for text generation (sampling).
# NOTE(review): "hf" presumably selects the Hugging Face generation backend
# — confirm against the list of supported backends.
sample_backend: hf
# Upper bound on the number of tokens generated per sample.
max_new_tokens: 128