# train_full_deepspeed.yaml
# Last commit: "uodata" (shihm)
### model
# Model identifier; looks like a Hugging Face Hub repo id -- confirm
# against the loader.
model: Qwen/Qwen3-0.6B
model_class: llm
# Template name; presumably the Qwen3 chat template variant without
# "thinking" output -- confirm.
template: qwen3_nothink

### kernels
# Both kernel options are left on `auto`.
kernel_config:
  name: auto
  include_kernels: auto

### distributed
# Uses the `deepspeed` backend with the JSON config file below.
# NOTE(review): the path is relative (presumably to the directory the
# launcher is run from), and the file name suggests ZeRO stage 3 -- confirm.
dist_config:
  name: deepspeed
  config_file: examples/deepspeed/ds_z3_config.json

### data
# Training dataset is referenced through another YAML file (relative path).
train_dataset: data/v1_sft_demo.yaml

### training
# Directory that receives the run's outputs (relative path).
output_dir: outputs/Qwen3-0.6B-deepspeed
# NOTE(review): presumably the per-device batch size -- confirm.
micro_batch_size: 1
# NOTE(review): presumably the maximum sequence length in tokens -- confirm.
cutoff_len: 2048
# Keep the dot and the signed exponent: YAML 1.1 loaders (e.g. PyYAML) only
# resolve this spelling to a float; `1e-4` would be read as a string.
learning_rate: 1.0e-4
bf16: true
# Only 10 steps; presumably a short demo / smoke-test run -- confirm.
max_steps: 10