#!/usr/bin/env bash
# Distill facebook/bart-large-xsum into a smaller "distilbart" student
# (12 encoder layers copied from the teacher, 6 decoder layers) on the
# XSUM summarization dataset. Any extra CLI args are forwarded to
# distillation.py via "$@" so callers can override/extend flags.
#
# Expects to be run from a directory where:
#   - distillation.py is present,
#   - ../ contains the package imported by distillation.py (hence PYTHONPATH),
#   - ./xsum holds the dataset files,
# Output checkpoints land in ./distilbart_xsum_12_6.

export PYTHONPATH="../":"${PYTHONPATH}"

python distillation.py \
  --teacher facebook/bart-large-xsum --data_dir xsum \
  --tokenizer_name facebook/bart-large-xsum \
  --student_decoder_layers 6 --student_encoder_layers 12 \
  --freeze_encoder --freeze_embeds \
  --learning_rate=3e-4 \
  --do_train \
  --do_predict \
  --fp16 --fp16_opt_level=O1 \
  --val_check_interval 0.1 --n_val 1000 --eval_beams 2 --length_penalty=0.5 \
  --max_target_length=60 --val_max_target_length=60 --test_max_target_length=100 \
  --model_name_or_path IGNORED \
  --alpha_hid=3. \
  --train_batch_size=16 --eval_batch_size=16 --gradient_accumulation_steps=2 \
  --sortish_sampler \
  --num_train_epochs=6 \
  --warmup_steps 500 \
  --output_dir distilbart_xsum_12_6 \
  "$@"