# valohai.yaml: Valohai step configuration for running examples/text-classification/run_glue.py.
---

- step:
    # Runs examples/text-classification/run_glue.py in training mode inside
    # the image below. Every entry under `parameters` is substituted into the
    # final command through the {parameters} placeholder.
    name: Execute python examples/text-classification/run_glue.py
    image: pytorch/pytorch:nightly-devel-cuda10.0-cudnn7
    command:
      # Download the GLUE data into /glue_data; the training command reads it
      # back from the same location through --data_dir.
      - python /valohai/repository/utils/download_glue_data.py --data_dir=/glue_data
      # Install this repository in editable mode, then the examples' requirements.
      - pip install -e .
      - pip install -r examples/requirements.txt
      # Folded scalar (>-): the two lines below are joined with a single space
      # into one command line. {parameter-value:task_name} selects the data
      # sub-directory named after the task_name parameter.
      - >-
        python examples/text-classification/run_glue.py --do_train
        --data_dir=/glue_data/{parameter-value:task_name} {parameters}
    parameters:
      - name: model_type
        pass-as: --model_type={v}
        type: string
        default: bert
      - name: model_name_or_path
        pass-as: --model_name_or_path={v}
        type: string
        default: bert-base-uncased
      # Also used in the command above to build the --data_dir path.
      - name: task_name
        pass-as: --task_name={v}
        type: string
        default: MRPC
      - name: max_seq_length
        pass-as: --max_seq_length={v}
        description: >-
          The maximum total input sequence length after tokenization.
          Sequences longer than this will be truncated, sequences shorter will
          be padded.
        type: integer
        default: 128
      - name: per_gpu_train_batch_size
        pass-as: --per_gpu_train_batch_size={v}
        description: Batch size per GPU/CPU for training.
        type: integer
        default: 8
      - name: per_gpu_eval_batch_size
        pass-as: --per_gpu_eval_batch_size={v}
        description: Batch size per GPU/CPU for evaluation.
        type: integer
        default: 8
      - name: gradient_accumulation_steps
        pass-as: --gradient_accumulation_steps={v}
        description: Number of updates steps to accumulate before performing a backward/update pass.
        type: integer
        default: 1
      - name: learning_rate
        pass-as: --learning_rate={v}
        description: The initial learning rate for Adam.
        type: float
        default: 0.00005
      - name: adam_epsilon
        pass-as: --adam_epsilon={v}
        description: Epsilon for Adam optimizer.
        type: float
        default: 0.00000001
      - name: max_grad_norm
        pass-as: --max_grad_norm={v}
        description: Max gradient norm.
        type: float
        default: 1.0
      - name: num_train_epochs
        pass-as: --num_train_epochs={v}
        description: Total number of training epochs to perform.
        type: integer
        default: 3
      - name: max_steps
        pass-as: --max_steps={v}
        description: If > 0, set total number of training steps to perform. Override num_train_epochs.
        type: integer
        default: -1
      - name: warmup_steps
        pass-as: --warmup_steps={v}
        description: Linear warmup over warmup_steps.
        type: integer
        default: -1
      - name: logging_steps
        pass-as: --logging_steps={v}
        description: Log every X updates steps.
        type: integer
        default: 25
      - name: save_steps
        pass-as: --save_steps={v}
        description: Save checkpoint every X updates steps.
        type: integer
        default: -1
      - name: output_dir
        pass-as: --output_dir={v}
        type: string
        default: /valohai/outputs
      # pass-as is spelled out to match the other parameters; it is the same
      # form Valohai applies when pass-as is omitted (--<name>={v}).
      - name: evaluation_strategy
        pass-as: --evaluation_strategy={v}
        description: The evaluation strategy to use.
        type: string
        default: steps