# params.yaml
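# Training configuration for a Darknet-53 image classifier on ImageNet-2012,
# laid out in the TensorFlow Model Garden runtime/task/trainer structure.
#
# runtime: hardware and distribution settings (MirroredStrategy over 2 GPUs,
# float16 mixed precision with dynamic loss scaling).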
runtime:
  all_reduce_alg: null
  batchnorm_spatial_persistent: false
  dataset_num_private_threads: null
  default_shard_dim: -1
  distribution_strategy: mirrored
  enable_xla: false
  gpu_thread_mode: null
  loss_scale: dynamic
  mixed_precision_dtype: float16
  num_cores_per_replica: 1
  num_gpus: 2
  num_packs: 1
  per_gpu_thread_count: 0
  run_eagerly: false
  task_index: -1
  tpu: null
  tpu_enable_xla_dynamic_padder: null
  worker_hosts: null
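# task: model definition, losses, and the train/eval input pipelines.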
task:
  evaluation:
    top_k: 5
  gradient_clip_norm: 0.0
  init_checkpoint: ''
  logging_dir: null
  losses:
    l2_weight_decay: 0.0005
    label_smoothing: 0.0
    one_hot: true
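  # Model: Darknet-53 backbone (feature levels 3-5), mish activation, and
  # synchronized batch norm across replicas. num_classes of 1001 follows the
  # common "ImageNet labels 1-1000 plus an unused index 0" convention.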
  model:
    add_head_batch_norm: false
    backbone:
      darknet:
        depth_scale: 1.0
        dilate: false
        max_level: 5
        min_level: 3
        model_id: darknet53
        use_reorg_input: false
        use_separable_conv: false
        width_scale: 1.0
      type: darknet
    dropout_rate: 0.0
    input_size: [256, 256, 3]
    kernel_initializer: VarianceScaling
    norm_activation:
      activation: mish
      norm_epsilon: 0.001
      norm_momentum: 0.99
      use_sync_bn: true
    num_classes: 1001
  name: null
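  # Training pipeline: ImageNet-2012 'train' split read through TFDS,
  # random horizontal flip, global batch size 16 (8 images per GPU across
  # the 2 replicas configured above).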
  train_data:
    aug_policy: null
    aug_rand_hflip: true
    aug_type: null
    block_length: 1
    cache: false
    color_jitter: 0.0
    cycle_length: 10
    decode_jpeg_only: true
    decoder:
      simple_decoder:
        mask_binarize_threshold: null
        regenerate_source_id: false
      type: simple_decoder
    deterministic: null
    drop_remainder: true
    dtype: float16
    enable_tf_data_service: false
    file_type: tfrecord
    global_batch_size: 16
    image_field_key: image/encoded
    input_path: ''
    is_multilabel: false
    is_training: true
    label_field_key: image/class/label
    mixup_and_cutmix: null
    randaug_magnitude: 10
    random_erasing: null
    seed: null
    sharding: true
    shuffle_buffer_size: 100
    tf_data_service_address: null
    tf_data_service_job_name: null
    tfds_as_supervised: false
    tfds_data_dir: ~/tensorflow_datasets/
    tfds_name: imagenet2012
    tfds_skip_decoding_feature: ''
    tfds_split: train
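  # Evaluation pipeline: ImageNet-2012 'validation' split with the same
  # decoder, dtype, and batch size as the training pipeline.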
  validation_data:
    aug_policy: null
    aug_rand_hflip: true
    aug_type: null
    block_length: 1
    cache: false
    color_jitter: 0.0
    cycle_length: 10
    decode_jpeg_only: true
    decoder:
      simple_decoder:
        mask_binarize_threshold: null
        regenerate_source_id: false
      type: simple_decoder
    deterministic: null
    drop_remainder: false
    dtype: float16
    enable_tf_data_service: false
    file_type: tfrecord
    global_batch_size: 16
    image_field_key: image/encoded
    input_path: ''
    is_multilabel: false
    is_training: true
    label_field_key: image/class/label
    mixup_and_cutmix: null
    randaug_magnitude: 10
    random_erasing: null
    seed: null
    sharding: true
    shuffle_buffer_size: 100
    tf_data_service_address: null
    tf_data_service_job_name: null
    tfds_as_supervised: false
    tfds_data_dir: ~/tensorflow_datasets/
    tfds_name: imagenet2012
    tfds_skip_decoding_feature: ''
    tfds_split: validation
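# trainer: training-loop settings. 6.4M train steps with evaluation,
# checkpointing, and summaries every 10k steps; 3200 validation steps at
# batch size 16 covers roughly the 50k-image ImageNet validation set.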
trainer:
  allow_tpu_summary: false
  best_checkpoint_eval_metric: ''
  best_checkpoint_export_subdir: ''
  best_checkpoint_metric_comp: higher
  checkpoint_interval: 10000
  continuous_eval_timeout: 3600
  eval_tf_function: true
  eval_tf_while_loop: false
  loss_upper_bound: 1000000.0
  max_to_keep: 5
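  # Optimizer: SGD with momentum 0.9, linear warmup over the first 8,000
  # steps, then polynomial decay (power 4) from 1.25e-2 down to 1.25e-5 over
  # the remaining 6,392,000 steps (6,400,000 total minus 8,000 warmup).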
  optimizer_config:
    ema: null
    learning_rate:
      polynomial:
        cycle: false
        decay_steps: 6392000
        end_learning_rate: 1.25e-05
        initial_learning_rate: 0.0125
        name: PolynomialDecay
        offset: 0
        power: 4.0
      type: polynomial
    optimizer:
      sgd:
        clipnorm: null
        clipvalue: null
        decay: 0.0
        global_clipnorm: null
        momentum: 0.9
        name: SGD
        nesterov: false
      type: sgd
    warmup:
      linear:
        name: linear
        warmup_learning_rate: 0
        warmup_steps: 8000
      type: linear
  recovery_begin_steps: 0
  recovery_max_trials: 0
  steps_per_loop: 10000
  summary_interval: 10000
  train_steps: 6400000
  train_tf_function: true
  train_tf_while_loop: true
  validation_interval: 10000
  validation_steps: 3200
  validation_summary_subdir: validation
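# A config like this is normally passed to the Model Garden training driver
# via --config_file. The module path and experiment name below are
# assumptions and depend on the repo layout, e.g.:
#
#   python3 -m official.vision.beta.train \
#     --experiment=darknet_classification \
#     --config_file=params.yaml \
#     --mode=train_and_eval \
#     --model_dir=/tmp/darknet53_imagenet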